@livekit/agents 1.0.24 → 1.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/dist/inference/llm.cjs +1 -2
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.ts.map +1 -1
  4. package/dist/inference/llm.js +1 -2
  5. package/dist/inference/llm.js.map +1 -1
  6. package/dist/inference/stt.cjs +1 -1
  7. package/dist/inference/stt.cjs.map +1 -1
  8. package/dist/inference/stt.d.ts.map +1 -1
  9. package/dist/inference/stt.js +1 -1
  10. package/dist/inference/stt.js.map +1 -1
  11. package/dist/inference/tts.cjs +4 -4
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.cts +0 -1
  14. package/dist/inference/tts.d.ts +0 -1
  15. package/dist/inference/tts.d.ts.map +1 -1
  16. package/dist/inference/tts.js +4 -4
  17. package/dist/inference/tts.js.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  19. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  20. package/dist/ipc/job_proc_lazy_main.js +1 -1
  21. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  22. package/dist/job.cjs +29 -2
  23. package/dist/job.cjs.map +1 -1
  24. package/dist/job.d.cts +6 -0
  25. package/dist/job.d.ts +6 -0
  26. package/dist/job.d.ts.map +1 -1
  27. package/dist/job.js +19 -2
  28. package/dist/job.js.map +1 -1
  29. package/dist/llm/llm.cjs +2 -1
  30. package/dist/llm/llm.cjs.map +1 -1
  31. package/dist/llm/llm.d.cts +1 -1
  32. package/dist/llm/llm.d.ts +1 -1
  33. package/dist/llm/llm.d.ts.map +1 -1
  34. package/dist/llm/llm.js +2 -1
  35. package/dist/llm/llm.js.map +1 -1
  36. package/dist/stream/deferred_stream.cjs +12 -4
  37. package/dist/stream/deferred_stream.cjs.map +1 -1
  38. package/dist/stream/deferred_stream.d.cts +6 -1
  39. package/dist/stream/deferred_stream.d.ts +6 -1
  40. package/dist/stream/deferred_stream.d.ts.map +1 -1
  41. package/dist/stream/deferred_stream.js +12 -4
  42. package/dist/stream/deferred_stream.js.map +1 -1
  43. package/dist/stream/deferred_stream.test.cjs +2 -2
  44. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  45. package/dist/stream/deferred_stream.test.js +2 -2
  46. package/dist/stream/deferred_stream.test.js.map +1 -1
  47. package/dist/stt/stream_adapter.cjs +15 -8
  48. package/dist/stt/stream_adapter.cjs.map +1 -1
  49. package/dist/stt/stream_adapter.d.cts +7 -3
  50. package/dist/stt/stream_adapter.d.ts +7 -3
  51. package/dist/stt/stream_adapter.d.ts.map +1 -1
  52. package/dist/stt/stream_adapter.js +15 -8
  53. package/dist/stt/stream_adapter.js.map +1 -1
  54. package/dist/stt/stt.cjs +8 -3
  55. package/dist/stt/stt.cjs.map +1 -1
  56. package/dist/stt/stt.d.cts +9 -3
  57. package/dist/stt/stt.d.ts +9 -3
  58. package/dist/stt/stt.d.ts.map +1 -1
  59. package/dist/stt/stt.js +9 -4
  60. package/dist/stt/stt.js.map +1 -1
  61. package/dist/telemetry/traces.cjs +23 -2
  62. package/dist/telemetry/traces.cjs.map +1 -1
  63. package/dist/telemetry/traces.d.ts.map +1 -1
  64. package/dist/telemetry/traces.js +23 -2
  65. package/dist/telemetry/traces.js.map +1 -1
  66. package/dist/tts/stream_adapter.cjs +10 -7
  67. package/dist/tts/stream_adapter.cjs.map +1 -1
  68. package/dist/tts/stream_adapter.d.cts +6 -3
  69. package/dist/tts/stream_adapter.d.ts +6 -3
  70. package/dist/tts/stream_adapter.d.ts.map +1 -1
  71. package/dist/tts/stream_adapter.js +10 -7
  72. package/dist/tts/stream_adapter.js.map +1 -1
  73. package/dist/tts/tts.cjs +27 -16
  74. package/dist/tts/tts.cjs.map +1 -1
  75. package/dist/tts/tts.d.cts +12 -5
  76. package/dist/tts/tts.d.ts +12 -5
  77. package/dist/tts/tts.d.ts.map +1 -1
  78. package/dist/tts/tts.js +28 -17
  79. package/dist/tts/tts.js.map +1 -1
  80. package/dist/types.cjs +21 -32
  81. package/dist/types.cjs.map +1 -1
  82. package/dist/types.d.cts +41 -10
  83. package/dist/types.d.ts +41 -10
  84. package/dist/types.d.ts.map +1 -1
  85. package/dist/types.js +18 -30
  86. package/dist/types.js.map +1 -1
  87. package/dist/voice/agent.cjs +54 -19
  88. package/dist/voice/agent.cjs.map +1 -1
  89. package/dist/voice/agent.d.ts.map +1 -1
  90. package/dist/voice/agent.js +54 -19
  91. package/dist/voice/agent.js.map +1 -1
  92. package/dist/voice/agent_activity.cjs +0 -3
  93. package/dist/voice/agent_activity.cjs.map +1 -1
  94. package/dist/voice/agent_activity.d.ts.map +1 -1
  95. package/dist/voice/agent_activity.js +0 -3
  96. package/dist/voice/agent_activity.js.map +1 -1
  97. package/dist/voice/agent_session.cjs +107 -27
  98. package/dist/voice/agent_session.cjs.map +1 -1
  99. package/dist/voice/agent_session.d.cts +16 -2
  100. package/dist/voice/agent_session.d.ts +16 -2
  101. package/dist/voice/agent_session.d.ts.map +1 -1
  102. package/dist/voice/agent_session.js +110 -27
  103. package/dist/voice/agent_session.js.map +1 -1
  104. package/dist/voice/events.cjs.map +1 -1
  105. package/dist/voice/events.d.cts +4 -4
  106. package/dist/voice/events.d.ts +4 -4
  107. package/dist/voice/events.d.ts.map +1 -1
  108. package/dist/voice/events.js.map +1 -1
  109. package/dist/voice/generation.cjs +6 -7
  110. package/dist/voice/generation.cjs.map +1 -1
  111. package/dist/voice/generation.d.ts.map +1 -1
  112. package/dist/voice/generation.js +7 -8
  113. package/dist/voice/generation.js.map +1 -1
  114. package/dist/voice/io.cjs +16 -0
  115. package/dist/voice/io.cjs.map +1 -1
  116. package/dist/voice/io.d.cts +8 -0
  117. package/dist/voice/io.d.ts +8 -0
  118. package/dist/voice/io.d.ts.map +1 -1
  119. package/dist/voice/io.js +16 -0
  120. package/dist/voice/io.js.map +1 -1
  121. package/dist/voice/recorder_io/index.cjs +23 -0
  122. package/dist/voice/recorder_io/index.cjs.map +1 -0
  123. package/dist/voice/recorder_io/index.d.cts +2 -0
  124. package/dist/voice/recorder_io/index.d.ts +2 -0
  125. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  126. package/dist/voice/recorder_io/index.js +2 -0
  127. package/dist/voice/recorder_io/index.js.map +1 -0
  128. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  129. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  130. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  131. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  132. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  133. package/dist/voice/recorder_io/recorder_io.js +508 -0
  134. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  135. package/dist/voice/report.cjs +7 -2
  136. package/dist/voice/report.cjs.map +1 -1
  137. package/dist/voice/report.d.cts +11 -1
  138. package/dist/voice/report.d.ts +11 -1
  139. package/dist/voice/report.d.ts.map +1 -1
  140. package/dist/voice/report.js +7 -2
  141. package/dist/voice/report.js.map +1 -1
  142. package/dist/voice/room_io/_input.cjs +2 -1
  143. package/dist/voice/room_io/_input.cjs.map +1 -1
  144. package/dist/voice/room_io/_input.d.ts.map +1 -1
  145. package/dist/voice/room_io/_input.js +2 -1
  146. package/dist/voice/room_io/_input.js.map +1 -1
  147. package/dist/voice/room_io/_output.cjs +8 -7
  148. package/dist/voice/room_io/_output.cjs.map +1 -1
  149. package/dist/voice/room_io/_output.d.cts +2 -1
  150. package/dist/voice/room_io/_output.d.ts +2 -1
  151. package/dist/voice/room_io/_output.d.ts.map +1 -1
  152. package/dist/voice/room_io/_output.js +8 -7
  153. package/dist/voice/room_io/_output.js.map +1 -1
  154. package/dist/worker.cjs +4 -3
  155. package/dist/worker.cjs.map +1 -1
  156. package/dist/worker.js +4 -3
  157. package/dist/worker.js.map +1 -1
  158. package/package.json +1 -1
  159. package/src/inference/llm.ts +0 -1
  160. package/src/inference/stt.ts +1 -2
  161. package/src/inference/tts.ts +5 -4
  162. package/src/ipc/job_proc_lazy_main.ts +1 -1
  163. package/src/job.ts +21 -2
  164. package/src/llm/llm.ts +2 -2
  165. package/src/stream/deferred_stream.test.ts +3 -3
  166. package/src/stream/deferred_stream.ts +22 -5
  167. package/src/stt/stream_adapter.ts +18 -8
  168. package/src/stt/stt.ts +19 -6
  169. package/src/telemetry/traces.ts +25 -3
  170. package/src/tts/stream_adapter.ts +15 -7
  171. package/src/tts/tts.ts +46 -21
  172. package/src/types.ts +57 -33
  173. package/src/voice/agent.ts +59 -19
  174. package/src/voice/agent_activity.ts +0 -3
  175. package/src/voice/agent_session.ts +142 -35
  176. package/src/voice/events.ts +6 -3
  177. package/src/voice/generation.ts +10 -8
  178. package/src/voice/io.ts +19 -0
  179. package/src/voice/recorder_io/index.ts +4 -0
  180. package/src/voice/recorder_io/recorder_io.ts +690 -0
  181. package/src/voice/report.ts +20 -3
  182. package/src/voice/room_io/_input.ts +2 -1
  183. package/src/voice/room_io/_output.ts +10 -7
  184. package/src/worker.ts +1 -1
@@ -0,0 +1,690 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import ffmpegInstaller from '@ffmpeg-installer/ffmpeg';
5
+ import { Mutex } from '@livekit/mutex';
6
+ import { AudioFrame, AudioResampler } from '@livekit/rtc-node';
7
+ import ffmpeg from 'fluent-ffmpeg';
8
+ import fs from 'node:fs';
9
+ import path from 'node:path';
10
+ import { PassThrough } from 'node:stream';
11
+ import type { ReadableStream } from 'node:stream/web';
12
+ import { TransformStream } from 'node:stream/web';
13
+ import { log } from '../../log.js';
14
+ import { isStreamReaderReleaseError } from '../../stream/deferred_stream.js';
15
+ import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';
16
+ import { Future, Task, cancelAndWait, delay } from '../../utils.js';
17
+ import type { AgentSession } from '../agent_session.js';
18
+ import { AudioInput, AudioOutput, type PlaybackFinishedEvent } from '../io.js';
19
+
20
// Point fluent-ffmpeg at the bundled ffmpeg binary so no system-wide install is required.
ffmpeg.setFfmpegPath(ffmpegInstaller.path);

// How often (in ms) the forward task flushes buffered input frames toward the encoder.
const WRITE_INTERVAL_MS = 2500;
// Recording sample rate used when the caller does not provide one.
const DEFAULT_SAMPLE_RATE = 48000;

/** Options for constructing a `RecorderIO`. */
export interface RecorderOptions {
  /** The agent session whose audio input and output will be recorded. */
  agentSession: AgentSession;
  /** Target sample rate of the encoded recording, in Hz (defaults to 48000). */
  sampleRate?: number;
}

/** Arguments for `RecorderIO`'s internal resample-and-mix step. */
interface ResampleAndMixOptions {
  /** Frames to resample and downmix. */
  frames: AudioFrame[];
  /** Resampler to reuse across calls; created lazily when undefined. */
  resampler: AudioResampler | undefined;
  /** When true, drain any samples buffered inside the resampler. */
  flush?: boolean;
}
35
+
36
/**
 * Records an `AgentSession`'s audio input and output into a single stereo
 * OGG/Opus file, encoded incrementally through a streaming FFmpeg process.
 * The user/input audio is written to the left channel and the agent/output
 * audio to the right channel (see `writePCM`).
 *
 * Usage: wrap the session's audio I/O via `recordInput()` and `recordOutput()`,
 * then call `start(outputPath)` to begin encoding and `close()` to finalize.
 */
export class RecorderIO {
  // Wrappers installed around the session's audio input/output.
  private inRecord?: RecorderAudioInput;
  private outRecord?: RecorderAudioOutput;

  // Batches of frames flow from the wrappers to the encode task via these channels.
  // Input and output are written in matched pairs so the encode task can mix them.
  private inChan: StreamChannel<AudioFrame[]> = createStreamChannel<AudioFrame[]>();
  private outChan: StreamChannel<AudioFrame[]> = createStreamChannel<AudioFrame[]>();

  private session: AgentSession;
  private sampleRate: number;

  private _outputPath?: string;
  private forwardTask?: Task<void>;
  private encodeTask?: Task<void>;

  // Resolved by the encode task once it has drained both channels and FFmpeg finished.
  private closeFuture: Future<void> = new Future();
  // Serializes start()/close() so they cannot interleave.
  private lock: Mutex = new Mutex();
  private started: boolean = false;

  // FFmpeg streaming state
  private pcmStream?: PassThrough;
  private ffmpegPromise?: Promise<void>;
  private inResampler?: AudioResampler;
  private outResampler?: AudioResampler;

  private logger = log();

  constructor(opts: RecorderOptions) {
    const { agentSession, sampleRate = DEFAULT_SAMPLE_RATE } = opts;

    this.session = agentSession;
    this.sampleRate = sampleRate;
  }

  /**
   * Start recording to `outputPath`. No-op if already started.
   *
   * @throws Error if `recordInput()`/`recordOutput()` have not both been called.
   */
  async start(outputPath: string): Promise<void> {
    const unlock = await this.lock.lock();

    try {
      if (this.started) return;

      if (!this.inRecord || !this.outRecord) {
        throw new Error(
          'RecorderIO not properly initialized: both `recordInput()` and `recordOutput()` must be called before starting the recorder.',
        );
      }

      this._outputPath = outputPath;
      this.started = true;
      this.closeFuture = new Future();

      // Ensure output directory exists
      const dir = path.dirname(outputPath);
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
      }

      this.forwardTask = Task.from(({ signal }) => this.forward(signal));
      this.encodeTask = Task.from(() => this.encode(), undefined, 'recorder_io_encode_task');
    } finally {
      unlock();
    }
  }

  /**
   * Stop recording: close both channels, wait for the encode task to drain and
   * FFmpeg to finalize the file, then cancel the background tasks. No-op if not started.
   */
  async close(): Promise<void> {
    const unlock = await this.lock.lock();

    try {
      if (!this.started) return;

      // Closing the channels lets encode() observe `done` and finish the file.
      await this.inChan.close();
      await this.outChan.close();
      await this.closeFuture.await;
      await cancelAndWait([this.forwardTask!, this.encodeTask!]);

      this.started = false;
    } finally {
      unlock();
    }
  }

  /**
   * Wrap an audio input so frames are captured for recording while still
   * flowing to the session unchanged. Must be called before `start()`.
   */
  recordInput(audioInput: AudioInput): RecorderAudioInput {
    this.inRecord = new RecorderAudioInput(this, audioInput);
    return this.inRecord;
  }

  /**
   * Wrap an audio output so played-back frames are captured for recording.
   * Must be called before `start()`.
   */
  recordOutput(audioOutput: AudioOutput): RecorderAudioOutput {
    this.outRecord = new RecorderAudioOutput(this, audioOutput, (buf) => this.writeCb(buf));
    return this.outRecord;
  }

  // Called by the output wrapper when a playback segment completes: pairs the
  // output frames with whatever input accumulated over the same period.
  private writeCb(buf: AudioFrame[]): void {
    const inputBuf = this.inRecord!.takeBuf();
    this.inChan.write(inputBuf);
    this.outChan.write(buf);
  }

  /** Whether recording is currently active. */
  get recording(): boolean {
    return this.started;
  }

  /** Destination file path set by `start()`, if any. */
  get outputPath(): string | undefined {
    return this._outputPath;
  }

  get recordingStartedAt(): number | undefined {
    // Use session start time to align with trace timestamps
    return this.session._startedAt;
  }

  /**
   * Forward task: periodically flush input buffer to encoder
   */
  private async forward(signal: AbortSignal): Promise<void> {
    while (!signal.aborted) {
      try {
        await delay(WRITE_INTERVAL_MS, { signal });
      } catch {
        // Aborted
        break;
      }

      if (this.outRecord!.hasPendingData) {
        // If the output is currently playing audio, wait for it to stay in sync
        continue;
      }

      // Flush input buffer
      // An empty output batch is written alongside it so the encode task always
      // reads input/output in matched pairs.
      const inputBuf = this.inRecord!.takeBuf();
      this.inChan
        .write(inputBuf)
        .catch((err) => this.logger.error({ err }, 'Error writing RecorderIO input buffer'));
      this.outChan
        .write([])
        .catch((err) => this.logger.error({ err }, 'Error writing RecorderIO output buffer'));
    }
  }

  /**
   * Start FFmpeg process for streaming encoding
   */
  private startFFmpeg(): void {
    if (this.pcmStream) return;

    this.pcmStream = new PassThrough();

    this.ffmpegPromise = new Promise<void>((resolve, reject) => {
      // Reads interleaved 16-bit little-endian stereo PCM from pcmStream and
      // encodes it to Opus in an OGG container at the configured sample rate.
      ffmpeg(this.pcmStream!)
        .inputFormat('s16le')
        .inputOptions([`-ar ${this.sampleRate}`, '-ac 2'])
        .audioCodec('libopus')
        .audioChannels(2)
        .audioFrequency(this.sampleRate)
        .format('ogg')
        .output(this._outputPath!)
        .on('end', () => {
          this.logger.debug('FFmpeg encoding finished');
          resolve();
        })
        .on('error', (err) => {
          // Ignore errors from intentional stream closure or SIGINT during shutdown
          if (
            err.message?.includes('Output stream closed') ||
            err.message?.includes('received signal 2') ||
            err.message?.includes('SIGKILL') ||
            err.message?.includes('SIGINT')
          ) {
            resolve();
          } else {
            this.logger.error({ err }, 'FFmpeg encoding error');
            reject(err);
          }
        })
        .run();
    });
  }

  /**
   * Resample and mix frames to mono Float32
   *
   * Frames are resampled to `this.sampleRate` (the resampler is created lazily
   * from the first frame's rate/channel count and returned for reuse), then
   * channels are averaged to mono and scaled from Int16 to [-1, 1) floats.
   */
  private resampleAndMix(opts: ResampleAndMixOptions): {
    samples: Float32Array;
    resampler: AudioResampler | undefined;
  } {
    const INV_INT16 = 1.0 / 32768.0;
    const { frames, flush = false } = opts;
    let { resampler } = opts;

    if (frames.length === 0 && !flush) {
      return { samples: new Float32Array(0), resampler };
    }

    if (!resampler && frames.length > 0) {
      const firstFrame = frames[0]!;
      resampler = new AudioResampler(firstFrame.sampleRate, this.sampleRate, firstFrame.channels);
    }

    const resampledFrames: AudioFrame[] = [];
    for (const frame of frames) {
      if (resampler) {
        resampledFrames.push(...resampler.push(frame));
      }
    }

    if (flush && resampler) {
      resampledFrames.push(...resampler.flush());
    }

    const totalSamples = resampledFrames.reduce((acc, frame) => acc + frame.samplesPerChannel, 0);
    const samples = new Float32Array(totalSamples);

    let pos = 0;
    for (const frame of resampledFrames) {
      const data = frame.data;
      const numChannels = frame.channels;
      for (let i = 0; i < frame.samplesPerChannel; i++) {
        // Average the interleaved channels into one mono sample.
        let sum = 0;
        for (let ch = 0; ch < numChannels; ch++) {
          sum += data[i * numChannels + ch]!;
        }
        samples[pos++] = (sum / numChannels) * INV_INT16;
      }
    }

    return { samples, resampler };
  }

  /**
   * Write PCM chunk to FFmpeg stream
   *
   * `leftSamples` carries the input/user audio, `rightSamples` the agent output.
   * If one side is shorter, silence is prepended to it so the two channels end
   * at the same instant.
   */
  private writePCM(leftSamples: Float32Array, rightSamples: Float32Array): void {
    if (!this.pcmStream) {
      this.startFFmpeg();
    }

    // Handle length mismatch by prepending silence
    if (leftSamples.length !== rightSamples.length) {
      const diff = Math.abs(leftSamples.length - rightSamples.length);
      if (leftSamples.length < rightSamples.length) {
        this.logger.warn(
          `Input is shorter by ${diff} samples; silence has been prepended to align the input channel.`,
        );
        const padded = new Float32Array(rightSamples.length);
        padded.set(leftSamples, diff);
        leftSamples = padded;
      } else {
        const padded = new Float32Array(leftSamples.length);
        padded.set(rightSamples, diff);
        rightSamples = padded;
      }
    }

    const maxLen = Math.max(leftSamples.length, rightSamples.length);
    if (maxLen <= 0) return;

    // Interleave stereo samples and convert back to Int16
    const stereoData = new Int16Array(maxLen * 2);
    for (let i = 0; i < maxLen; i++) {
      stereoData[i * 2] = Math.max(
        -32768,
        Math.min(32767, Math.round((leftSamples[i] ?? 0) * 32768)),
      );
      stereoData[i * 2 + 1] = Math.max(
        -32768,
        Math.min(32767, Math.round((rightSamples[i] ?? 0) * 32768)),
      );
    }

    this.pcmStream!.write(Buffer.from(stereoData.buffer));
  }

  /**
   * Encode task: read from channels, mix to stereo, stream to FFmpeg
   *
   * Reads one input batch and one output batch per iteration (they are written
   * in matched pairs), mixes each to mono, and streams them as the left/right
   * channels of the stereo PCM feed. Resolves `closeFuture` when done.
   */
  private async encode(): Promise<void> {
    if (!this._outputPath) return;

    const inReader = this.inChan.stream().getReader();
    const outReader = this.outChan.stream().getReader();

    try {
      while (true) {
        const [inResult, outResult] = await Promise.all([inReader.read(), outReader.read()]);

        if (inResult.done || outResult.done) {
          break;
        }

        const inputBuf = inResult.value;
        const outputBuf = outResult.value;

        const inMixed = this.resampleAndMix({ frames: inputBuf, resampler: this.inResampler });
        this.inResampler = inMixed.resampler;

        // Flush the output resampler per non-empty batch so each playback
        // segment is fully emitted before the next one begins.
        const outMixed = this.resampleAndMix({
          frames: outputBuf,
          resampler: this.outResampler,
          flush: outputBuf.length > 0,
        });
        this.outResampler = outMixed.resampler;

        // Stream PCM data directly to FFmpeg
        this.writePCM(inMixed.samples, outMixed.samples);
      }

      // Close FFmpeg stream and wait for encoding to complete
      if (this.pcmStream) {
        this.pcmStream.end();
        await this.ffmpegPromise;
      }
    } catch (err) {
      this.logger.error({ err }, 'Error in encode task');
    } finally {
      inReader.releaseLock();
      outReader.releaseLock();

      if (!this.closeFuture.done) {
        this.closeFuture.resolve();
      }
    }
  }
}
356
+
357
/**
 * AudioInput wrapper that passes frames from a source input through unchanged,
 * while accumulating a reference to each frame whenever the owning RecorderIO
 * is actively recording. The RecorderIO drains the accumulated frames via
 * `takeBuf()`.
 */
class RecorderAudioInput extends AudioInput {
  private source: AudioInput;
  private recorderIO: RecorderIO;
  // Frames captured since the last takeBuf() call.
  private accFrames: AudioFrame[] = [];
  private _startedWallTime?: number;

  constructor(recorderIO: RecorderIO, source: AudioInput) {
    super();
    this.recorderIO = recorderIO;
    this.source = source;

    // Set up the intercepting stream
    this.deferredStream.setSource(this.createInterceptingStream());
  }

  /**
   * Wall-clock time when the first frame was captured
   */
  get startedWallTime(): number | undefined {
    return this._startedWallTime;
  }

  /**
   * Take accumulated frames and clear the buffer
   */
  takeBuf(): AudioFrame[] {
    const frames = this.accFrames;
    this.accFrames = [];
    return frames;
  }

  /**
   * Creates a stream that intercepts frames from the source,
   * accumulates them when recording, and passes them through unchanged.
   */
  private createInterceptingStream(): ReadableStream<AudioFrame> {
    const sourceStream = this.source.stream;
    const reader = sourceStream.getReader();

    const transform = new TransformStream<AudioFrame, AudioFrame>({
      transform: (frame, controller) => {
        // Accumulate frames when recording is active
        if (this.recorderIO.recording) {
          if (this._startedWallTime === undefined) {
            this._startedWallTime = Date.now();
          }
          this.accFrames.push(frame);
        }

        controller.enqueue(frame);
      },
    });

    // Pump frames from the source reader into the transform's writable side,
    // propagating source errors and closing cleanly when the source ends.
    const pump = async () => {
      const writer = transform.writable.getWriter();
      let sourceError: unknown;

      try {
        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          await writer.write(value);
        }
      } catch (e) {
        // A released reader means the source was intentionally detached; stop silently.
        if (isStreamReaderReleaseError(e)) return;
        sourceError = e;
      } finally {
        if (sourceError) {
          writer.abort(sourceError);
          return;
        }

        writer.releaseLock();

        try {
          await transform.writable.close();
        } catch {
          // ignore "WritableStream is closed" errors
        }
      }
    };

    pump();

    return transform.readable;
  }

  onAttached(): void {
    this.source.onAttached();
  }

  onDetached(): void {
    this.source.onDetached();
  }
}
452
+
453
/**
 * AudioOutput wrapper that records frames as they are played back. Frames are
 * accumulated in `captureFrame`; when playback of a segment finishes, the
 * accumulated frames are trimmed to the actual playback position, silence is
 * spliced in where playback was paused, and the result is handed to the
 * RecorderIO via `writeFn`.
 */
class RecorderAudioOutput extends AudioOutput {
  private recorderIO: RecorderIO;
  // Callback that delivers a finished, pause-adjusted batch to the RecorderIO.
  private writeFn: (buf: AudioFrame[]) => void;
  // Frames captured since the last playback-finished event.
  private accFrames: AudioFrame[] = [];
  private _startedWallTime?: number;

  // Pause tracking
  private currentPauseStart?: number;
  private pauseWallTimes: Array<[number, number]> = []; // [start, end] pairs

  constructor(
    recorderIO: RecorderIO,
    audioOutput: AudioOutput,
    writeFn: (buf: AudioFrame[]) => void,
  ) {
    super(audioOutput.sampleRate, audioOutput);
    this.recorderIO = recorderIO;
    this.writeFn = writeFn;
  }

  /** Wall-clock time when the first frame was captured, if any. */
  get startedWallTime(): number | undefined {
    return this._startedWallTime;
  }

  /** True when captured frames are awaiting a playback-finished event. */
  get hasPendingData(): boolean {
    return this.accFrames.length > 0;
  }

  /** Mark the start of a playback pause (wall-clock) and forward downstream. */
  pause(): void {
    if (this.currentPauseStart === undefined && this.recorderIO.recording) {
      this.currentPauseStart = Date.now();
    }

    if (this.nextInChain) {
      this.nextInChain.pause();
    }
  }

  /**
   * Resume playback and record the pause interval
   */
  resume(): void {
    if (this.currentPauseStart !== undefined && this.recorderIO.recording) {
      this.pauseWallTimes.push([this.currentPauseStart, Date.now()]);
      this.currentPauseStart = undefined;
    }

    if (this.nextInChain) {
      this.nextInChain.resume();
    }
  }

  private resetPauseState(): void {
    this.currentPauseStart = undefined;
    this.pauseWallTimes = [];
  }

  /**
   * Finalize the current playback segment: trim the accumulated frames to the
   * reported playback position, splice silence frames in place of pauses, and
   * deliver the batch via `writeFn`.
   */
  onPlaybackFinished(options: PlaybackFinishedEvent): void {
    const finishTime = Date.now();

    super.onPlaybackFinished(options);

    if (!this.recorderIO.recording) {
      return;
    }

    // Close out a pause that is still open at finish time.
    if (this.currentPauseStart !== undefined) {
      this.pauseWallTimes.push([this.currentPauseStart, finishTime]);
      this.currentPauseStart = undefined;
    }

    if (this.accFrames.length === 0) {
      this.resetPauseState();
      return;
    }

    const playbackPosition = options.playbackPosition;

    // Each entry is [position within playback in seconds, pause duration in seconds].
    const pauseEvents: Array<[number, number]> = [];

    if (this.pauseWallTimes.length > 0) {
      const totalPauseDuration = this.pauseWallTimes.reduce(
        (sum, [start, end]) => sum + (end - start),
        0,
      );
      // Convert playbackPosition from seconds to milliseconds for wall time calculations
      const playbackStartTime = finishTime - playbackPosition * 1000 - totalPauseDuration;

      // Map each wall-clock pause interval onto a position inside the audio
      // timeline, subtracting time spent in earlier pauses.
      let accumulatedPause = 0;
      for (const [pauseStart, pauseEnd] of this.pauseWallTimes) {
        let position = (pauseStart - playbackStartTime - accumulatedPause) / 1000; // Convert to seconds
        const duration = (pauseEnd - pauseStart) / 1000; // Convert to seconds
        position = Math.max(0, Math.min(position, playbackPosition));
        pauseEvents.push([position, duration]);
        accumulatedPause += pauseEnd - pauseStart;
      }
    }

    const buf: AudioFrame[] = [];
    let accDur = 0; // Seconds of audio emitted into buf so far (excluding silence).
    const sampleRate = this.accFrames[0]!.sampleRate;
    const numChannels = this.accFrames[0]!.channels;

    let pauseIdx = 0;
    let shouldBreak = false;

    for (const frame of this.accFrames) {
      let currentFrame = frame;
      const frameDuration = frame.samplesPerChannel / frame.sampleRate;

      // Trim the final frame so total duration matches the playback position.
      if (frameDuration + accDur > playbackPosition) {
        const [left] = splitFrame(currentFrame, playbackPosition - accDur);
        currentFrame = left;
        shouldBreak = true;
      }

      // Process any pauses before this frame starts
      while (pauseIdx < pauseEvents.length && pauseEvents[pauseIdx]![0] <= accDur) {
        const [, pauseDur] = pauseEvents[pauseIdx]!;
        buf.push(createSilenceFrame(pauseDur, sampleRate, numChannels));
        pauseIdx++;
      }

      // Process any pauses within this frame
      const currentFrameDuration = currentFrame.samplesPerChannel / currentFrame.sampleRate;
      while (
        pauseIdx < pauseEvents.length &&
        pauseEvents[pauseIdx]![0] < accDur + currentFrameDuration
      ) {
        // Split the frame at the pause position and insert silence between the halves.
        const [pausePos, pauseDur] = pauseEvents[pauseIdx]!;
        const [left, right] = splitFrame(currentFrame, pausePos - accDur);
        buf.push(left);
        accDur += left.samplesPerChannel / left.sampleRate;
        buf.push(createSilenceFrame(pauseDur, sampleRate, numChannels));
        currentFrame = right;
        pauseIdx++;
      }

      buf.push(currentFrame);
      accDur += currentFrame.samplesPerChannel / currentFrame.sampleRate;

      if (shouldBreak) {
        break;
      }
    }

    // Process remaining pauses
    while (pauseIdx < pauseEvents.length) {
      const [pausePos, pauseDur] = pauseEvents[pauseIdx]!;
      if (pausePos <= playbackPosition) {
        buf.push(createSilenceFrame(pauseDur, sampleRate, numChannels));
      }
      pauseIdx++;
    }

    if (buf.length > 0) {
      this.writeFn(buf);
    }

    this.accFrames = [];
    this.resetPauseState();
  }

  /** Capture a frame for recording (when active) and forward it down the chain. */
  async captureFrame(frame: AudioFrame): Promise<void> {
    await super.captureFrame(frame);

    if (this.recorderIO.recording) {
      if (this._startedWallTime === undefined) {
        this._startedWallTime = Date.now();
      }
      this.accFrames.push(frame);
    }

    if (this.nextInChain) {
      await this.nextInChain.captureFrame(frame);
    }
  }

  flush(): void {
    super.flush();

    if (this.nextInChain) {
      this.nextInChain.flush();
    }
  }

  clearBuffer(): void {
    if (this.nextInChain) {
      this.nextInChain.clearBuffer();
    }
  }
}
645
+
646
+ /**
647
+ * Create a silent audio frame with the given duration
648
+ */
649
+ function createSilenceFrame(duration: number, sampleRate: number, numChannels: number): AudioFrame {
650
+ const samples = Math.floor(duration * sampleRate);
651
+ const data = new Int16Array(samples * numChannels); // Zero-filled by default
652
+ return new AudioFrame(data, sampleRate, numChannels, samples);
653
+ }
654
+
655
+ /**
656
+ * Split an audio frame at the given position (in seconds)
657
+ * Returns [left, right] frames
658
+ */
659
+ function splitFrame(frame: AudioFrame, position: number): [AudioFrame, AudioFrame] {
660
+ if (position <= 0) {
661
+ const emptyFrame = new AudioFrame(new Int16Array(0), frame.sampleRate, frame.channels, 0);
662
+ return [emptyFrame, frame];
663
+ }
664
+
665
+ const frameDuration = frame.samplesPerChannel / frame.sampleRate;
666
+ if (position >= frameDuration) {
667
+ const emptyFrame = new AudioFrame(new Int16Array(0), frame.sampleRate, frame.channels, 0);
668
+ return [frame, emptyFrame];
669
+ }
670
+
671
+ // samplesNeeded is samples per channel (i.e., sample count in time)
672
+ const samplesNeeded = Math.floor(position * frame.sampleRate);
673
+ // Int16Array: each element is one sample, interleaved by channel
674
+ // So total elements = samplesPerChannel * channels
675
+ const numChannels = frame.channels;
676
+
677
+ const leftData = frame.data.slice(0, samplesNeeded * numChannels);
678
+ const rightData = frame.data.slice(samplesNeeded * numChannels);
679
+
680
+ const leftFrame = new AudioFrame(leftData, frame.sampleRate, frame.channels, samplesNeeded);
681
+
682
+ const rightFrame = new AudioFrame(
683
+ rightData,
684
+ frame.sampleRate,
685
+ frame.channels,
686
+ frame.samplesPerChannel - samplesNeeded,
687
+ );
688
+
689
+ return [leftFrame, rightFrame];
690
+ }