@whereby.com/assistant-sdk 0.0.0-canary-20250916140846 → 0.0.0-canary-20250917154617
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +340 -278
- package/dist/index.mjs +340 -278
- package/dist/legacy-esm.js +340 -278
- package/dist/tools.cjs +337 -275
- package/dist/tools.d.ts +1 -0
- package/package.json +4 -4
package/dist/index.cjs
CHANGED
|
@@ -99,301 +99,362 @@ const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
|
99
99
|
const BYTES_PER_SAMPLE = 2;
|
|
100
100
|
// 480 samples per 10ms frame at 48kHz
|
|
101
101
|
const FRAME_10MS_SAMPLES = 480;
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
116
|
-
}
|
|
117
|
-
({
|
|
118
|
-
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
119
|
-
enqSamples: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
120
|
-
wroteFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
121
|
-
wroteSamples: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
122
|
-
lastFramesSeen: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
123
|
-
});
|
|
124
|
-
let slots = [];
|
|
125
|
-
let stopPacerFn = null;
|
|
126
|
-
let outputPacerState = null;
|
|
127
|
-
/**
|
|
128
|
-
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
129
|
-
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
130
|
-
*/
|
|
131
|
-
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
132
|
-
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
133
|
-
const outputLength = Math.floor(inputFrames * ratio);
|
|
134
|
-
const output = new Int16Array(outputLength);
|
|
135
|
-
for (let i = 0; i < outputLength; i++) {
|
|
136
|
-
const inputIndex = i / ratio;
|
|
137
|
-
const index = Math.floor(inputIndex);
|
|
138
|
-
const fraction = inputIndex - index;
|
|
139
|
-
if (index + 1 < inputSamples.length) {
|
|
140
|
-
const sample1 = inputSamples[index];
|
|
141
|
-
const sample2 = inputSamples[index + 1];
|
|
142
|
-
output[i] = Math.round(sample1 + (sample2 - sample1) * fraction);
|
|
102
|
+
function createFfmpegMixer() {
|
|
103
|
+
const slotBuffers = new Map();
|
|
104
|
+
function appendAndDrainTo480(slot, newSamples) {
|
|
105
|
+
var _a;
|
|
106
|
+
const prev = (_a = slotBuffers.get(slot)) !== null && _a !== void 0 ? _a : new Int16Array(0);
|
|
107
|
+
const merged = new Int16Array(prev.length + newSamples.length);
|
|
108
|
+
merged.set(prev, 0);
|
|
109
|
+
merged.set(newSamples, prev.length);
|
|
110
|
+
let offset = 0;
|
|
111
|
+
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
112
|
+
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
113
|
+
enqueueFrame(slot, chunk); // always 480
|
|
114
|
+
offset += FRAME_10MS_SAMPLES;
|
|
143
115
|
}
|
|
144
|
-
|
|
145
|
-
|
|
116
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
117
|
+
}
|
|
118
|
+
({
|
|
119
|
+
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
120
|
+
enqSamples: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
121
|
+
wroteFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
122
|
+
wroteSamples: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
123
|
+
lastFramesSeen: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
124
|
+
});
|
|
125
|
+
let slots = [];
|
|
126
|
+
let stopPacerFn = null;
|
|
127
|
+
let outputPacerState = null;
|
|
128
|
+
/**
|
|
129
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
130
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
131
|
+
*/
|
|
132
|
+
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
133
|
+
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
134
|
+
const outputLength = Math.floor(inputFrames * ratio);
|
|
135
|
+
const output = new Int16Array(outputLength);
|
|
136
|
+
for (let i = 0; i < outputLength; i++) {
|
|
137
|
+
const inputIndex = i / ratio;
|
|
138
|
+
const index = Math.floor(inputIndex);
|
|
139
|
+
const fraction = inputIndex - index;
|
|
140
|
+
if (index + 1 < inputSamples.length) {
|
|
141
|
+
const sample1 = inputSamples[index];
|
|
142
|
+
const sample2 = inputSamples[index + 1];
|
|
143
|
+
output[i] = Math.round(sample1 + (sample2 - sample1) * fraction);
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
output[i] = inputSamples[Math.min(index, inputSamples.length - 1)];
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
return output;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
153
|
+
*/
|
|
154
|
+
function enqueueOutputFrame(samples) {
|
|
155
|
+
if (outputPacerState) {
|
|
156
|
+
outputPacerState.frameQueue.push(samples);
|
|
146
157
|
}
|
|
147
158
|
}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
214
|
-
if (t >= st.nextDueMs) {
|
|
215
|
-
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
216
|
-
if (!w.write(buf)) {
|
|
217
|
-
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
159
|
+
/**
|
|
160
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
161
|
+
*
|
|
162
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
163
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
164
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
165
|
+
*
|
|
166
|
+
* Key behavior:
|
|
167
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
168
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
169
|
+
* never stalls.
|
|
170
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
171
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
172
|
+
* the timing grid.
|
|
173
|
+
*
|
|
174
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
175
|
+
* can mix them without slow-downs or drift.
|
|
176
|
+
*
|
|
177
|
+
* Call this once right after spawning FFmpeg:
|
|
178
|
+
* ```ts
|
|
179
|
+
* const ff = spawnFFmpegProcess();
|
|
180
|
+
* startPacer(ff, PARTICIPANT_SLOTS);
|
|
181
|
+
* ```
|
|
182
|
+
*
|
|
183
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
184
|
+
*
|
|
185
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
186
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
187
|
+
*/
|
|
188
|
+
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
189
|
+
if (stopPacerFn) {
|
|
190
|
+
stopPacerFn();
|
|
191
|
+
stopPacerFn = null;
|
|
192
|
+
}
|
|
193
|
+
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
194
|
+
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
195
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
196
|
+
const t0 = nowMs();
|
|
197
|
+
slots = Array.from({ length: slotCount }, () => ({
|
|
198
|
+
q: [],
|
|
199
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
200
|
+
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
201
|
+
}));
|
|
202
|
+
outputPacerState = {
|
|
203
|
+
frameQueue: [],
|
|
204
|
+
nextDueMs: t0 + outputFrameMs,
|
|
205
|
+
rtcAudioSource,
|
|
206
|
+
onAudioStreamReady,
|
|
207
|
+
didEmitReadyEvent: false,
|
|
208
|
+
};
|
|
209
|
+
const iv = setInterval(() => {
|
|
210
|
+
const t = nowMs();
|
|
211
|
+
for (let s = 0; s < slotCount; s++) {
|
|
212
|
+
const st = slots[s];
|
|
213
|
+
const w = writers[s];
|
|
214
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
215
|
+
if (t >= st.nextDueMs) {
|
|
216
|
+
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
217
|
+
if (!w.write(buf)) {
|
|
218
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
219
|
+
const late = t - st.nextDueMs;
|
|
220
|
+
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
221
|
+
st.nextDueMs += steps * frameMs;
|
|
222
|
+
continue;
|
|
223
|
+
}
|
|
218
224
|
const late = t - st.nextDueMs;
|
|
219
225
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
220
226
|
st.nextDueMs += steps * frameMs;
|
|
221
|
-
continue;
|
|
222
227
|
}
|
|
223
|
-
const late = t - st.nextDueMs;
|
|
224
|
-
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
225
|
-
st.nextDueMs += steps * frameMs;
|
|
226
228
|
}
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
229
|
+
if (!outputPacerState)
|
|
230
|
+
return;
|
|
231
|
+
// Handle output pacer for RTCAudioSource
|
|
232
|
+
const state = outputPacerState;
|
|
233
|
+
if (t >= state.nextDueMs) {
|
|
234
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
235
|
+
if (!state.didEmitReadyEvent) {
|
|
236
|
+
state.onAudioStreamReady();
|
|
237
|
+
state.didEmitReadyEvent = true;
|
|
238
|
+
}
|
|
239
|
+
state.rtcAudioSource.onData({
|
|
240
|
+
samples: samples,
|
|
241
|
+
sampleRate: STREAM_INPUT_SAMPLE_RATE_IN_HZ,
|
|
242
|
+
});
|
|
243
|
+
const late = t - state.nextDueMs;
|
|
244
|
+
const steps = Math.max(1, Math.ceil(late / outputFrameMs));
|
|
245
|
+
state.nextDueMs += steps * outputFrameMs;
|
|
237
246
|
}
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
247
|
+
}, 5);
|
|
248
|
+
stopPacerFn = () => clearInterval(iv);
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
252
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
253
|
+
*/
|
|
254
|
+
function stopPacer() {
|
|
255
|
+
if (stopPacerFn)
|
|
256
|
+
stopPacerFn();
|
|
257
|
+
stopPacerFn = null;
|
|
258
|
+
slots = [];
|
|
259
|
+
slotBuffers.clear();
|
|
260
|
+
outputPacerState = null;
|
|
261
|
+
}
|
|
262
|
+
/**
|
|
263
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
264
|
+
* Auto-resnaps the slot's schedule if the frame size (480/960) changes.
|
|
265
|
+
*/
|
|
266
|
+
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
267
|
+
const st = slots[slot];
|
|
268
|
+
if (!st)
|
|
269
|
+
return;
|
|
270
|
+
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
271
|
+
st.q.push(buf);
|
|
272
|
+
}
|
|
273
|
+
/**
|
|
274
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
275
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
276
|
+
*/
|
|
277
|
+
function clearSlotQueue(slot) {
|
|
278
|
+
const st = slots[slot];
|
|
279
|
+
if (st) {
|
|
280
|
+
st.q = [];
|
|
281
|
+
slotBuffers.delete(slot);
|
|
282
|
+
const now = Number(process.hrtime.bigint()) / 1e6;
|
|
283
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
284
|
+
st.nextDueMs = now + frameMs;
|
|
245
285
|
}
|
|
246
|
-
}, 5);
|
|
247
|
-
stopPacerFn = () => clearInterval(iv);
|
|
248
|
-
}
|
|
249
|
-
/**
|
|
250
|
-
* Stop the audio pacer loop and clear all input slots.
|
|
251
|
-
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
252
|
-
*/
|
|
253
|
-
function stopPacer() {
|
|
254
|
-
if (stopPacerFn)
|
|
255
|
-
stopPacerFn();
|
|
256
|
-
stopPacerFn = null;
|
|
257
|
-
slots = [];
|
|
258
|
-
}
|
|
259
|
-
/**
|
|
260
|
-
* Queue a live frame for a given slot (0..N-1).
|
|
261
|
-
* Auto-resnaps the slot's schedule if the frame size (480/960) changes.
|
|
262
|
-
*/
|
|
263
|
-
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
264
|
-
const st = slots[slot];
|
|
265
|
-
if (!st)
|
|
266
|
-
return;
|
|
267
|
-
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
268
|
-
st.q.push(buf);
|
|
269
|
-
}
|
|
270
|
-
/**
|
|
271
|
-
* Clear the audio queue for a specific slot when a participant leaves.
|
|
272
|
-
* This prevents stale audio data from continuing to play after disconnect.
|
|
273
|
-
*/
|
|
274
|
-
function clearSlotQueue(slot) {
|
|
275
|
-
const st = slots[slot];
|
|
276
|
-
if (st) {
|
|
277
|
-
st.q = [];
|
|
278
|
-
const now = Number(process.hrtime.bigint()) / 1e6;
|
|
279
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
280
|
-
st.nextDueMs = now + frameMs;
|
|
281
286
|
}
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
ffArgs.push("-f", "s16le", "-ar", String(SR), "-ac", "1", "-i", `pipe:${3 + i}`);
|
|
294
|
-
}
|
|
295
|
-
const pre = [];
|
|
296
|
-
for (let i = 0; i < N; i++) {
|
|
297
|
-
pre.push(`[${i}:a]aresample=async=1:first_pts=0,asetpts=N/SR/TB[a${i}]`);
|
|
298
|
-
}
|
|
299
|
-
const labels = Array.from({ length: N }, (_, i) => `[a${i}]`).join("");
|
|
300
|
-
const amix = `${labels}amix=inputs=${N}:duration=longest:dropout_transition=250:normalize=0[mix]`;
|
|
301
|
-
const filter = `${pre.join(";")};${amix}`;
|
|
302
|
-
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
303
|
-
return ffArgs;
|
|
304
|
-
}
|
|
305
|
-
/**
|
|
306
|
-
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
307
|
-
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
308
|
-
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
309
|
-
* The process will log its output to stderr.
|
|
310
|
-
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
311
|
-
* @return The spawned FFmpeg process.
|
|
312
|
-
*/
|
|
313
|
-
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
314
|
-
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
315
|
-
const args = getFFmpegArguments();
|
|
316
|
-
const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
|
|
317
|
-
startPacer(ffmpegProcess, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
|
|
318
|
-
ffmpegProcess.stderr.setEncoding("utf8");
|
|
319
|
-
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
320
|
-
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
321
|
-
let audioBuffer = Buffer.alloc(0);
|
|
322
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
323
|
-
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
324
|
-
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
325
|
-
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
326
|
-
const frameData = audioBuffer.subarray(0, FRAME_SIZE_BYTES);
|
|
327
|
-
const samples = new Int16Array(FRAME_10MS_SAMPLES);
|
|
328
|
-
for (let i = 0; i < FRAME_10MS_SAMPLES; i++) {
|
|
329
|
-
samples[i] = frameData.readInt16LE(i * 2);
|
|
330
|
-
}
|
|
331
|
-
enqueueOutputFrame(samples);
|
|
332
|
-
audioBuffer = audioBuffer.subarray(FRAME_SIZE_BYTES);
|
|
287
|
+
/**
|
|
288
|
+
* Get the FFmpeg arguments for debugging, which writes each participant's audio to a separate WAV file
|
|
289
|
+
* and also mixes them into a single WAV file.
|
|
290
|
+
* This is useful for inspecting the audio quality and timing of each participant.
|
|
291
|
+
*/
|
|
292
|
+
function getFFmpegArgumentsDebug() {
|
|
293
|
+
const N = PARTICIPANT_SLOTS;
|
|
294
|
+
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
295
|
+
const ffArgs = [];
|
|
296
|
+
for (let i = 0; i < N; i++) {
|
|
297
|
+
ffArgs.push("-f", "s16le", "-ar", String(SR), "-ac", "1", "-i", `pipe:${3 + i}`);
|
|
333
298
|
}
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
}
|
|
337
|
-
/**
|
|
338
|
-
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
339
|
-
* This function creates an AudioSink for the track and sets up a data handler
|
|
340
|
-
* that enqueues audio frames into the pacer.
|
|
341
|
-
*
|
|
342
|
-
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
343
|
-
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
344
|
-
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
345
|
-
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
346
|
-
*/
|
|
347
|
-
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
348
|
-
const writer = ffmpegProcess.stdio[3 + slot];
|
|
349
|
-
const sink = new AudioSink(audioTrack);
|
|
350
|
-
const unsubscribe = sink.subscribe(({ samples, sampleRate: sr, channelCount: ch, bitsPerSample, numberOfFrames }) => {
|
|
351
|
-
if (ch !== 1 || bitsPerSample !== 16)
|
|
352
|
-
return;
|
|
353
|
-
let out = samples;
|
|
354
|
-
if (sr !== STREAM_INPUT_SAMPLE_RATE_IN_HZ) {
|
|
355
|
-
const resampled = resampleTo48kHz(samples, sr, numberOfFrames !== null && numberOfFrames !== void 0 ? numberOfFrames : samples.length);
|
|
356
|
-
out = resampled;
|
|
299
|
+
const pre = [];
|
|
300
|
+
for (let i = 0; i < N; i++) {
|
|
301
|
+
pre.push(`[${i}:a]aresample=async=0:first_pts=0,asetpts=PTS-STARTPTS,asplit=2[a${i}tap][a${i}mix]`);
|
|
357
302
|
}
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
sink.stop();
|
|
303
|
+
const mixInputs = Array.from({ length: N }, (_, i) => `[a${i}mix]`).join("");
|
|
304
|
+
const filter = `${pre.join(";")};${mixInputs}amix=inputs=${N}:duration=first:dropout_transition=0:normalize=0[mix]`;
|
|
305
|
+
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "info", "-y", "-filter_complex", filter);
|
|
306
|
+
for (let i = 0; i < N; i++) {
|
|
307
|
+
ffArgs.push("-map", `[a${i}tap]`, "-f", "wav", "-c:a", "pcm_s16le", `pre${i}.wav`);
|
|
364
308
|
}
|
|
365
|
-
|
|
366
|
-
|
|
309
|
+
ffArgs.push("-map", "[mix]", "-f", "wav", "-c:a", "pcm_s16le", "mixed.wav");
|
|
310
|
+
return ffArgs;
|
|
311
|
+
}
|
|
312
|
+
/**
|
|
313
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
314
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
315
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
316
|
+
*/
|
|
317
|
+
function getFFmpegArguments() {
|
|
318
|
+
const N = PARTICIPANT_SLOTS;
|
|
319
|
+
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
320
|
+
const ffArgs = [];
|
|
321
|
+
for (let i = 0; i < N; i++) {
|
|
322
|
+
ffArgs.push("-f", "s16le", "-ar", String(SR), "-ac", "1", "-i", `pipe:${3 + i}`);
|
|
367
323
|
}
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
}
|
|
371
|
-
/**
|
|
372
|
-
* Stop the FFmpeg process and clean up all resources.
|
|
373
|
-
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
374
|
-
* and kill the FFmpeg process.
|
|
375
|
-
* @param ffmpegProcess The FFmpeg process to stop.
|
|
376
|
-
*/
|
|
377
|
-
function stopFFmpegProcess(ffmpegProcess) {
|
|
378
|
-
stopPacer();
|
|
379
|
-
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
380
|
-
try {
|
|
381
|
-
ffmpegProcess.stdout.unpipe();
|
|
382
|
-
}
|
|
383
|
-
catch (_a) {
|
|
384
|
-
console.error("Failed to unpipe ffmpeg stdout");
|
|
324
|
+
const pre = [];
|
|
325
|
+
for (let i = 0; i < N; i++) {
|
|
326
|
+
pre.push(`[${i}:a]aresample=async=0:first_pts=0,asetpts=PTS-STARTPTS[a${i}]`);
|
|
385
327
|
}
|
|
386
|
-
|
|
387
|
-
|
|
328
|
+
const labels = Array.from({ length: N }, (_, i) => `[a${i}]`).join("");
|
|
329
|
+
const amix = `${labels}amix=inputs=${N}:duration=first:dropout_transition=0:normalize=0[mix]`;
|
|
330
|
+
const filter = `${pre.join(";")};${amix}`;
|
|
331
|
+
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
332
|
+
return ffArgs;
|
|
333
|
+
}
|
|
334
|
+
/*
|
|
335
|
+
* Spawn a new FFmpeg process for debugging purposes.
|
|
336
|
+
* This will write each participant's audio to a separate WAV file and also mix them into a single WAV file.
|
|
337
|
+
* The output files will be named pre0.wav, pre1.wav, ..., and mixed.wav.
|
|
338
|
+
* The process will log its output to stderr.
|
|
339
|
+
* @return The spawned FFmpeg process.
|
|
340
|
+
*/
|
|
341
|
+
function spawnFFmpegProcessDebug(rtcAudioSource, onAudioStreamReady) {
|
|
342
|
+
const stdio = ["ignore", "ignore", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
343
|
+
const args = getFFmpegArgumentsDebug();
|
|
344
|
+
const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
|
|
345
|
+
startPacer(ffmpegProcess, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
|
|
346
|
+
ffmpegProcess.stderr.setEncoding("utf8");
|
|
347
|
+
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
348
|
+
ffmpegProcess.on("error", () => console.error("FFmpeg process error (debug): is ffmpeg installed?"));
|
|
349
|
+
return ffmpegProcess;
|
|
350
|
+
}
|
|
351
|
+
/**
|
|
352
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
353
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
354
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
355
|
+
* The process will log its output to stderr.
|
|
356
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
357
|
+
* @return The spawned FFmpeg process.
|
|
358
|
+
*/
|
|
359
|
+
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
360
|
+
const stdio = ["pipe", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
361
|
+
const args = getFFmpegArguments();
|
|
362
|
+
const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
|
|
363
|
+
startPacer(ffmpegProcess, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
|
|
364
|
+
ffmpegProcess.stderr.setEncoding("utf8");
|
|
365
|
+
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
366
|
+
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
367
|
+
let audioBuffer = Buffer.alloc(0);
|
|
368
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
369
|
+
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
370
|
+
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
371
|
+
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
372
|
+
const frameData = audioBuffer.subarray(0, FRAME_SIZE_BYTES);
|
|
373
|
+
const samples = new Int16Array(FRAME_10MS_SAMPLES);
|
|
374
|
+
for (let i = 0; i < FRAME_10MS_SAMPLES; i++) {
|
|
375
|
+
samples[i] = frameData.readInt16LE(i * 2);
|
|
376
|
+
}
|
|
377
|
+
enqueueOutputFrame(samples);
|
|
378
|
+
audioBuffer = audioBuffer.subarray(FRAME_SIZE_BYTES);
|
|
379
|
+
}
|
|
380
|
+
});
|
|
381
|
+
return ffmpegProcess;
|
|
382
|
+
}
|
|
383
|
+
/**
|
|
384
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
385
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
386
|
+
* that enqueues audio frames into the pacer.
|
|
387
|
+
*
|
|
388
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
389
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
390
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
391
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
392
|
+
*/
|
|
393
|
+
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
394
|
+
const writer = ffmpegProcess.stdio[3 + slot];
|
|
395
|
+
const sink = new AudioSink(audioTrack);
|
|
396
|
+
const unsubscribe = sink.subscribe(({ samples, sampleRate: sr, channelCount: ch, bitsPerSample, numberOfFrames }) => {
|
|
397
|
+
if (ch !== 1 || bitsPerSample !== 16)
|
|
398
|
+
return;
|
|
399
|
+
let out = samples;
|
|
400
|
+
if (sr !== STREAM_INPUT_SAMPLE_RATE_IN_HZ) {
|
|
401
|
+
const resampled = resampleTo48kHz(samples, sr, numberOfFrames !== null && numberOfFrames !== void 0 ? numberOfFrames : samples.length);
|
|
402
|
+
out = resampled;
|
|
403
|
+
}
|
|
404
|
+
appendAndDrainTo480(slot, out);
|
|
405
|
+
});
|
|
406
|
+
const stop = () => {
|
|
407
|
+
try {
|
|
408
|
+
unsubscribe();
|
|
409
|
+
sink.stop();
|
|
410
|
+
}
|
|
411
|
+
catch (_a) {
|
|
412
|
+
console.error("Failed to stop AudioSink");
|
|
413
|
+
}
|
|
414
|
+
};
|
|
415
|
+
return { sink, writer, stop };
|
|
416
|
+
}
|
|
417
|
+
/**
|
|
418
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
419
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
420
|
+
* and kill the FFmpeg process.
|
|
421
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
422
|
+
*/
|
|
423
|
+
function stopFFmpegProcess(ffmpegProcess) {
|
|
424
|
+
var _a, _b;
|
|
425
|
+
stopPacer();
|
|
426
|
+
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
427
|
+
try {
|
|
428
|
+
ffmpegProcess.stdout.unpipe();
|
|
429
|
+
}
|
|
430
|
+
catch (_c) {
|
|
431
|
+
console.error("Failed to unpipe ffmpeg stdout");
|
|
432
|
+
}
|
|
433
|
+
for (let i = 0; i < PARTICIPANT_SLOTS; i++) {
|
|
434
|
+
const w = ffmpegProcess.stdio[3 + i];
|
|
435
|
+
try {
|
|
436
|
+
w.end();
|
|
437
|
+
}
|
|
438
|
+
catch (_d) {
|
|
439
|
+
console.error("Failed to end ffmpeg writable stream");
|
|
440
|
+
}
|
|
441
|
+
}
|
|
388
442
|
try {
|
|
389
|
-
|
|
443
|
+
(_a = ffmpegProcess.stdin) === null || _a === void 0 ? void 0 : _a.write("q\n");
|
|
444
|
+
(_b = ffmpegProcess.stdin) === null || _b === void 0 ? void 0 : _b.end();
|
|
390
445
|
}
|
|
391
|
-
catch (
|
|
392
|
-
console.error("Failed to end ffmpeg
|
|
446
|
+
catch (_e) {
|
|
447
|
+
console.error("Failed to end ffmpeg stdin");
|
|
393
448
|
}
|
|
394
449
|
}
|
|
395
|
-
ffmpegProcess.kill("SIGTERM");
|
|
396
450
|
}
|
|
451
|
+
return {
|
|
452
|
+
spawnFFmpegProcess,
|
|
453
|
+
spawnFFmpegProcessDebug,
|
|
454
|
+
writeAudioDataToFFmpeg,
|
|
455
|
+
stopFFmpegProcess,
|
|
456
|
+
clearSlotQueue,
|
|
457
|
+
};
|
|
397
458
|
}
|
|
398
459
|
|
|
399
460
|
class AudioMixer extends EventEmitter.EventEmitter {
|
|
@@ -404,6 +465,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
404
465
|
this.rtcAudioSource = null;
|
|
405
466
|
this.participantSlots = new Map();
|
|
406
467
|
this.activeSlots = {};
|
|
468
|
+
this.mixer = createFfmpegMixer();
|
|
407
469
|
this.setupMediaStream();
|
|
408
470
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
409
471
|
this.onStreamReady = onStreamReady;
|
|
@@ -422,7 +484,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
422
484
|
return;
|
|
423
485
|
}
|
|
424
486
|
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
425
|
-
this.ffmpegProcess = spawnFFmpegProcess(this.rtcAudioSource, this.onStreamReady);
|
|
487
|
+
this.ffmpegProcess = this.mixer.spawnFFmpegProcess(this.rtcAudioSource, this.onStreamReady);
|
|
426
488
|
}
|
|
427
489
|
for (const p of participants)
|
|
428
490
|
this.attachParticipantIfNeeded(p);
|
|
@@ -435,7 +497,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
435
497
|
}
|
|
436
498
|
stopAudioMixer() {
|
|
437
499
|
if (this.ffmpegProcess) {
|
|
438
|
-
stopFFmpegProcess(this.ffmpegProcess);
|
|
500
|
+
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
439
501
|
this.ffmpegProcess = null;
|
|
440
502
|
}
|
|
441
503
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
@@ -488,7 +550,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
488
550
|
}
|
|
489
551
|
this.activeSlots[slot] = undefined;
|
|
490
552
|
}
|
|
491
|
-
const { sink, writer, stop } = writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
|
|
553
|
+
const { sink, writer, stop } = this.mixer.writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
|
|
492
554
|
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id };
|
|
493
555
|
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachParticipant(participantId));
|
|
494
556
|
}
|
|
@@ -507,7 +569,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
507
569
|
this.activeSlots[slot] = undefined;
|
|
508
570
|
}
|
|
509
571
|
// Clear any queued audio data for this slot to prevent stale audio
|
|
510
|
-
clearSlotQueue(slot);
|
|
572
|
+
this.mixer.clearSlotQueue(slot);
|
|
511
573
|
this.participantSlots.set(slot, "");
|
|
512
574
|
}
|
|
513
575
|
}
|