@whereby.com/assistant-sdk 0.0.0-canary-20250912102624 → 0.0.0-canary-20250912142319
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +99 -11
- package/dist/index.mjs +99 -11
- package/dist/legacy-esm.js +99 -11
- package/dist/polyfills.cjs +7 -1
- package/dist/tools.cjs +94 -7
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -88,9 +88,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
88
88
|
}
|
|
89
89
|
}
|
|
90
90
|
|
|
91
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
92
|
+
// participants to these slots based on mute/unmute state.
|
|
91
93
|
const PARTICIPANT_SLOTS = 20;
|
|
94
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
95
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
92
96
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
93
97
|
const BYTES_PER_SAMPLE = 2;
|
|
98
|
+
// 480 samples per 10ms frame at 48kHz
|
|
94
99
|
const FRAME_10MS_SAMPLES = 480;
|
|
95
100
|
const slotBuffers = new Map();
|
|
96
101
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -102,10 +107,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
102
107
|
let offset = 0;
|
|
103
108
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
104
109
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
105
|
-
enqueueFrame(slot, chunk);
|
|
110
|
+
enqueueFrame(slot, chunk); // always 480
|
|
106
111
|
offset += FRAME_10MS_SAMPLES;
|
|
107
112
|
}
|
|
108
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
113
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
109
114
|
}
|
|
110
115
|
({
|
|
111
116
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -117,6 +122,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
117
122
|
let slots = [];
|
|
118
123
|
let stopPacerFn = null;
|
|
119
124
|
let outputPacerState = null;
|
|
125
|
+
/**
|
|
126
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
127
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
128
|
+
*/
|
|
120
129
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
121
130
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
122
131
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -136,11 +145,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
136
145
|
}
|
|
137
146
|
return output;
|
|
138
147
|
}
|
|
148
|
+
/**
|
|
149
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
150
|
+
*/
|
|
139
151
|
function enqueueOutputFrame(samples) {
|
|
140
152
|
if (outputPacerState) {
|
|
141
153
|
outputPacerState.frameQueue.push(samples);
|
|
142
154
|
}
|
|
143
155
|
}
|
|
156
|
+
/**
|
|
157
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
158
|
+
*
|
|
159
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
160
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
161
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
162
|
+
*
|
|
163
|
+
* Key behavior:
|
|
164
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
165
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
166
|
+
* never stalls.
|
|
167
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
168
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
169
|
+
* the timing grid.
|
|
170
|
+
*
|
|
171
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
172
|
+
* can mix them without slow-downs or drift.
|
|
173
|
+
*
|
|
174
|
+
* Call this once right after spawning FFmpeg:
|
|
175
|
+
* ```ts
|
|
176
|
+
* const ff = spawnFFmpegProcess();
|
|
177
|
+
* startPacer(ff, PARTICIPANT_SLOTS);
|
|
178
|
+
* ```
|
|
179
|
+
*
|
|
180
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
181
|
+
*
|
|
182
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
183
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
184
|
+
*/
|
|
144
185
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
145
186
|
if (stopPacerFn) {
|
|
146
187
|
stopPacerFn();
|
|
@@ -148,11 +189,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
148
189
|
}
|
|
149
190
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
150
191
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
151
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
192
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
152
193
|
const t0 = nowMs();
|
|
153
194
|
slots = Array.from({ length: slotCount }, () => ({
|
|
154
195
|
q: [],
|
|
155
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
196
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
156
197
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
157
198
|
}));
|
|
158
199
|
outputPacerState = {
|
|
@@ -167,10 +208,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
167
208
|
for (let s = 0; s < slotCount; s++) {
|
|
168
209
|
const st = slots[s];
|
|
169
210
|
const w = writers[s];
|
|
170
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
211
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
171
212
|
if (t >= st.nextDueMs) {
|
|
172
213
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
173
214
|
if (!w.write(buf)) {
|
|
215
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
174
216
|
const late = t - st.nextDueMs;
|
|
175
217
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
176
218
|
st.nextDueMs += steps * frameMs;
|
|
@@ -183,9 +225,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
183
225
|
}
|
|
184
226
|
if (!outputPacerState)
|
|
185
227
|
return;
|
|
228
|
+
// Handle output pacer for RTCAudioSource
|
|
186
229
|
const state = outputPacerState;
|
|
187
230
|
if (t >= state.nextDueMs) {
|
|
188
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
231
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
189
232
|
if (!state.didEmitReadyEvent) {
|
|
190
233
|
state.onAudioStreamReady();
|
|
191
234
|
state.didEmitReadyEvent = true;
|
|
@@ -201,12 +244,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
201
244
|
}, 5);
|
|
202
245
|
stopPacerFn = () => clearInterval(iv);
|
|
203
246
|
}
|
|
247
|
+
/**
|
|
248
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
249
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
250
|
+
*/
|
|
204
251
|
function stopPacer() {
|
|
205
252
|
if (stopPacerFn)
|
|
206
253
|
stopPacerFn();
|
|
207
254
|
stopPacerFn = null;
|
|
208
255
|
slots = [];
|
|
209
256
|
}
|
|
257
|
+
/**
|
|
258
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
259
|
+
* Auto-resnaps the slot's schedule if the frame size (480/960) changes.
|
|
260
|
+
*/
|
|
210
261
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
211
262
|
const st = slots[slot];
|
|
212
263
|
if (!st)
|
|
@@ -214,6 +265,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
214
265
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
215
266
|
st.q.push(buf);
|
|
216
267
|
}
|
|
268
|
+
/**
|
|
269
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
270
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
271
|
+
*/
|
|
217
272
|
function clearSlotQueue(slot) {
|
|
218
273
|
const st = slots[slot];
|
|
219
274
|
if (st) {
|
|
@@ -223,6 +278,11 @@ function clearSlotQueue(slot) {
|
|
|
223
278
|
st.nextDueMs = now + frameMs;
|
|
224
279
|
}
|
|
225
280
|
}
|
|
281
|
+
/**
|
|
282
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
283
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
284
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
285
|
+
*/
|
|
226
286
|
function getFFmpegArguments() {
|
|
227
287
|
const N = PARTICIPANT_SLOTS;
|
|
228
288
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -240,6 +300,14 @@ function getFFmpegArguments() {
|
|
|
240
300
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
241
301
|
return ffArgs;
|
|
242
302
|
}
|
|
303
|
+
/**
|
|
304
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
305
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
306
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
307
|
+
* The process will log its output to stderr.
|
|
308
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
309
|
+
* @return The spawned FFmpeg process.
|
|
310
|
+
*/
|
|
243
311
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
244
312
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
245
313
|
const args = getFFmpegArguments();
|
|
@@ -249,7 +317,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
249
317
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
250
318
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
251
319
|
let audioBuffer = Buffer.alloc(0);
|
|
252
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
320
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
253
321
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
254
322
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
255
323
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -264,6 +332,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
264
332
|
});
|
|
265
333
|
return ffmpegProcess;
|
|
266
334
|
}
|
|
335
|
+
/**
|
|
336
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
337
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
338
|
+
* that enqueues audio frames into the pacer.
|
|
339
|
+
*
|
|
340
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
341
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
342
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
343
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
344
|
+
*/
|
|
267
345
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
268
346
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
269
347
|
const sink = new AudioSink(audioTrack);
|
|
@@ -288,6 +366,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
288
366
|
};
|
|
289
367
|
return { sink, writer, stop };
|
|
290
368
|
}
|
|
369
|
+
/**
|
|
370
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
371
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
372
|
+
* and kill the FFmpeg process.
|
|
373
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
374
|
+
*/
|
|
291
375
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
292
376
|
stopPacer();
|
|
293
377
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -341,6 +425,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
341
425
|
for (const p of participants)
|
|
342
426
|
this.attachParticipantIfNeeded(p);
|
|
343
427
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
428
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
344
429
|
for (const [slot, pid] of this.participantSlots) {
|
|
345
430
|
if (pid && !liveIds.has(pid))
|
|
346
431
|
this.detachParticipant(pid);
|
|
@@ -353,6 +438,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
353
438
|
}
|
|
354
439
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
355
440
|
this.activeSlots = {};
|
|
441
|
+
// Recreate the media stream to avoid stale references
|
|
356
442
|
this.setupMediaStream();
|
|
357
443
|
}
|
|
358
444
|
slotForParticipant(participantId) {
|
|
@@ -418,6 +504,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
418
504
|
}
|
|
419
505
|
this.activeSlots[slot] = undefined;
|
|
420
506
|
}
|
|
507
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
421
508
|
clearSlotQueue(slot);
|
|
422
509
|
this.participantSlots.set(slot, "");
|
|
423
510
|
}
|
|
@@ -545,10 +632,7 @@ dotenv__namespace.config();
|
|
|
545
632
|
const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
|
|
546
633
|
function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
547
634
|
let wherebyDomain;
|
|
548
|
-
if (
|
|
549
|
-
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
550
|
-
}
|
|
551
|
-
else {
|
|
635
|
+
if (IS_LOCAL === "true") {
|
|
552
636
|
const ifaceAddrs = os.networkInterfaces()[BIND_INTERFACE];
|
|
553
637
|
if (!ifaceAddrs) {
|
|
554
638
|
throw new Error(`Unknown interface ${BIND_INTERFACE}`);
|
|
@@ -559,6 +643,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
|
559
643
|
}
|
|
560
644
|
wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
|
|
561
645
|
}
|
|
646
|
+
else {
|
|
647
|
+
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
648
|
+
}
|
|
562
649
|
return `https://${wherebyDomain}${roomPath}`;
|
|
563
650
|
}
|
|
564
651
|
|
|
@@ -599,6 +686,7 @@ class Trigger extends EventEmitter.EventEmitter {
|
|
|
599
686
|
const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
|
|
600
687
|
app.use(router);
|
|
601
688
|
const server = app.listen(this.port, () => {
|
|
689
|
+
// console.log(`Bot trigger server now running on port[${this.port}]`);
|
|
602
690
|
});
|
|
603
691
|
process.on("SIGTERM", () => {
|
|
604
692
|
server.close();
|
package/dist/index.mjs
CHANGED
|
@@ -67,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
67
67
|
}
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
71
|
+
// participants to these slots based on mute/unmute state.
|
|
70
72
|
const PARTICIPANT_SLOTS = 20;
|
|
73
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
74
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
71
75
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
72
76
|
const BYTES_PER_SAMPLE = 2;
|
|
77
|
+
// 480 samples per 10ms frame at 48kHz
|
|
73
78
|
const FRAME_10MS_SAMPLES = 480;
|
|
74
79
|
const slotBuffers = new Map();
|
|
75
80
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -81,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
81
86
|
let offset = 0;
|
|
82
87
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
83
88
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
84
|
-
enqueueFrame(slot, chunk);
|
|
89
|
+
enqueueFrame(slot, chunk); // always 480
|
|
85
90
|
offset += FRAME_10MS_SAMPLES;
|
|
86
91
|
}
|
|
87
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
92
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
88
93
|
}
|
|
89
94
|
({
|
|
90
95
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -96,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
96
101
|
let slots = [];
|
|
97
102
|
let stopPacerFn = null;
|
|
98
103
|
let outputPacerState = null;
|
|
104
|
+
/**
|
|
105
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
106
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
107
|
+
*/
|
|
99
108
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
100
109
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
101
110
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -115,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
115
124
|
}
|
|
116
125
|
return output;
|
|
117
126
|
}
|
|
127
|
+
/**
|
|
128
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
129
|
+
*/
|
|
118
130
|
function enqueueOutputFrame(samples) {
|
|
119
131
|
if (outputPacerState) {
|
|
120
132
|
outputPacerState.frameQueue.push(samples);
|
|
121
133
|
}
|
|
122
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
137
|
+
*
|
|
138
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
139
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
140
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
141
|
+
*
|
|
142
|
+
* Key behavior:
|
|
143
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
144
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
145
|
+
* never stalls.
|
|
146
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
147
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
148
|
+
* the timing grid.
|
|
149
|
+
*
|
|
150
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
151
|
+
* can mix them without slow-downs or drift.
|
|
152
|
+
*
|
|
153
|
+
* Call this once right after spawning FFmpeg:
|
|
154
|
+
* ```ts
|
|
155
|
+
* const ff = spawnFFmpegProcess();
|
|
156
|
+
* startPacer(ff, PARTICIPANT_SLOTS);
|
|
157
|
+
* ```
|
|
158
|
+
*
|
|
159
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
160
|
+
*
|
|
161
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
162
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
163
|
+
*/
|
|
123
164
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
124
165
|
if (stopPacerFn) {
|
|
125
166
|
stopPacerFn();
|
|
@@ -127,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
127
168
|
}
|
|
128
169
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
129
170
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
130
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
171
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
131
172
|
const t0 = nowMs();
|
|
132
173
|
slots = Array.from({ length: slotCount }, () => ({
|
|
133
174
|
q: [],
|
|
134
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
175
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
135
176
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
136
177
|
}));
|
|
137
178
|
outputPacerState = {
|
|
@@ -146,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
146
187
|
for (let s = 0; s < slotCount; s++) {
|
|
147
188
|
const st = slots[s];
|
|
148
189
|
const w = writers[s];
|
|
149
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
190
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
150
191
|
if (t >= st.nextDueMs) {
|
|
151
192
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
152
193
|
if (!w.write(buf)) {
|
|
194
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
153
195
|
const late = t - st.nextDueMs;
|
|
154
196
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
155
197
|
st.nextDueMs += steps * frameMs;
|
|
@@ -162,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
162
204
|
}
|
|
163
205
|
if (!outputPacerState)
|
|
164
206
|
return;
|
|
207
|
+
// Handle output pacer for RTCAudioSource
|
|
165
208
|
const state = outputPacerState;
|
|
166
209
|
if (t >= state.nextDueMs) {
|
|
167
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
210
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
168
211
|
if (!state.didEmitReadyEvent) {
|
|
169
212
|
state.onAudioStreamReady();
|
|
170
213
|
state.didEmitReadyEvent = true;
|
|
@@ -180,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
180
223
|
}, 5);
|
|
181
224
|
stopPacerFn = () => clearInterval(iv);
|
|
182
225
|
}
|
|
226
|
+
/**
|
|
227
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
228
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
229
|
+
*/
|
|
183
230
|
function stopPacer() {
|
|
184
231
|
if (stopPacerFn)
|
|
185
232
|
stopPacerFn();
|
|
186
233
|
stopPacerFn = null;
|
|
187
234
|
slots = [];
|
|
188
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
238
|
+
* Auto-resnaps the slot's schedule if the frame size (480/960) changes.
|
|
239
|
+
*/
|
|
189
240
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
190
241
|
const st = slots[slot];
|
|
191
242
|
if (!st)
|
|
@@ -193,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
193
244
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
194
245
|
st.q.push(buf);
|
|
195
246
|
}
|
|
247
|
+
/**
|
|
248
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
249
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
250
|
+
*/
|
|
196
251
|
function clearSlotQueue(slot) {
|
|
197
252
|
const st = slots[slot];
|
|
198
253
|
if (st) {
|
|
@@ -202,6 +257,11 @@ function clearSlotQueue(slot) {
|
|
|
202
257
|
st.nextDueMs = now + frameMs;
|
|
203
258
|
}
|
|
204
259
|
}
|
|
260
|
+
/**
|
|
261
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
262
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
263
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
264
|
+
*/
|
|
205
265
|
function getFFmpegArguments() {
|
|
206
266
|
const N = PARTICIPANT_SLOTS;
|
|
207
267
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -219,6 +279,14 @@ function getFFmpegArguments() {
|
|
|
219
279
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
220
280
|
return ffArgs;
|
|
221
281
|
}
|
|
282
|
+
/**
|
|
283
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
284
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
285
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
286
|
+
* The process will log its output to stderr.
|
|
287
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
288
|
+
* @return The spawned FFmpeg process.
|
|
289
|
+
*/
|
|
222
290
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
223
291
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
224
292
|
const args = getFFmpegArguments();
|
|
@@ -228,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
228
296
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
229
297
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
230
298
|
let audioBuffer = Buffer.alloc(0);
|
|
231
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
299
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
232
300
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
233
301
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
234
302
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -243,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
243
311
|
});
|
|
244
312
|
return ffmpegProcess;
|
|
245
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
316
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
317
|
+
* that enqueues audio frames into the pacer.
|
|
318
|
+
*
|
|
319
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
320
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
321
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
322
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
323
|
+
*/
|
|
246
324
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
247
325
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
248
326
|
const sink = new AudioSink(audioTrack);
|
|
@@ -267,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
267
345
|
};
|
|
268
346
|
return { sink, writer, stop };
|
|
269
347
|
}
|
|
348
|
+
/**
|
|
349
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
350
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
351
|
+
* and kill the FFmpeg process.
|
|
352
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
353
|
+
*/
|
|
270
354
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
271
355
|
stopPacer();
|
|
272
356
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -320,6 +404,7 @@ class AudioMixer extends EventEmitter {
|
|
|
320
404
|
for (const p of participants)
|
|
321
405
|
this.attachParticipantIfNeeded(p);
|
|
322
406
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
407
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
323
408
|
for (const [slot, pid] of this.participantSlots) {
|
|
324
409
|
if (pid && !liveIds.has(pid))
|
|
325
410
|
this.detachParticipant(pid);
|
|
@@ -332,6 +417,7 @@ class AudioMixer extends EventEmitter {
|
|
|
332
417
|
}
|
|
333
418
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
334
419
|
this.activeSlots = {};
|
|
420
|
+
// Recreate the media stream to avoid stale references
|
|
335
421
|
this.setupMediaStream();
|
|
336
422
|
}
|
|
337
423
|
slotForParticipant(participantId) {
|
|
@@ -397,6 +483,7 @@ class AudioMixer extends EventEmitter {
|
|
|
397
483
|
}
|
|
398
484
|
this.activeSlots[slot] = undefined;
|
|
399
485
|
}
|
|
486
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
400
487
|
clearSlotQueue(slot);
|
|
401
488
|
this.participantSlots.set(slot, "");
|
|
402
489
|
}
|
|
@@ -524,10 +611,7 @@ dotenv.config();
|
|
|
524
611
|
const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
|
|
525
612
|
function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
526
613
|
let wherebyDomain;
|
|
527
|
-
if (
|
|
528
|
-
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
529
|
-
}
|
|
530
|
-
else {
|
|
614
|
+
if (IS_LOCAL === "true") {
|
|
531
615
|
const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
|
|
532
616
|
if (!ifaceAddrs) {
|
|
533
617
|
throw new Error(`Unknown interface ${BIND_INTERFACE}`);
|
|
@@ -538,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
|
538
622
|
}
|
|
539
623
|
wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
|
|
540
624
|
}
|
|
625
|
+
else {
|
|
626
|
+
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
627
|
+
}
|
|
541
628
|
return `https://${wherebyDomain}${roomPath}`;
|
|
542
629
|
}
|
|
543
630
|
|
|
@@ -578,6 +665,7 @@ class Trigger extends EventEmitter {
|
|
|
578
665
|
const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
|
|
579
666
|
app.use(router);
|
|
580
667
|
const server = app.listen(this.port, () => {
|
|
668
|
+
// console.log(`Bot trigger server now running on port[${this.port}]`);
|
|
581
669
|
});
|
|
582
670
|
process.on("SIGTERM", () => {
|
|
583
671
|
server.close();
|
package/dist/legacy-esm.js
CHANGED
|
@@ -67,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
67
67
|
}
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
71
|
+
// participants to these slots based on mute/unmute state.
|
|
70
72
|
const PARTICIPANT_SLOTS = 20;
|
|
73
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
74
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
71
75
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
72
76
|
const BYTES_PER_SAMPLE = 2;
|
|
77
|
+
// 480 samples per 10ms frame at 48kHz
|
|
73
78
|
const FRAME_10MS_SAMPLES = 480;
|
|
74
79
|
const slotBuffers = new Map();
|
|
75
80
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -81,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
81
86
|
let offset = 0;
|
|
82
87
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
83
88
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
84
|
-
enqueueFrame(slot, chunk);
|
|
89
|
+
enqueueFrame(slot, chunk); // always 480
|
|
85
90
|
offset += FRAME_10MS_SAMPLES;
|
|
86
91
|
}
|
|
87
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
92
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
88
93
|
}
|
|
89
94
|
({
|
|
90
95
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -96,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
96
101
|
let slots = [];
|
|
97
102
|
let stopPacerFn = null;
|
|
98
103
|
let outputPacerState = null;
|
|
104
|
+
/**
|
|
105
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
106
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
107
|
+
*/
|
|
99
108
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
100
109
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
101
110
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -115,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
115
124
|
}
|
|
116
125
|
return output;
|
|
117
126
|
}
|
|
127
|
+
/**
|
|
128
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
129
|
+
*/
|
|
118
130
|
function enqueueOutputFrame(samples) {
|
|
119
131
|
if (outputPacerState) {
|
|
120
132
|
outputPacerState.frameQueue.push(samples);
|
|
121
133
|
}
|
|
122
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
137
|
+
*
|
|
138
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
139
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
140
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
141
|
+
*
|
|
142
|
+
* Key behavior:
|
|
143
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
144
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
145
|
+
* never stalls.
|
|
146
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
147
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
148
|
+
* the timing grid.
|
|
149
|
+
*
|
|
150
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
151
|
+
* can mix them without slow-downs or drift.
|
|
152
|
+
*
|
|
153
|
+
* Call this once right after spawning FFmpeg:
|
|
154
|
+
* ```ts
|
|
155
|
+
* const ff = spawnFFmpegProcess();
|
|
156
|
+
* startPacer(ff, PARTICIPANT_SLOTS);
|
|
157
|
+
* ```
|
|
158
|
+
*
|
|
159
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
160
|
+
*
|
|
161
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
162
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
163
|
+
*/
|
|
123
164
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
124
165
|
if (stopPacerFn) {
|
|
125
166
|
stopPacerFn();
|
|
@@ -127,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
127
168
|
}
|
|
128
169
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
129
170
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
130
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
171
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
131
172
|
const t0 = nowMs();
|
|
132
173
|
slots = Array.from({ length: slotCount }, () => ({
|
|
133
174
|
q: [],
|
|
134
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
175
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
135
176
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
136
177
|
}));
|
|
137
178
|
outputPacerState = {
|
|
@@ -146,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
146
187
|
for (let s = 0; s < slotCount; s++) {
|
|
147
188
|
const st = slots[s];
|
|
148
189
|
const w = writers[s];
|
|
149
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
190
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
150
191
|
if (t >= st.nextDueMs) {
|
|
151
192
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
152
193
|
if (!w.write(buf)) {
|
|
194
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
153
195
|
const late = t - st.nextDueMs;
|
|
154
196
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
155
197
|
st.nextDueMs += steps * frameMs;
|
|
@@ -162,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
162
204
|
}
|
|
163
205
|
if (!outputPacerState)
|
|
164
206
|
return;
|
|
207
|
+
// Handle output pacer for RTCAudioSource
|
|
165
208
|
const state = outputPacerState;
|
|
166
209
|
if (t >= state.nextDueMs) {
|
|
167
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
210
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
168
211
|
if (!state.didEmitReadyEvent) {
|
|
169
212
|
state.onAudioStreamReady();
|
|
170
213
|
state.didEmitReadyEvent = true;
|
|
@@ -180,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
180
223
|
}, 5);
|
|
181
224
|
stopPacerFn = () => clearInterval(iv);
|
|
182
225
|
}
|
|
226
|
+
/**
|
|
227
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
228
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
229
|
+
*/
|
|
183
230
|
function stopPacer() {
|
|
184
231
|
if (stopPacerFn)
|
|
185
232
|
stopPacerFn();
|
|
186
233
|
stopPacerFn = null;
|
|
187
234
|
slots = [];
|
|
188
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
238
|
+
* Auto-resnaps the slot's schedule if the frame size (480/960) changes.
|
|
239
|
+
*/
|
|
189
240
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
190
241
|
const st = slots[slot];
|
|
191
242
|
if (!st)
|
|
@@ -193,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
193
244
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
194
245
|
st.q.push(buf);
|
|
195
246
|
}
|
|
247
|
+
/**
|
|
248
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
249
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
250
|
+
*/
|
|
196
251
|
function clearSlotQueue(slot) {
|
|
197
252
|
const st = slots[slot];
|
|
198
253
|
if (st) {
|
|
@@ -202,6 +257,11 @@ function clearSlotQueue(slot) {
|
|
|
202
257
|
st.nextDueMs = now + frameMs;
|
|
203
258
|
}
|
|
204
259
|
}
|
|
260
|
+
/**
|
|
261
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
262
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
263
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
264
|
+
*/
|
|
205
265
|
function getFFmpegArguments() {
|
|
206
266
|
const N = PARTICIPANT_SLOTS;
|
|
207
267
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -219,6 +279,14 @@ function getFFmpegArguments() {
|
|
|
219
279
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
220
280
|
return ffArgs;
|
|
221
281
|
}
|
|
282
|
+
/**
|
|
283
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
284
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
285
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
286
|
+
* The process will log its output to stderr.
|
|
287
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
288
|
+
* @return The spawned FFmpeg process.
|
|
289
|
+
*/
|
|
222
290
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
223
291
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
224
292
|
const args = getFFmpegArguments();
|
|
@@ -228,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
228
296
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
229
297
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
230
298
|
let audioBuffer = Buffer.alloc(0);
|
|
231
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
299
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
232
300
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
233
301
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
234
302
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -243,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
243
311
|
});
|
|
244
312
|
return ffmpegProcess;
|
|
245
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
316
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
317
|
+
* that enqueues audio frames into the pacer.
|
|
318
|
+
*
|
|
319
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
320
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
321
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
322
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
323
|
+
*/
|
|
246
324
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
247
325
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
248
326
|
const sink = new AudioSink(audioTrack);
|
|
@@ -267,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
267
345
|
};
|
|
268
346
|
return { sink, writer, stop };
|
|
269
347
|
}
|
|
348
|
+
/**
|
|
349
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
350
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
351
|
+
* and kill the FFmpeg process.
|
|
352
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
353
|
+
*/
|
|
270
354
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
271
355
|
stopPacer();
|
|
272
356
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -320,6 +404,7 @@ class AudioMixer extends EventEmitter {
|
|
|
320
404
|
for (const p of participants)
|
|
321
405
|
this.attachParticipantIfNeeded(p);
|
|
322
406
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
407
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
323
408
|
for (const [slot, pid] of this.participantSlots) {
|
|
324
409
|
if (pid && !liveIds.has(pid))
|
|
325
410
|
this.detachParticipant(pid);
|
|
@@ -332,6 +417,7 @@ class AudioMixer extends EventEmitter {
|
|
|
332
417
|
}
|
|
333
418
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
334
419
|
this.activeSlots = {};
|
|
420
|
+
// Recreate the media stream to avoid stale references
|
|
335
421
|
this.setupMediaStream();
|
|
336
422
|
}
|
|
337
423
|
slotForParticipant(participantId) {
|
|
@@ -397,6 +483,7 @@ class AudioMixer extends EventEmitter {
|
|
|
397
483
|
}
|
|
398
484
|
this.activeSlots[slot] = undefined;
|
|
399
485
|
}
|
|
486
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
400
487
|
clearSlotQueue(slot);
|
|
401
488
|
this.participantSlots.set(slot, "");
|
|
402
489
|
}
|
|
@@ -524,10 +611,7 @@ dotenv.config();
|
|
|
524
611
|
const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
|
|
525
612
|
function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
526
613
|
let wherebyDomain;
|
|
527
|
-
if (
|
|
528
|
-
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
529
|
-
}
|
|
530
|
-
else {
|
|
614
|
+
if (IS_LOCAL === "true") {
|
|
531
615
|
const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
|
|
532
616
|
if (!ifaceAddrs) {
|
|
533
617
|
throw new Error(`Unknown interface ${BIND_INTERFACE}`);
|
|
@@ -538,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
|
538
622
|
}
|
|
539
623
|
wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
|
|
540
624
|
}
|
|
625
|
+
else {
|
|
626
|
+
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
627
|
+
}
|
|
541
628
|
return `https://${wherebyDomain}${roomPath}`;
|
|
542
629
|
}
|
|
543
630
|
|
|
@@ -578,6 +665,7 @@ class Trigger extends EventEmitter {
|
|
|
578
665
|
const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
|
|
579
666
|
app.use(router);
|
|
580
667
|
const server = app.listen(this.port, () => {
|
|
668
|
+
// console.log(`Bot trigger server now running on port[${this.port}]`);
|
|
581
669
|
});
|
|
582
670
|
process.on("SIGTERM", () => {
|
|
583
671
|
server.close();
|
package/dist/polyfills.cjs
CHANGED
|
@@ -38,8 +38,10 @@ typeof SuppressedError === "function" ? SuppressedError : function (error, suppr
|
|
|
38
38
|
|
|
39
39
|
function setWebsocketOrigin(roomUrl) {
|
|
40
40
|
try {
|
|
41
|
+
// add pathname needed for parsing in rtcstats-server.
|
|
41
42
|
const url = new URL(roomUrl);
|
|
42
43
|
global.window.location.pathname = url.pathname;
|
|
44
|
+
// fix origin header needed for parsing in rtcstats-server.
|
|
43
45
|
const defaultClientOptions = {
|
|
44
46
|
origin: url.origin,
|
|
45
47
|
};
|
|
@@ -90,6 +92,10 @@ class RTCPeerConnection extends wrtc.RTCPeerConnection {
|
|
|
90
92
|
}
|
|
91
93
|
getStats(arg) {
|
|
92
94
|
return __awaiter(this, void 0, void 0, function* () {
|
|
95
|
+
/**
|
|
96
|
+
* node-wrtc seems to expect an Object argument, and doesn't handle the null arg we pass, so we
|
|
97
|
+
* wrap the call and filter the arg
|
|
98
|
+
**/
|
|
93
99
|
arg = arg instanceof Object ? arg : undefined;
|
|
94
100
|
const stats = yield this.wrappedGetStats(arg);
|
|
95
101
|
return stats;
|
|
@@ -112,6 +118,6 @@ global.RTCRtpSender = wrtc.RTCRtpSender;
|
|
|
112
118
|
global.RTCRtpTransceiver = wrtc.RTCRtpTransceiver;
|
|
113
119
|
global.RTCSctpTransport = wrtc.RTCSctpTransport;
|
|
114
120
|
global.RTCSessionDescription = wrtc.RTCSessionDescription;
|
|
115
|
-
global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval });
|
|
121
|
+
global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval }); // make sure all the classes / setInterval are available on window for rtcstats
|
|
116
122
|
|
|
117
123
|
exports.setWebsocketOrigin = setWebsocketOrigin;
|
package/dist/tools.cjs
CHANGED
|
@@ -19,9 +19,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
23
|
+
// participants to these slots based on mute/unmute state.
|
|
22
24
|
const PARTICIPANT_SLOTS = 20;
|
|
25
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
26
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
23
27
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
24
28
|
const BYTES_PER_SAMPLE = 2;
|
|
29
|
+
// 480 samples per 10ms frame at 48kHz
|
|
25
30
|
const FRAME_10MS_SAMPLES = 480;
|
|
26
31
|
const slotBuffers = new Map();
|
|
27
32
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -33,10 +38,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
33
38
|
let offset = 0;
|
|
34
39
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
35
40
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
36
|
-
enqueueFrame(slot, chunk);
|
|
41
|
+
enqueueFrame(slot, chunk); // always 480
|
|
37
42
|
offset += FRAME_10MS_SAMPLES;
|
|
38
43
|
}
|
|
39
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
44
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
40
45
|
}
|
|
41
46
|
({
|
|
42
47
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -48,6 +53,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
48
53
|
let slots = [];
|
|
49
54
|
let stopPacerFn = null;
|
|
50
55
|
let outputPacerState = null;
|
|
56
|
+
/**
|
|
57
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
58
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
59
|
+
*/
|
|
51
60
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
52
61
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
53
62
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -67,11 +76,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
67
76
|
}
|
|
68
77
|
return output;
|
|
69
78
|
}
|
|
79
|
+
/**
|
|
80
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
81
|
+
*/
|
|
70
82
|
function enqueueOutputFrame(samples) {
|
|
71
83
|
if (outputPacerState) {
|
|
72
84
|
outputPacerState.frameQueue.push(samples);
|
|
73
85
|
}
|
|
74
86
|
}
|
|
87
|
+
/**
|
|
88
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
89
|
+
*
|
|
90
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
91
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
92
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
93
|
+
*
|
|
94
|
+
* Key behavior:
|
|
95
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
96
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
97
|
+
* never stalls.
|
|
98
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
99
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
100
|
+
* the timing grid.
|
|
101
|
+
*
|
|
102
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
103
|
+
* can mix them without slow-downs or drift.
|
|
104
|
+
*
|
|
105
|
+
* Call this once right after spawning FFmpeg:
|
|
106
|
+
* ```ts
|
|
107
|
+
* const ff = spawnFFmpegProcess();
|
|
108
|
+
* startPacer(ff, PARTICIPANT_SLOTS);
|
|
109
|
+
* ```
|
|
110
|
+
*
|
|
111
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
112
|
+
*
|
|
113
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
114
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
115
|
+
*/
|
|
75
116
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
76
117
|
if (stopPacerFn) {
|
|
77
118
|
stopPacerFn();
|
|
@@ -79,11 +120,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
79
120
|
}
|
|
80
121
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
81
122
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
82
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
123
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
83
124
|
const t0 = nowMs();
|
|
84
125
|
slots = Array.from({ length: slotCount }, () => ({
|
|
85
126
|
q: [],
|
|
86
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
127
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
87
128
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
88
129
|
}));
|
|
89
130
|
outputPacerState = {
|
|
@@ -98,10 +139,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
98
139
|
for (let s = 0; s < slotCount; s++) {
|
|
99
140
|
const st = slots[s];
|
|
100
141
|
const w = writers[s];
|
|
101
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
142
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
102
143
|
if (t >= st.nextDueMs) {
|
|
103
144
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
104
145
|
if (!w.write(buf)) {
|
|
146
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
105
147
|
const late = t - st.nextDueMs;
|
|
106
148
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
107
149
|
st.nextDueMs += steps * frameMs;
|
|
@@ -114,9 +156,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
114
156
|
}
|
|
115
157
|
if (!outputPacerState)
|
|
116
158
|
return;
|
|
159
|
+
// Handle output pacer for RTCAudioSource
|
|
117
160
|
const state = outputPacerState;
|
|
118
161
|
if (t >= state.nextDueMs) {
|
|
119
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
162
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
120
163
|
if (!state.didEmitReadyEvent) {
|
|
121
164
|
state.onAudioStreamReady();
|
|
122
165
|
state.didEmitReadyEvent = true;
|
|
@@ -132,12 +175,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
132
175
|
}, 5);
|
|
133
176
|
stopPacerFn = () => clearInterval(iv);
|
|
134
177
|
}
|
|
178
|
+
/**
|
|
179
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
180
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
181
|
+
*/
|
|
135
182
|
function stopPacer() {
|
|
136
183
|
if (stopPacerFn)
|
|
137
184
|
stopPacerFn();
|
|
138
185
|
stopPacerFn = null;
|
|
139
186
|
slots = [];
|
|
140
187
|
}
|
|
188
|
+
/**
|
|
189
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
190
|
+
* Auto-resnaps the slot's schedule if the frame size (480/960) changes.
|
|
191
|
+
*/
|
|
141
192
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
142
193
|
const st = slots[slot];
|
|
143
194
|
if (!st)
|
|
@@ -145,6 +196,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
145
196
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
146
197
|
st.q.push(buf);
|
|
147
198
|
}
|
|
199
|
+
/**
|
|
200
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
201
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
202
|
+
*/
|
|
148
203
|
function clearSlotQueue(slot) {
|
|
149
204
|
const st = slots[slot];
|
|
150
205
|
if (st) {
|
|
@@ -154,6 +209,11 @@ function clearSlotQueue(slot) {
|
|
|
154
209
|
st.nextDueMs = now + frameMs;
|
|
155
210
|
}
|
|
156
211
|
}
|
|
212
|
+
/**
|
|
213
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
214
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
215
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
216
|
+
*/
|
|
157
217
|
function getFFmpegArguments() {
|
|
158
218
|
const N = PARTICIPANT_SLOTS;
|
|
159
219
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -171,6 +231,14 @@ function getFFmpegArguments() {
|
|
|
171
231
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
172
232
|
return ffArgs;
|
|
173
233
|
}
|
|
234
|
+
/**
|
|
235
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
236
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
237
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
238
|
+
* The process will log its output to stderr.
|
|
239
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
240
|
+
* @return The spawned FFmpeg process.
|
|
241
|
+
*/
|
|
174
242
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
175
243
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
176
244
|
const args = getFFmpegArguments();
|
|
@@ -180,7 +248,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
180
248
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
181
249
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
182
250
|
let audioBuffer = Buffer.alloc(0);
|
|
183
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
251
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
184
252
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
185
253
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
186
254
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -195,6 +263,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
195
263
|
});
|
|
196
264
|
return ffmpegProcess;
|
|
197
265
|
}
|
|
266
|
+
/**
|
|
267
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
268
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
269
|
+
* that enqueues audio frames into the pacer.
|
|
270
|
+
*
|
|
271
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
272
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
273
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
274
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
275
|
+
*/
|
|
198
276
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
199
277
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
200
278
|
const sink = new AudioSink(audioTrack);
|
|
@@ -219,6 +297,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
219
297
|
};
|
|
220
298
|
return { sink, writer, stop };
|
|
221
299
|
}
|
|
300
|
+
/**
|
|
301
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
302
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
303
|
+
* and kill the FFmpeg process.
|
|
304
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
305
|
+
*/
|
|
222
306
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
223
307
|
stopPacer();
|
|
224
308
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -272,6 +356,7 @@ class AudioMixer extends events.EventEmitter {
|
|
|
272
356
|
for (const p of participants)
|
|
273
357
|
this.attachParticipantIfNeeded(p);
|
|
274
358
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
359
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
275
360
|
for (const [slot, pid] of this.participantSlots) {
|
|
276
361
|
if (pid && !liveIds.has(pid))
|
|
277
362
|
this.detachParticipant(pid);
|
|
@@ -284,6 +369,7 @@ class AudioMixer extends events.EventEmitter {
|
|
|
284
369
|
}
|
|
285
370
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
286
371
|
this.activeSlots = {};
|
|
372
|
+
// Recreate the media stream to avoid stale references
|
|
287
373
|
this.setupMediaStream();
|
|
288
374
|
}
|
|
289
375
|
slotForParticipant(participantId) {
|
|
@@ -349,6 +435,7 @@ class AudioMixer extends events.EventEmitter {
|
|
|
349
435
|
}
|
|
350
436
|
this.activeSlots[slot] = undefined;
|
|
351
437
|
}
|
|
438
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
352
439
|
clearSlotQueue(slot);
|
|
353
440
|
this.participantSlots.set(slot, "");
|
|
354
441
|
}
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@whereby.com/assistant-sdk",
|
|
3
3
|
"description": "Assistant SDK for whereby.com",
|
|
4
4
|
"author": "Whereby AS",
|
|
5
|
-
"version": "0.0.0-canary-20250912102624",
|
|
5
|
+
"version": "0.0.0-canary-20250912142319",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"files": [
|
|
8
8
|
"dist",
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
"dotenv": "^16.4.5",
|
|
64
64
|
"uuid": "^11.0.3",
|
|
65
65
|
"ws": "^8.18.0",
|
|
66
|
-
"@whereby.com/core": "0.0.0-canary-20250912102624"
|
|
66
|
+
"@whereby.com/core": "0.0.0-canary-20250912142319"
|
|
67
67
|
},
|
|
68
68
|
"prettier": "@whereby.com/prettier-config",
|
|
69
69
|
"scripts": {
|