@whereby.com/assistant-sdk 1.2.15 → 1.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +106 -75
- package/dist/index.mjs +106 -75
- package/dist/legacy-esm.js +106 -75
- package/dist/tools.cjs +106 -75
- package/dist/tools.d.ts +8 -6
- package/package.json +2 -2
package/dist/legacy-esm.js
CHANGED
|
@@ -64,8 +64,8 @@ class AudioSink extends RTCAudioSink {
|
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
67
|
-
// participants to these slots based on mute/unmute state.
|
|
68
|
-
const
|
|
67
|
+
// participants/screenshares to these slots based on mute/unmute state.
|
|
68
|
+
const MIXER_SLOTS = 20;
|
|
69
69
|
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
70
70
|
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
71
71
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
@@ -89,11 +89,11 @@ function createFfmpegMixer() {
|
|
|
89
89
|
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
90
90
|
}
|
|
91
91
|
({
|
|
92
|
-
enqFrames: new Array(
|
|
93
|
-
enqSamples: new Array(
|
|
94
|
-
wroteFrames: new Array(
|
|
95
|
-
wroteSamples: new Array(
|
|
96
|
-
lastFramesSeen: new Array(
|
|
92
|
+
enqFrames: new Array(MIXER_SLOTS).fill(0),
|
|
93
|
+
enqSamples: new Array(MIXER_SLOTS).fill(0),
|
|
94
|
+
wroteFrames: new Array(MIXER_SLOTS).fill(0),
|
|
95
|
+
wroteSamples: new Array(MIXER_SLOTS).fill(0),
|
|
96
|
+
lastFramesSeen: new Array(MIXER_SLOTS).fill(0),
|
|
97
97
|
});
|
|
98
98
|
let slots = [];
|
|
99
99
|
let stopPacerFn = null;
|
|
@@ -150,13 +150,13 @@ function createFfmpegMixer() {
|
|
|
150
150
|
* Call this once right after spawning FFmpeg:
|
|
151
151
|
* ```ts
|
|
152
152
|
* const ff = spawnFFmpegProcess();
|
|
153
|
-
* startPacer(ff,
|
|
153
|
+
* startPacer(ff, MIXER_SLOTS);
|
|
154
154
|
* ```
|
|
155
155
|
*
|
|
156
156
|
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
157
157
|
*
|
|
158
158
|
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
159
|
-
* @param slotCount Number of
|
|
159
|
+
* @param slotCount Number of mixer input slots (0..N-1 → fd 3..3+N-1)
|
|
160
160
|
*/
|
|
161
161
|
function startPacer(ff, slotCount, rtcAudioSource) {
|
|
162
162
|
if (stopPacerFn) {
|
|
@@ -238,7 +238,7 @@ function createFfmpegMixer() {
|
|
|
238
238
|
st.q.push(buf);
|
|
239
239
|
}
|
|
240
240
|
/**
|
|
241
|
-
* Clear the audio queue for a specific slot when a participant leaves.
|
|
241
|
+
* Clear the audio queue for a specific slot when a participant leaves or screenshare stops.
|
|
242
242
|
* This prevents stale audio data from continuing to play after disconnect.
|
|
243
243
|
*/
|
|
244
244
|
function clearSlotQueue(slot) {
|
|
@@ -252,12 +252,12 @@ function createFfmpegMixer() {
|
|
|
252
252
|
}
|
|
253
253
|
}
|
|
254
254
|
/**
|
|
255
|
-
* Get the FFmpeg arguments for debugging, which writes each participant's audio to a separate WAV file
|
|
255
|
+
* Get the FFmpeg arguments for debugging, which writes each participant/screenshare's audio to a separate WAV file
|
|
256
256
|
* and also mixes them into a single WAV file.
|
|
257
|
-
* This is useful for inspecting the audio quality and timing of each participant.
|
|
257
|
+
* This is useful for inspecting the audio quality and timing of each participant/screenshare.
|
|
258
258
|
*/
|
|
259
259
|
function getFFmpegArgumentsDebug() {
|
|
260
|
-
const N =
|
|
260
|
+
const N = MIXER_SLOTS;
|
|
261
261
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
262
262
|
const ffArgs = [];
|
|
263
263
|
for (let i = 0; i < N; i++) {
|
|
@@ -277,12 +277,12 @@ function createFfmpegMixer() {
|
|
|
277
277
|
return ffArgs;
|
|
278
278
|
}
|
|
279
279
|
/**
|
|
280
|
-
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
280
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants/screenshares.
|
|
281
281
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
282
282
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
283
283
|
*/
|
|
284
284
|
function getFFmpegArguments() {
|
|
285
|
-
const N =
|
|
285
|
+
const N = MIXER_SLOTS;
|
|
286
286
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
287
287
|
const ffArgs = [];
|
|
288
288
|
for (let i = 0; i < N; i++) {
|
|
@@ -300,23 +300,23 @@ function createFfmpegMixer() {
|
|
|
300
300
|
}
|
|
301
301
|
/*
|
|
302
302
|
* Spawn a new FFmpeg process for debugging purposes.
|
|
303
|
-
* This will write each participant's audio to a separate WAV file and also mix them into a single WAV file.
|
|
303
|
+
* This will write each participant/screenshare's audio to a separate WAV file and also mix them into a single WAV file.
|
|
304
304
|
* The output files will be named pre0.wav, pre1.wav, ..., and mixed.wav.
|
|
305
305
|
* The process will log its output to stderr.
|
|
306
306
|
* @return The spawned FFmpeg process.
|
|
307
307
|
*/
|
|
308
308
|
function spawnFFmpegProcessDebug(rtcAudioSource) {
|
|
309
|
-
const stdio = ["ignore", "ignore", "pipe", ...Array(
|
|
309
|
+
const stdio = ["ignore", "ignore", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
310
310
|
const args = getFFmpegArgumentsDebug();
|
|
311
311
|
const ffmpegProcess = spawn("ffmpeg", args, { stdio });
|
|
312
|
-
startPacer(ffmpegProcess,
|
|
312
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
313
313
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
314
314
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
315
315
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error (debug): is ffmpeg installed?"));
|
|
316
316
|
return ffmpegProcess;
|
|
317
317
|
}
|
|
318
318
|
/**
|
|
319
|
-
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
319
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants/screenshares.
|
|
320
320
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
321
321
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
322
322
|
* The process will log its output to stderr.
|
|
@@ -324,10 +324,10 @@ function createFfmpegMixer() {
|
|
|
324
324
|
* @return The spawned FFmpeg process.
|
|
325
325
|
*/
|
|
326
326
|
function spawnFFmpegProcess(rtcAudioSource) {
|
|
327
|
-
const stdio = ["pipe", "pipe", "pipe", ...Array(
|
|
327
|
+
const stdio = ["pipe", "pipe", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
328
328
|
const args = getFFmpegArguments();
|
|
329
329
|
const ffmpegProcess = spawn("ffmpeg", args, { stdio });
|
|
330
|
-
startPacer(ffmpegProcess,
|
|
330
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
331
331
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
332
332
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
333
333
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
@@ -353,7 +353,7 @@ function createFfmpegMixer() {
|
|
|
353
353
|
* that enqueues audio frames into the pacer.
|
|
354
354
|
*
|
|
355
355
|
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
356
|
-
* @param slot The
|
|
356
|
+
* @param slot The mixer slot number (0..N-1) to which this track belongs.
|
|
357
357
|
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
358
358
|
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
359
359
|
*/
|
|
@@ -383,7 +383,7 @@ function createFfmpegMixer() {
|
|
|
383
383
|
}
|
|
384
384
|
/**
|
|
385
385
|
* Stop the FFmpeg process and clean up all resources.
|
|
386
|
-
* This function will unpipe the stdout, end all writable streams for each
|
|
386
|
+
* This function will unpipe the stdout, end all writable streams for each mixer slot,
|
|
387
387
|
* and kill the FFmpeg process.
|
|
388
388
|
* @param ffmpegProcess The FFmpeg process to stop.
|
|
389
389
|
*/
|
|
@@ -397,7 +397,7 @@ function createFfmpegMixer() {
|
|
|
397
397
|
catch (_c) {
|
|
398
398
|
console.error("Failed to unpipe ffmpeg stdout");
|
|
399
399
|
}
|
|
400
|
-
for (let i = 0; i <
|
|
400
|
+
for (let i = 0; i < MIXER_SLOTS; i++) {
|
|
401
401
|
const w = ffmpegProcess.stdio[3 + i];
|
|
402
402
|
try {
|
|
403
403
|
w.end();
|
|
@@ -424,84 +424,60 @@ function createFfmpegMixer() {
|
|
|
424
424
|
};
|
|
425
425
|
}
|
|
426
426
|
|
|
427
|
+
var _a;
|
|
428
|
+
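// Debug: set the DEBUG_MIXER_OUTPUT environment variable to any non-empty value to enable debug output (and write audio to .wav files). Environment values are strings, so "false" or "0" also enable it; leave the variable unset to disable.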
|
|
429
|
+
const DEBUG_MIXER_OUTPUT = (_a = process.env.DEBUG_MIXER_OUTPUT) !== null && _a !== void 0 ? _a : false;
|
|
427
430
|
class AudioMixer extends EventEmitter {
|
|
428
431
|
constructor() {
|
|
429
432
|
super();
|
|
430
433
|
this.ffmpegProcess = null;
|
|
431
434
|
this.combinedAudioStream = null;
|
|
432
435
|
this.rtcAudioSource = null;
|
|
433
|
-
this.
|
|
436
|
+
this.mixableSlots = new Map();
|
|
434
437
|
this.activeSlots = {};
|
|
435
438
|
this.mixer = createFfmpegMixer();
|
|
436
439
|
this.setupMediaStream();
|
|
437
|
-
this.
|
|
440
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
438
441
|
}
|
|
439
442
|
setupMediaStream() {
|
|
440
443
|
this.rtcAudioSource = new wrtc.nonstandard.RTCAudioSource();
|
|
441
444
|
const audioTrack = this.rtcAudioSource.createTrack();
|
|
442
445
|
this.combinedAudioStream = new wrtc.MediaStream([audioTrack]);
|
|
443
446
|
}
|
|
444
|
-
|
|
445
|
-
return this.combinedAudioStream;
|
|
446
|
-
}
|
|
447
|
-
handleRemoteParticipants(participants) {
|
|
448
|
-
if (participants.length === 0) {
|
|
449
|
-
this.stopAudioMixer();
|
|
450
|
-
return;
|
|
451
|
-
}
|
|
452
|
-
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
453
|
-
this.ffmpegProcess = this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
454
|
-
}
|
|
455
|
-
for (const p of participants)
|
|
456
|
-
this.attachParticipantIfNeeded(p);
|
|
457
|
-
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
458
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
459
|
-
for (const [slot, pid] of this.participantSlots) {
|
|
460
|
-
if (pid && !liveIds.has(pid))
|
|
461
|
-
this.detachParticipant(pid);
|
|
462
|
-
}
|
|
463
|
-
}
|
|
464
|
-
stopAudioMixer() {
|
|
465
|
-
if (this.ffmpegProcess) {
|
|
466
|
-
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
467
|
-
this.ffmpegProcess = null;
|
|
468
|
-
}
|
|
469
|
-
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
470
|
-
this.activeSlots = {};
|
|
471
|
-
// Recreate the media stream to avoid stale references
|
|
472
|
-
this.setupMediaStream();
|
|
473
|
-
}
|
|
474
|
-
slotForParticipant(participantId) {
|
|
447
|
+
slotForMixable(mixableId) {
|
|
475
448
|
var _a;
|
|
476
|
-
const found = (_a = [...this.
|
|
449
|
+
const found = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === mixableId)) === null || _a === void 0 ? void 0 : _a[0];
|
|
477
450
|
return found === undefined ? null : found;
|
|
478
451
|
}
|
|
479
|
-
|
|
452
|
+
slotsByType(mixableType) {
|
|
453
|
+
return [...this.mixableSlots.entries()].filter(([slotId]) => { var _a; return ((_a = this.activeSlots[slotId]) === null || _a === void 0 ? void 0 : _a.type) === mixableType; });
|
|
454
|
+
}
|
|
455
|
+
acquireSlot(mixableId) {
|
|
480
456
|
var _a;
|
|
481
|
-
const existing = this.
|
|
457
|
+
const existing = this.slotForMixable(mixableId);
|
|
482
458
|
if (existing !== null)
|
|
483
459
|
return existing;
|
|
484
|
-
const empty = (_a = [...this.
|
|
460
|
+
const empty = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
|
|
485
461
|
if (empty === undefined)
|
|
486
462
|
return null;
|
|
487
|
-
this.
|
|
463
|
+
this.mixableSlots.set(empty, mixableId);
|
|
488
464
|
return empty;
|
|
489
465
|
}
|
|
490
|
-
|
|
466
|
+
attachMixableIfNeeded(mixable) {
|
|
491
467
|
var _a;
|
|
492
|
-
const { id:
|
|
493
|
-
if (!
|
|
468
|
+
const { id: mixableId, stream: mixableStream, isAudioEnabled, type } = mixable;
|
|
469
|
+
if (!mixableId)
|
|
494
470
|
return;
|
|
495
|
-
if (!
|
|
496
|
-
this.
|
|
471
|
+
if (!mixableStream || !isAudioEnabled) {
|
|
472
|
+
this.detachMixable(mixableId);
|
|
497
473
|
return;
|
|
498
474
|
}
|
|
499
|
-
const audioTrack =
|
|
475
|
+
const audioTrack = mixableStream.getTracks().find((t) => t.kind === "audio");
|
|
500
476
|
if (!audioTrack) {
|
|
501
|
-
this.
|
|
477
|
+
this.detachMixable(mixableId);
|
|
502
478
|
return;
|
|
503
479
|
}
|
|
504
|
-
const slot = this.acquireSlot(
|
|
480
|
+
const slot = this.acquireSlot(mixableId);
|
|
505
481
|
if (slot === null)
|
|
506
482
|
return;
|
|
507
483
|
const existing = this.activeSlots[slot];
|
|
@@ -517,11 +493,11 @@ class AudioMixer extends EventEmitter {
|
|
|
517
493
|
this.activeSlots[slot] = undefined;
|
|
518
494
|
}
|
|
519
495
|
const { sink, writer, stop } = this.mixer.writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
|
|
520
|
-
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id };
|
|
521
|
-
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.
|
|
496
|
+
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id, type };
|
|
497
|
+
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachMixable(mixableId));
|
|
522
498
|
}
|
|
523
|
-
|
|
524
|
-
const slot = this.
|
|
499
|
+
detachMixable(mixableId) {
|
|
500
|
+
const slot = this.slotForMixable(mixableId);
|
|
525
501
|
if (slot === null)
|
|
526
502
|
return;
|
|
527
503
|
const binding = this.activeSlots[slot];
|
|
@@ -536,7 +512,62 @@ class AudioMixer extends EventEmitter {
|
|
|
536
512
|
}
|
|
537
513
|
// Clear any queued audio data for this slot to prevent stale audio
|
|
538
514
|
this.mixer.clearSlotQueue(slot);
|
|
539
|
-
this.
|
|
515
|
+
this.mixableSlots.set(slot, "");
|
|
516
|
+
}
|
|
517
|
+
getCombinedAudioStream() {
|
|
518
|
+
return this.combinedAudioStream;
|
|
519
|
+
}
|
|
520
|
+
handleRemoteParticipants(participants) {
|
|
521
|
+
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
522
|
+
const typedSlots = this.slotsByType("participant");
|
|
523
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
524
|
+
for (const [slot, pid] of typedSlots) {
|
|
525
|
+
if (pid && !liveIds.has(pid))
|
|
526
|
+
this.detachMixable(pid);
|
|
527
|
+
}
|
|
528
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
529
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
530
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
531
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
532
|
+
}
|
|
533
|
+
for (const p of participants)
|
|
534
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, p), { type: "participant" }));
|
|
535
|
+
}
|
|
536
|
+
handleScreenshares(screenshares) {
|
|
537
|
+
const screensharesWithAudio = screenshares.filter((screenshare) => screenshare.hasAudioTrack &&
|
|
538
|
+
screenshare.stream &&
|
|
539
|
+
screenshare.stream.getTracks().filter(({ kind }) => kind === "audio").length > 0);
|
|
540
|
+
const liveIds = new Set(screensharesWithAudio.map((p) => p.id).filter(Boolean));
|
|
541
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
542
|
+
for (const [slot, sid] of this.slotsByType("screenshare")) {
|
|
543
|
+
if (sid && !liveIds.has(sid))
|
|
544
|
+
this.detachMixable(sid);
|
|
545
|
+
}
|
|
546
|
+
if (screensharesWithAudio.length === 0) {
|
|
547
|
+
return;
|
|
548
|
+
}
|
|
549
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
550
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
551
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
552
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
553
|
+
}
|
|
554
|
+
const mixables = screensharesWithAudio.map(({ id, stream, hasAudioTrack }) => ({
|
|
555
|
+
id,
|
|
556
|
+
stream,
|
|
557
|
+
isAudioEnabled: hasAudioTrack,
|
|
558
|
+
}));
|
|
559
|
+
for (const s of mixables)
|
|
560
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, s), { type: "screenshare" }));
|
|
561
|
+
}
|
|
562
|
+
stopAudioMixer() {
|
|
563
|
+
if (this.ffmpegProcess) {
|
|
564
|
+
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
565
|
+
this.ffmpegProcess = null;
|
|
566
|
+
}
|
|
567
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
568
|
+
this.activeSlots = {};
|
|
569
|
+
// Recreate the media stream to avoid stale references
|
|
570
|
+
this.setupMediaStream();
|
|
540
571
|
}
|
|
541
572
|
}
|
|
542
573
|
|
package/dist/tools.cjs
CHANGED
|
@@ -19,8 +19,8 @@ class AudioSink extends RTCAudioSink {
|
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
22
|
-
// participants to these slots based on mute/unmute state.
|
|
23
|
-
const
|
|
22
|
+
// participants/screenshares to these slots based on mute/unmute state.
|
|
23
|
+
const MIXER_SLOTS = 20;
|
|
24
24
|
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
25
25
|
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
26
26
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
@@ -44,11 +44,11 @@ function createFfmpegMixer() {
|
|
|
44
44
|
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
45
45
|
}
|
|
46
46
|
({
|
|
47
|
-
enqFrames: new Array(
|
|
48
|
-
enqSamples: new Array(
|
|
49
|
-
wroteFrames: new Array(
|
|
50
|
-
wroteSamples: new Array(
|
|
51
|
-
lastFramesSeen: new Array(
|
|
47
|
+
enqFrames: new Array(MIXER_SLOTS).fill(0),
|
|
48
|
+
enqSamples: new Array(MIXER_SLOTS).fill(0),
|
|
49
|
+
wroteFrames: new Array(MIXER_SLOTS).fill(0),
|
|
50
|
+
wroteSamples: new Array(MIXER_SLOTS).fill(0),
|
|
51
|
+
lastFramesSeen: new Array(MIXER_SLOTS).fill(0),
|
|
52
52
|
});
|
|
53
53
|
let slots = [];
|
|
54
54
|
let stopPacerFn = null;
|
|
@@ -105,13 +105,13 @@ function createFfmpegMixer() {
|
|
|
105
105
|
* Call this once right after spawning FFmpeg:
|
|
106
106
|
* ```ts
|
|
107
107
|
* const ff = spawnFFmpegProcess();
|
|
108
|
-
* startPacer(ff,
|
|
108
|
+
* startPacer(ff, MIXER_SLOTS);
|
|
109
109
|
* ```
|
|
110
110
|
*
|
|
111
111
|
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
112
112
|
*
|
|
113
113
|
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
114
|
-
* @param slotCount Number of
|
|
114
|
+
* @param slotCount Number of mixer input slots (0..N-1 → fd 3..3+N-1)
|
|
115
115
|
*/
|
|
116
116
|
function startPacer(ff, slotCount, rtcAudioSource) {
|
|
117
117
|
if (stopPacerFn) {
|
|
@@ -193,7 +193,7 @@ function createFfmpegMixer() {
|
|
|
193
193
|
st.q.push(buf);
|
|
194
194
|
}
|
|
195
195
|
/**
|
|
196
|
-
* Clear the audio queue for a specific slot when a participant leaves.
|
|
196
|
+
* Clear the audio queue for a specific slot when a participant leaves or screenshare stops.
|
|
197
197
|
* This prevents stale audio data from continuing to play after disconnect.
|
|
198
198
|
*/
|
|
199
199
|
function clearSlotQueue(slot) {
|
|
@@ -207,12 +207,12 @@ function createFfmpegMixer() {
|
|
|
207
207
|
}
|
|
208
208
|
}
|
|
209
209
|
/**
|
|
210
|
-
* Get the FFmpeg arguments for debugging, which writes each participant's audio to a separate WAV file
|
|
210
|
+
* Get the FFmpeg arguments for debugging, which writes each participant/screenshare's audio to a separate WAV file
|
|
211
211
|
* and also mixes them into a single WAV file.
|
|
212
|
-
* This is useful for inspecting the audio quality and timing of each participant.
|
|
212
|
+
* This is useful for inspecting the audio quality and timing of each participant/screenshare.
|
|
213
213
|
*/
|
|
214
214
|
function getFFmpegArgumentsDebug() {
|
|
215
|
-
const N =
|
|
215
|
+
const N = MIXER_SLOTS;
|
|
216
216
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
217
217
|
const ffArgs = [];
|
|
218
218
|
for (let i = 0; i < N; i++) {
|
|
@@ -232,12 +232,12 @@ function createFfmpegMixer() {
|
|
|
232
232
|
return ffArgs;
|
|
233
233
|
}
|
|
234
234
|
/**
|
|
235
|
-
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
235
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants/screenshares.
|
|
236
236
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
237
237
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
238
238
|
*/
|
|
239
239
|
function getFFmpegArguments() {
|
|
240
|
-
const N =
|
|
240
|
+
const N = MIXER_SLOTS;
|
|
241
241
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
242
242
|
const ffArgs = [];
|
|
243
243
|
for (let i = 0; i < N; i++) {
|
|
@@ -255,23 +255,23 @@ function createFfmpegMixer() {
|
|
|
255
255
|
}
|
|
256
256
|
/*
|
|
257
257
|
* Spawn a new FFmpeg process for debugging purposes.
|
|
258
|
-
* This will write each participant's audio to a separate WAV file and also mix them into a single WAV file.
|
|
258
|
+
* This will write each participant/screenshare's audio to a separate WAV file and also mix them into a single WAV file.
|
|
259
259
|
* The output files will be named pre0.wav, pre1.wav, ..., and mixed.wav.
|
|
260
260
|
* The process will log its output to stderr.
|
|
261
261
|
* @return The spawned FFmpeg process.
|
|
262
262
|
*/
|
|
263
263
|
function spawnFFmpegProcessDebug(rtcAudioSource) {
|
|
264
|
-
const stdio = ["ignore", "ignore", "pipe", ...Array(
|
|
264
|
+
const stdio = ["ignore", "ignore", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
265
265
|
const args = getFFmpegArgumentsDebug();
|
|
266
266
|
const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
|
|
267
|
-
startPacer(ffmpegProcess,
|
|
267
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
268
268
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
269
269
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
270
270
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error (debug): is ffmpeg installed?"));
|
|
271
271
|
return ffmpegProcess;
|
|
272
272
|
}
|
|
273
273
|
/**
|
|
274
|
-
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
274
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants/screenshares.
|
|
275
275
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
276
276
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
277
277
|
* The process will log its output to stderr.
|
|
@@ -279,10 +279,10 @@ function createFfmpegMixer() {
|
|
|
279
279
|
* @return The spawned FFmpeg process.
|
|
280
280
|
*/
|
|
281
281
|
function spawnFFmpegProcess(rtcAudioSource) {
|
|
282
|
-
const stdio = ["pipe", "pipe", "pipe", ...Array(
|
|
282
|
+
const stdio = ["pipe", "pipe", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
283
283
|
const args = getFFmpegArguments();
|
|
284
284
|
const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
|
|
285
|
-
startPacer(ffmpegProcess,
|
|
285
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
286
286
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
287
287
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
288
288
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
@@ -308,7 +308,7 @@ function createFfmpegMixer() {
|
|
|
308
308
|
* that enqueues audio frames into the pacer.
|
|
309
309
|
*
|
|
310
310
|
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
311
|
-
* @param slot The
|
|
311
|
+
* @param slot The mixer slot number (0..N-1) to which this track belongs.
|
|
312
312
|
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
313
313
|
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
314
314
|
*/
|
|
@@ -338,7 +338,7 @@ function createFfmpegMixer() {
|
|
|
338
338
|
}
|
|
339
339
|
/**
|
|
340
340
|
* Stop the FFmpeg process and clean up all resources.
|
|
341
|
-
* This function will unpipe the stdout, end all writable streams for each
|
|
341
|
+
* This function will unpipe the stdout, end all writable streams for each mixer slot,
|
|
342
342
|
* and kill the FFmpeg process.
|
|
343
343
|
* @param ffmpegProcess The FFmpeg process to stop.
|
|
344
344
|
*/
|
|
@@ -352,7 +352,7 @@ function createFfmpegMixer() {
|
|
|
352
352
|
catch (_c) {
|
|
353
353
|
console.error("Failed to unpipe ffmpeg stdout");
|
|
354
354
|
}
|
|
355
|
-
for (let i = 0; i <
|
|
355
|
+
for (let i = 0; i < MIXER_SLOTS; i++) {
|
|
356
356
|
const w = ffmpegProcess.stdio[3 + i];
|
|
357
357
|
try {
|
|
358
358
|
w.end();
|
|
@@ -379,84 +379,60 @@ function createFfmpegMixer() {
|
|
|
379
379
|
};
|
|
380
380
|
}
|
|
381
381
|
|
|
382
|
+
var _a;
|
|
383
|
+
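// Debug: set the DEBUG_MIXER_OUTPUT environment variable to any non-empty value to enable debug output (and write audio to .wav files). Environment values are strings, so "false" or "0" also enable it; leave the variable unset to disable.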
|
|
384
|
+
const DEBUG_MIXER_OUTPUT = (_a = process.env.DEBUG_MIXER_OUTPUT) !== null && _a !== void 0 ? _a : false;
|
|
382
385
|
class AudioMixer extends events.EventEmitter {
|
|
383
386
|
constructor() {
|
|
384
387
|
super();
|
|
385
388
|
this.ffmpegProcess = null;
|
|
386
389
|
this.combinedAudioStream = null;
|
|
387
390
|
this.rtcAudioSource = null;
|
|
388
|
-
this.
|
|
391
|
+
this.mixableSlots = new Map();
|
|
389
392
|
this.activeSlots = {};
|
|
390
393
|
this.mixer = createFfmpegMixer();
|
|
391
394
|
this.setupMediaStream();
|
|
392
|
-
this.
|
|
395
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
393
396
|
}
|
|
394
397
|
setupMediaStream() {
|
|
395
398
|
this.rtcAudioSource = new wrtc.nonstandard.RTCAudioSource();
|
|
396
399
|
const audioTrack = this.rtcAudioSource.createTrack();
|
|
397
400
|
this.combinedAudioStream = new wrtc.MediaStream([audioTrack]);
|
|
398
401
|
}
|
|
399
|
-
|
|
400
|
-
return this.combinedAudioStream;
|
|
401
|
-
}
|
|
402
|
-
handleRemoteParticipants(participants) {
|
|
403
|
-
if (participants.length === 0) {
|
|
404
|
-
this.stopAudioMixer();
|
|
405
|
-
return;
|
|
406
|
-
}
|
|
407
|
-
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
408
|
-
this.ffmpegProcess = this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
409
|
-
}
|
|
410
|
-
for (const p of participants)
|
|
411
|
-
this.attachParticipantIfNeeded(p);
|
|
412
|
-
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
413
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
414
|
-
for (const [slot, pid] of this.participantSlots) {
|
|
415
|
-
if (pid && !liveIds.has(pid))
|
|
416
|
-
this.detachParticipant(pid);
|
|
417
|
-
}
|
|
418
|
-
}
|
|
419
|
-
stopAudioMixer() {
|
|
420
|
-
if (this.ffmpegProcess) {
|
|
421
|
-
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
422
|
-
this.ffmpegProcess = null;
|
|
423
|
-
}
|
|
424
|
-
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
425
|
-
this.activeSlots = {};
|
|
426
|
-
// Recreate the media stream to avoid stale references
|
|
427
|
-
this.setupMediaStream();
|
|
428
|
-
}
|
|
429
|
-
slotForParticipant(participantId) {
|
|
402
|
+
slotForMixable(mixableId) {
|
|
430
403
|
var _a;
|
|
431
|
-
const found = (_a = [...this.
|
|
404
|
+
const found = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === mixableId)) === null || _a === void 0 ? void 0 : _a[0];
|
|
432
405
|
return found === undefined ? null : found;
|
|
433
406
|
}
|
|
434
|
-
|
|
407
|
+
slotsByType(mixableType) {
|
|
408
|
+
return [...this.mixableSlots.entries()].filter(([slotId]) => { var _a; return ((_a = this.activeSlots[slotId]) === null || _a === void 0 ? void 0 : _a.type) === mixableType; });
|
|
409
|
+
}
|
|
410
|
+
acquireSlot(mixableId) {
|
|
435
411
|
var _a;
|
|
436
|
-
const existing = this.
|
|
412
|
+
const existing = this.slotForMixable(mixableId);
|
|
437
413
|
if (existing !== null)
|
|
438
414
|
return existing;
|
|
439
|
-
const empty = (_a = [...this.
|
|
415
|
+
const empty = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
|
|
440
416
|
if (empty === undefined)
|
|
441
417
|
return null;
|
|
442
|
-
this.
|
|
418
|
+
this.mixableSlots.set(empty, mixableId);
|
|
443
419
|
return empty;
|
|
444
420
|
}
|
|
445
|
-
|
|
421
|
+
attachMixableIfNeeded(mixable) {
|
|
446
422
|
var _a;
|
|
447
|
-
const { id:
|
|
448
|
-
if (!
|
|
423
|
+
const { id: mixableId, stream: mixableStream, isAudioEnabled, type } = mixable;
|
|
424
|
+
if (!mixableId)
|
|
449
425
|
return;
|
|
450
|
-
if (!
|
|
451
|
-
this.
|
|
426
|
+
if (!mixableStream || !isAudioEnabled) {
|
|
427
|
+
this.detachMixable(mixableId);
|
|
452
428
|
return;
|
|
453
429
|
}
|
|
454
|
-
const audioTrack =
|
|
430
|
+
const audioTrack = mixableStream.getTracks().find((t) => t.kind === "audio");
|
|
455
431
|
if (!audioTrack) {
|
|
456
|
-
this.
|
|
432
|
+
this.detachMixable(mixableId);
|
|
457
433
|
return;
|
|
458
434
|
}
|
|
459
|
-
const slot = this.acquireSlot(
|
|
435
|
+
const slot = this.acquireSlot(mixableId);
|
|
460
436
|
if (slot === null)
|
|
461
437
|
return;
|
|
462
438
|
const existing = this.activeSlots[slot];
|
|
@@ -472,11 +448,11 @@ class AudioMixer extends events.EventEmitter {
|
|
|
472
448
|
this.activeSlots[slot] = undefined;
|
|
473
449
|
}
|
|
474
450
|
const { sink, writer, stop } = this.mixer.writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
|
|
475
|
-
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id };
|
|
476
|
-
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.
|
|
451
|
+
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id, type };
|
|
452
|
+
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachMixable(mixableId));
|
|
477
453
|
}
|
|
478
|
-
|
|
479
|
-
const slot = this.
|
|
454
|
+
detachMixable(mixableId) {
|
|
455
|
+
const slot = this.slotForMixable(mixableId);
|
|
480
456
|
if (slot === null)
|
|
481
457
|
return;
|
|
482
458
|
const binding = this.activeSlots[slot];
|
|
@@ -491,7 +467,62 @@ class AudioMixer extends events.EventEmitter {
|
|
|
491
467
|
}
|
|
492
468
|
// Clear any queued audio data for this slot to prevent stale audio
|
|
493
469
|
this.mixer.clearSlotQueue(slot);
|
|
494
|
-
this.
|
|
470
|
+
this.mixableSlots.set(slot, "");
|
|
471
|
+
}
|
|
472
|
+
getCombinedAudioStream() {
|
|
473
|
+
return this.combinedAudioStream;
|
|
474
|
+
}
|
|
475
|
+
handleRemoteParticipants(participants) {
|
|
476
|
+
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
477
|
+
const typedSlots = this.slotsByType("participant");
|
|
478
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
479
|
+
for (const [slot, pid] of typedSlots) {
|
|
480
|
+
if (pid && !liveIds.has(pid))
|
|
481
|
+
this.detachMixable(pid);
|
|
482
|
+
}
|
|
483
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
484
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
485
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
486
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
487
|
+
}
|
|
488
|
+
for (const p of participants)
|
|
489
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, p), { type: "participant" }));
|
|
490
|
+
}
|
|
491
|
+
handleScreenshares(screenshares) {
|
|
492
|
+
const screensharesWithAudio = screenshares.filter((screenshare) => screenshare.hasAudioTrack &&
|
|
493
|
+
screenshare.stream &&
|
|
494
|
+
screenshare.stream.getTracks().filter(({ kind }) => kind === "audio").length > 0);
|
|
495
|
+
const liveIds = new Set(screensharesWithAudio.map((p) => p.id).filter(Boolean));
|
|
496
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
497
|
+
for (const [slot, sid] of this.slotsByType("screenshare")) {
|
|
498
|
+
if (sid && !liveIds.has(sid))
|
|
499
|
+
this.detachMixable(sid);
|
|
500
|
+
}
|
|
501
|
+
if (screensharesWithAudio.length === 0) {
|
|
502
|
+
return;
|
|
503
|
+
}
|
|
504
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
505
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
506
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
507
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
508
|
+
}
|
|
509
|
+
const mixables = screensharesWithAudio.map(({ id, stream, hasAudioTrack }) => ({
|
|
510
|
+
id,
|
|
511
|
+
stream,
|
|
512
|
+
isAudioEnabled: hasAudioTrack,
|
|
513
|
+
}));
|
|
514
|
+
for (const s of mixables)
|
|
515
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, s), { type: "screenshare" }));
|
|
516
|
+
}
|
|
517
|
+
stopAudioMixer() {
|
|
518
|
+
if (this.ffmpegProcess) {
|
|
519
|
+
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
520
|
+
this.ffmpegProcess = null;
|
|
521
|
+
}
|
|
522
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
523
|
+
this.activeSlots = {};
|
|
524
|
+
// Recreate the media stream to avoid stale references
|
|
525
|
+
this.setupMediaStream();
|
|
495
526
|
}
|
|
496
527
|
}
|
|
497
528
|
|