@whereby.com/assistant-sdk 1.2.16 → 1.2.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +106 -75
- package/dist/index.mjs +106 -75
- package/dist/legacy-esm.js +106 -75
- package/dist/tools.cjs +106 -75
- package/dist/tools.d.ts +8 -6
- package/package.json +2 -2
package/dist/index.cjs
CHANGED
|
@@ -66,8 +66,8 @@ class AudioSink extends RTCAudioSink {
|
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
69
|
-
// participants to these slots based on mute/unmute state.
|
|
70
|
-
const
|
|
69
|
+
// participants/screenshares to these slots based on mute/unmute state.
|
|
70
|
+
const MIXER_SLOTS = 20;
|
|
71
71
|
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
72
72
|
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
73
73
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
@@ -91,11 +91,11 @@ function createFfmpegMixer() {
|
|
|
91
91
|
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
92
92
|
}
|
|
93
93
|
({
|
|
94
|
-
enqFrames: new Array(
|
|
95
|
-
enqSamples: new Array(
|
|
96
|
-
wroteFrames: new Array(
|
|
97
|
-
wroteSamples: new Array(
|
|
98
|
-
lastFramesSeen: new Array(
|
|
94
|
+
enqFrames: new Array(MIXER_SLOTS).fill(0),
|
|
95
|
+
enqSamples: new Array(MIXER_SLOTS).fill(0),
|
|
96
|
+
wroteFrames: new Array(MIXER_SLOTS).fill(0),
|
|
97
|
+
wroteSamples: new Array(MIXER_SLOTS).fill(0),
|
|
98
|
+
lastFramesSeen: new Array(MIXER_SLOTS).fill(0),
|
|
99
99
|
});
|
|
100
100
|
let slots = [];
|
|
101
101
|
let stopPacerFn = null;
|
|
@@ -152,13 +152,13 @@ function createFfmpegMixer() {
|
|
|
152
152
|
* Call this once right after spawning FFmpeg:
|
|
153
153
|
* ```ts
|
|
154
154
|
* const ff = spawnFFmpegProcess();
|
|
155
|
-
* startPacer(ff,
|
|
155
|
+
* startPacer(ff, MIXER_SLOTS);
|
|
156
156
|
* ```
|
|
157
157
|
*
|
|
158
158
|
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
159
159
|
*
|
|
160
160
|
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
161
|
-
* @param slotCount Number of
|
|
161
|
+
* @param slotCount Number of mixer input slots (0..N-1 → fd 3..3+N-1)
|
|
162
162
|
*/
|
|
163
163
|
function startPacer(ff, slotCount, rtcAudioSource) {
|
|
164
164
|
if (stopPacerFn) {
|
|
@@ -240,7 +240,7 @@ function createFfmpegMixer() {
|
|
|
240
240
|
st.q.push(buf);
|
|
241
241
|
}
|
|
242
242
|
/**
|
|
243
|
-
* Clear the audio queue for a specific slot when a participant leaves.
|
|
243
|
+
* Clear the audio queue for a specific slot when a participant leaves or screenshare stops.
|
|
244
244
|
* This prevents stale audio data from continuing to play after disconnect.
|
|
245
245
|
*/
|
|
246
246
|
function clearSlotQueue(slot) {
|
|
@@ -254,12 +254,12 @@ function createFfmpegMixer() {
|
|
|
254
254
|
}
|
|
255
255
|
}
|
|
256
256
|
/**
|
|
257
|
-
* Get the FFmpeg arguments for debugging, which writes each participant's audio to a separate WAV file
|
|
257
|
+
* Get the FFmpeg arguments for debugging, which writes each participant/screenshare's audio to a separate WAV file
|
|
258
258
|
* and also mixes them into a single WAV file.
|
|
259
|
-
* This is useful for inspecting the audio quality and timing of each participant.
|
|
259
|
+
* This is useful for inspecting the audio quality and timing of each participant/screenshare.
|
|
260
260
|
*/
|
|
261
261
|
function getFFmpegArgumentsDebug() {
|
|
262
|
-
const N =
|
|
262
|
+
const N = MIXER_SLOTS;
|
|
263
263
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
264
264
|
const ffArgs = [];
|
|
265
265
|
for (let i = 0; i < N; i++) {
|
|
@@ -279,12 +279,12 @@ function createFfmpegMixer() {
|
|
|
279
279
|
return ffArgs;
|
|
280
280
|
}
|
|
281
281
|
/**
|
|
282
|
-
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
282
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants/screenshares.
|
|
283
283
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
284
284
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
285
285
|
*/
|
|
286
286
|
function getFFmpegArguments() {
|
|
287
|
-
const N =
|
|
287
|
+
const N = MIXER_SLOTS;
|
|
288
288
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
289
289
|
const ffArgs = [];
|
|
290
290
|
for (let i = 0; i < N; i++) {
|
|
@@ -302,23 +302,23 @@ function createFfmpegMixer() {
|
|
|
302
302
|
}
|
|
303
303
|
/*
|
|
304
304
|
* Spawn a new FFmpeg process for debugging purposes.
|
|
305
|
-
* This will write each participant's audio to a separate WAV file and also mix them into a single WAV file.
|
|
305
|
+
* This will write each participant/screenshare's audio to a separate WAV file and also mix them into a single WAV file.
|
|
306
306
|
* The output files will be named pre0.wav, pre1.wav, ..., and mixed.wav.
|
|
307
307
|
* The process will log its output to stderr.
|
|
308
308
|
* @return The spawned FFmpeg process.
|
|
309
309
|
*/
|
|
310
310
|
function spawnFFmpegProcessDebug(rtcAudioSource) {
|
|
311
|
-
const stdio = ["ignore", "ignore", "pipe", ...Array(
|
|
311
|
+
const stdio = ["ignore", "ignore", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
312
312
|
const args = getFFmpegArgumentsDebug();
|
|
313
313
|
const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
|
|
314
|
-
startPacer(ffmpegProcess,
|
|
314
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
315
315
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
316
316
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
317
317
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error (debug): is ffmpeg installed?"));
|
|
318
318
|
return ffmpegProcess;
|
|
319
319
|
}
|
|
320
320
|
/**
|
|
321
|
-
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
321
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants/screenshares.
|
|
322
322
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
323
323
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
324
324
|
* The process will log its output to stderr.
|
|
@@ -326,10 +326,10 @@ function createFfmpegMixer() {
|
|
|
326
326
|
* @return The spawned FFmpeg process.
|
|
327
327
|
*/
|
|
328
328
|
function spawnFFmpegProcess(rtcAudioSource) {
|
|
329
|
-
const stdio = ["pipe", "pipe", "pipe", ...Array(
|
|
329
|
+
const stdio = ["pipe", "pipe", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
330
330
|
const args = getFFmpegArguments();
|
|
331
331
|
const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
|
|
332
|
-
startPacer(ffmpegProcess,
|
|
332
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
333
333
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
334
334
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
335
335
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
@@ -355,7 +355,7 @@ function createFfmpegMixer() {
|
|
|
355
355
|
* that enqueues audio frames into the pacer.
|
|
356
356
|
*
|
|
357
357
|
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
358
|
-
* @param slot The
|
|
358
|
+
* @param slot The mixer slot number (0..N-1) to which this track belongs.
|
|
359
359
|
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
360
360
|
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
361
361
|
*/
|
|
@@ -385,7 +385,7 @@ function createFfmpegMixer() {
|
|
|
385
385
|
}
|
|
386
386
|
/**
|
|
387
387
|
* Stop the FFmpeg process and clean up all resources.
|
|
388
|
-
* This function will unpipe the stdout, end all writable streams for each
|
|
388
|
+
* This function will unpipe the stdout, end all writable streams for each mixer slot,
|
|
389
389
|
* and kill the FFmpeg process.
|
|
390
390
|
* @param ffmpegProcess The FFmpeg process to stop.
|
|
391
391
|
*/
|
|
@@ -399,7 +399,7 @@ function createFfmpegMixer() {
|
|
|
399
399
|
catch (_c) {
|
|
400
400
|
console.error("Failed to unpipe ffmpeg stdout");
|
|
401
401
|
}
|
|
402
|
-
for (let i = 0; i <
|
|
402
|
+
for (let i = 0; i < MIXER_SLOTS; i++) {
|
|
403
403
|
const w = ffmpegProcess.stdio[3 + i];
|
|
404
404
|
try {
|
|
405
405
|
w.end();
|
|
@@ -426,84 +426,60 @@ function createFfmpegMixer() {
|
|
|
426
426
|
};
|
|
427
427
|
}
|
|
428
428
|
|
|
429
|
+
var _a;
|
|
430
|
+
// Debug: set to true to enable debug output (and write audio to .wav files)
|
|
431
|
+
const DEBUG_MIXER_OUTPUT = (_a = process.env.DEBUG_MIXER_OUTPUT) !== null && _a !== void 0 ? _a : false;
|
|
429
432
|
class AudioMixer extends EventEmitter.EventEmitter {
|
|
430
433
|
constructor() {
|
|
431
434
|
super();
|
|
432
435
|
this.ffmpegProcess = null;
|
|
433
436
|
this.combinedAudioStream = null;
|
|
434
437
|
this.rtcAudioSource = null;
|
|
435
|
-
this.
|
|
438
|
+
this.mixableSlots = new Map();
|
|
436
439
|
this.activeSlots = {};
|
|
437
440
|
this.mixer = createFfmpegMixer();
|
|
438
441
|
this.setupMediaStream();
|
|
439
|
-
this.
|
|
442
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
440
443
|
}
|
|
441
444
|
setupMediaStream() {
|
|
442
445
|
this.rtcAudioSource = new wrtc.nonstandard.RTCAudioSource();
|
|
443
446
|
const audioTrack = this.rtcAudioSource.createTrack();
|
|
444
447
|
this.combinedAudioStream = new wrtc.MediaStream([audioTrack]);
|
|
445
448
|
}
|
|
446
|
-
|
|
447
|
-
return this.combinedAudioStream;
|
|
448
|
-
}
|
|
449
|
-
handleRemoteParticipants(participants) {
|
|
450
|
-
if (participants.length === 0) {
|
|
451
|
-
this.stopAudioMixer();
|
|
452
|
-
return;
|
|
453
|
-
}
|
|
454
|
-
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
455
|
-
this.ffmpegProcess = this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
456
|
-
}
|
|
457
|
-
for (const p of participants)
|
|
458
|
-
this.attachParticipantIfNeeded(p);
|
|
459
|
-
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
460
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
461
|
-
for (const [slot, pid] of this.participantSlots) {
|
|
462
|
-
if (pid && !liveIds.has(pid))
|
|
463
|
-
this.detachParticipant(pid);
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
stopAudioMixer() {
|
|
467
|
-
if (this.ffmpegProcess) {
|
|
468
|
-
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
469
|
-
this.ffmpegProcess = null;
|
|
470
|
-
}
|
|
471
|
-
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
472
|
-
this.activeSlots = {};
|
|
473
|
-
// Recreate the media stream to avoid stale references
|
|
474
|
-
this.setupMediaStream();
|
|
475
|
-
}
|
|
476
|
-
slotForParticipant(participantId) {
|
|
449
|
+
slotForMixable(mixableId) {
|
|
477
450
|
var _a;
|
|
478
|
-
const found = (_a = [...this.
|
|
451
|
+
const found = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === mixableId)) === null || _a === void 0 ? void 0 : _a[0];
|
|
479
452
|
return found === undefined ? null : found;
|
|
480
453
|
}
|
|
481
|
-
|
|
454
|
+
slotsByType(mixableType) {
|
|
455
|
+
return [...this.mixableSlots.entries()].filter(([slotId]) => { var _a; return ((_a = this.activeSlots[slotId]) === null || _a === void 0 ? void 0 : _a.type) === mixableType; });
|
|
456
|
+
}
|
|
457
|
+
acquireSlot(mixableId) {
|
|
482
458
|
var _a;
|
|
483
|
-
const existing = this.
|
|
459
|
+
const existing = this.slotForMixable(mixableId);
|
|
484
460
|
if (existing !== null)
|
|
485
461
|
return existing;
|
|
486
|
-
const empty = (_a = [...this.
|
|
462
|
+
const empty = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
|
|
487
463
|
if (empty === undefined)
|
|
488
464
|
return null;
|
|
489
|
-
this.
|
|
465
|
+
this.mixableSlots.set(empty, mixableId);
|
|
490
466
|
return empty;
|
|
491
467
|
}
|
|
492
|
-
|
|
468
|
+
attachMixableIfNeeded(mixable) {
|
|
493
469
|
var _a;
|
|
494
|
-
const { id:
|
|
495
|
-
if (!
|
|
470
|
+
const { id: mixableId, stream: mixableStream, isAudioEnabled, type } = mixable;
|
|
471
|
+
if (!mixableId)
|
|
496
472
|
return;
|
|
497
|
-
if (!
|
|
498
|
-
this.
|
|
473
|
+
if (!mixableStream || !isAudioEnabled) {
|
|
474
|
+
this.detachMixable(mixableId);
|
|
499
475
|
return;
|
|
500
476
|
}
|
|
501
|
-
const audioTrack =
|
|
477
|
+
const audioTrack = mixableStream.getTracks().find((t) => t.kind === "audio");
|
|
502
478
|
if (!audioTrack) {
|
|
503
|
-
this.
|
|
479
|
+
this.detachMixable(mixableId);
|
|
504
480
|
return;
|
|
505
481
|
}
|
|
506
|
-
const slot = this.acquireSlot(
|
|
482
|
+
const slot = this.acquireSlot(mixableId);
|
|
507
483
|
if (slot === null)
|
|
508
484
|
return;
|
|
509
485
|
const existing = this.activeSlots[slot];
|
|
@@ -519,11 +495,11 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
519
495
|
this.activeSlots[slot] = undefined;
|
|
520
496
|
}
|
|
521
497
|
const { sink, writer, stop } = this.mixer.writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
|
|
522
|
-
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id };
|
|
523
|
-
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.
|
|
498
|
+
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id, type };
|
|
499
|
+
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachMixable(mixableId));
|
|
524
500
|
}
|
|
525
|
-
|
|
526
|
-
const slot = this.
|
|
501
|
+
detachMixable(mixableId) {
|
|
502
|
+
const slot = this.slotForMixable(mixableId);
|
|
527
503
|
if (slot === null)
|
|
528
504
|
return;
|
|
529
505
|
const binding = this.activeSlots[slot];
|
|
@@ -538,7 +514,62 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
538
514
|
}
|
|
539
515
|
// Clear any queued audio data for this slot to prevent stale audio
|
|
540
516
|
this.mixer.clearSlotQueue(slot);
|
|
541
|
-
this.
|
|
517
|
+
this.mixableSlots.set(slot, "");
|
|
518
|
+
}
|
|
519
|
+
getCombinedAudioStream() {
|
|
520
|
+
return this.combinedAudioStream;
|
|
521
|
+
}
|
|
522
|
+
handleRemoteParticipants(participants) {
|
|
523
|
+
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
524
|
+
const typedSlots = this.slotsByType("participant");
|
|
525
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
526
|
+
for (const [slot, pid] of typedSlots) {
|
|
527
|
+
if (pid && !liveIds.has(pid))
|
|
528
|
+
this.detachMixable(pid);
|
|
529
|
+
}
|
|
530
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
531
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
532
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
533
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
534
|
+
}
|
|
535
|
+
for (const p of participants)
|
|
536
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, p), { type: "participant" }));
|
|
537
|
+
}
|
|
538
|
+
handleScreenshares(screenshares) {
|
|
539
|
+
const screensharesWithAudio = screenshares.filter((screenshare) => screenshare.hasAudioTrack &&
|
|
540
|
+
screenshare.stream &&
|
|
541
|
+
screenshare.stream.getTracks().filter(({ kind }) => kind === "audio").length > 0);
|
|
542
|
+
const liveIds = new Set(screensharesWithAudio.map((p) => p.id).filter(Boolean));
|
|
543
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
544
|
+
for (const [slot, sid] of this.slotsByType("screenshare")) {
|
|
545
|
+
if (sid && !liveIds.has(sid))
|
|
546
|
+
this.detachMixable(sid);
|
|
547
|
+
}
|
|
548
|
+
if (screensharesWithAudio.length === 0) {
|
|
549
|
+
return;
|
|
550
|
+
}
|
|
551
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
552
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
553
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
554
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
555
|
+
}
|
|
556
|
+
const mixables = screensharesWithAudio.map(({ id, stream, hasAudioTrack }) => ({
|
|
557
|
+
id,
|
|
558
|
+
stream,
|
|
559
|
+
isAudioEnabled: hasAudioTrack,
|
|
560
|
+
}));
|
|
561
|
+
for (const s of mixables)
|
|
562
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, s), { type: "screenshare" }));
|
|
563
|
+
}
|
|
564
|
+
stopAudioMixer() {
|
|
565
|
+
if (this.ffmpegProcess) {
|
|
566
|
+
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
567
|
+
this.ffmpegProcess = null;
|
|
568
|
+
}
|
|
569
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
570
|
+
this.activeSlots = {};
|
|
571
|
+
// Recreate the media stream to avoid stale references
|
|
572
|
+
this.setupMediaStream();
|
|
542
573
|
}
|
|
543
574
|
}
|
|
544
575
|
|
package/dist/index.mjs
CHANGED
|
@@ -64,8 +64,8 @@ class AudioSink extends RTCAudioSink {
|
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
67
|
-
// participants to these slots based on mute/unmute state.
|
|
68
|
-
const
|
|
67
|
+
// participants/screenshares to these slots based on mute/unmute state.
|
|
68
|
+
const MIXER_SLOTS = 20;
|
|
69
69
|
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
70
70
|
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
71
71
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
@@ -89,11 +89,11 @@ function createFfmpegMixer() {
|
|
|
89
89
|
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
90
90
|
}
|
|
91
91
|
({
|
|
92
|
-
enqFrames: new Array(
|
|
93
|
-
enqSamples: new Array(
|
|
94
|
-
wroteFrames: new Array(
|
|
95
|
-
wroteSamples: new Array(
|
|
96
|
-
lastFramesSeen: new Array(
|
|
92
|
+
enqFrames: new Array(MIXER_SLOTS).fill(0),
|
|
93
|
+
enqSamples: new Array(MIXER_SLOTS).fill(0),
|
|
94
|
+
wroteFrames: new Array(MIXER_SLOTS).fill(0),
|
|
95
|
+
wroteSamples: new Array(MIXER_SLOTS).fill(0),
|
|
96
|
+
lastFramesSeen: new Array(MIXER_SLOTS).fill(0),
|
|
97
97
|
});
|
|
98
98
|
let slots = [];
|
|
99
99
|
let stopPacerFn = null;
|
|
@@ -150,13 +150,13 @@ function createFfmpegMixer() {
|
|
|
150
150
|
* Call this once right after spawning FFmpeg:
|
|
151
151
|
* ```ts
|
|
152
152
|
* const ff = spawnFFmpegProcess();
|
|
153
|
-
* startPacer(ff,
|
|
153
|
+
* startPacer(ff, MIXER_SLOTS);
|
|
154
154
|
* ```
|
|
155
155
|
*
|
|
156
156
|
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
157
157
|
*
|
|
158
158
|
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
159
|
-
* @param slotCount Number of
|
|
159
|
+
* @param slotCount Number of mixer input slots (0..N-1 → fd 3..3+N-1)
|
|
160
160
|
*/
|
|
161
161
|
function startPacer(ff, slotCount, rtcAudioSource) {
|
|
162
162
|
if (stopPacerFn) {
|
|
@@ -238,7 +238,7 @@ function createFfmpegMixer() {
|
|
|
238
238
|
st.q.push(buf);
|
|
239
239
|
}
|
|
240
240
|
/**
|
|
241
|
-
* Clear the audio queue for a specific slot when a participant leaves.
|
|
241
|
+
* Clear the audio queue for a specific slot when a participant leaves or screenshare stops.
|
|
242
242
|
* This prevents stale audio data from continuing to play after disconnect.
|
|
243
243
|
*/
|
|
244
244
|
function clearSlotQueue(slot) {
|
|
@@ -252,12 +252,12 @@ function createFfmpegMixer() {
|
|
|
252
252
|
}
|
|
253
253
|
}
|
|
254
254
|
/**
|
|
255
|
-
* Get the FFmpeg arguments for debugging, which writes each participant's audio to a separate WAV file
|
|
255
|
+
* Get the FFmpeg arguments for debugging, which writes each participant/screenshare's audio to a separate WAV file
|
|
256
256
|
* and also mixes them into a single WAV file.
|
|
257
|
-
* This is useful for inspecting the audio quality and timing of each participant.
|
|
257
|
+
* This is useful for inspecting the audio quality and timing of each participant/screenshare.
|
|
258
258
|
*/
|
|
259
259
|
function getFFmpegArgumentsDebug() {
|
|
260
|
-
const N =
|
|
260
|
+
const N = MIXER_SLOTS;
|
|
261
261
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
262
262
|
const ffArgs = [];
|
|
263
263
|
for (let i = 0; i < N; i++) {
|
|
@@ -277,12 +277,12 @@ function createFfmpegMixer() {
|
|
|
277
277
|
return ffArgs;
|
|
278
278
|
}
|
|
279
279
|
/**
|
|
280
|
-
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
280
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants/screenshares.
|
|
281
281
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
282
282
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
283
283
|
*/
|
|
284
284
|
function getFFmpegArguments() {
|
|
285
|
-
const N =
|
|
285
|
+
const N = MIXER_SLOTS;
|
|
286
286
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
287
287
|
const ffArgs = [];
|
|
288
288
|
for (let i = 0; i < N; i++) {
|
|
@@ -300,23 +300,23 @@ function createFfmpegMixer() {
|
|
|
300
300
|
}
|
|
301
301
|
/*
|
|
302
302
|
* Spawn a new FFmpeg process for debugging purposes.
|
|
303
|
-
* This will write each participant's audio to a separate WAV file and also mix them into a single WAV file.
|
|
303
|
+
* This will write each participant/screenshare's audio to a separate WAV file and also mix them into a single WAV file.
|
|
304
304
|
* The output files will be named pre0.wav, pre1.wav, ..., and mixed.wav.
|
|
305
305
|
* The process will log its output to stderr.
|
|
306
306
|
* @return The spawned FFmpeg process.
|
|
307
307
|
*/
|
|
308
308
|
function spawnFFmpegProcessDebug(rtcAudioSource) {
|
|
309
|
-
const stdio = ["ignore", "ignore", "pipe", ...Array(
|
|
309
|
+
const stdio = ["ignore", "ignore", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
310
310
|
const args = getFFmpegArgumentsDebug();
|
|
311
311
|
const ffmpegProcess = spawn("ffmpeg", args, { stdio });
|
|
312
|
-
startPacer(ffmpegProcess,
|
|
312
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
313
313
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
314
314
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
315
315
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error (debug): is ffmpeg installed?"));
|
|
316
316
|
return ffmpegProcess;
|
|
317
317
|
}
|
|
318
318
|
/**
|
|
319
|
-
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
319
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants/screenshares.
|
|
320
320
|
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
321
321
|
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
322
322
|
* The process will log its output to stderr.
|
|
@@ -324,10 +324,10 @@ function createFfmpegMixer() {
|
|
|
324
324
|
* @return The spawned FFmpeg process.
|
|
325
325
|
*/
|
|
326
326
|
function spawnFFmpegProcess(rtcAudioSource) {
|
|
327
|
-
const stdio = ["pipe", "pipe", "pipe", ...Array(
|
|
327
|
+
const stdio = ["pipe", "pipe", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
|
|
328
328
|
const args = getFFmpegArguments();
|
|
329
329
|
const ffmpegProcess = spawn("ffmpeg", args, { stdio });
|
|
330
|
-
startPacer(ffmpegProcess,
|
|
330
|
+
startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
|
|
331
331
|
ffmpegProcess.stderr.setEncoding("utf8");
|
|
332
332
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
333
333
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
@@ -353,7 +353,7 @@ function createFfmpegMixer() {
|
|
|
353
353
|
* that enqueues audio frames into the pacer.
|
|
354
354
|
*
|
|
355
355
|
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
356
|
-
* @param slot The
|
|
356
|
+
* @param slot The mixer slot number (0..N-1) to which this track belongs.
|
|
357
357
|
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
358
358
|
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
359
359
|
*/
|
|
@@ -383,7 +383,7 @@ function createFfmpegMixer() {
|
|
|
383
383
|
}
|
|
384
384
|
/**
|
|
385
385
|
* Stop the FFmpeg process and clean up all resources.
|
|
386
|
-
* This function will unpipe the stdout, end all writable streams for each
|
|
386
|
+
* This function will unpipe the stdout, end all writable streams for each mixer slot,
|
|
387
387
|
* and kill the FFmpeg process.
|
|
388
388
|
* @param ffmpegProcess The FFmpeg process to stop.
|
|
389
389
|
*/
|
|
@@ -397,7 +397,7 @@ function createFfmpegMixer() {
|
|
|
397
397
|
catch (_c) {
|
|
398
398
|
console.error("Failed to unpipe ffmpeg stdout");
|
|
399
399
|
}
|
|
400
|
-
for (let i = 0; i <
|
|
400
|
+
for (let i = 0; i < MIXER_SLOTS; i++) {
|
|
401
401
|
const w = ffmpegProcess.stdio[3 + i];
|
|
402
402
|
try {
|
|
403
403
|
w.end();
|
|
@@ -424,84 +424,60 @@ function createFfmpegMixer() {
|
|
|
424
424
|
};
|
|
425
425
|
}
|
|
426
426
|
|
|
427
|
+
var _a;
|
|
428
|
+
// Debug: set to true to enable debug output (and write audio to .wav files)
|
|
429
|
+
const DEBUG_MIXER_OUTPUT = (_a = process.env.DEBUG_MIXER_OUTPUT) !== null && _a !== void 0 ? _a : false;
|
|
427
430
|
class AudioMixer extends EventEmitter {
|
|
428
431
|
constructor() {
|
|
429
432
|
super();
|
|
430
433
|
this.ffmpegProcess = null;
|
|
431
434
|
this.combinedAudioStream = null;
|
|
432
435
|
this.rtcAudioSource = null;
|
|
433
|
-
this.
|
|
436
|
+
this.mixableSlots = new Map();
|
|
434
437
|
this.activeSlots = {};
|
|
435
438
|
this.mixer = createFfmpegMixer();
|
|
436
439
|
this.setupMediaStream();
|
|
437
|
-
this.
|
|
440
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
438
441
|
}
|
|
439
442
|
setupMediaStream() {
|
|
440
443
|
this.rtcAudioSource = new wrtc.nonstandard.RTCAudioSource();
|
|
441
444
|
const audioTrack = this.rtcAudioSource.createTrack();
|
|
442
445
|
this.combinedAudioStream = new wrtc.MediaStream([audioTrack]);
|
|
443
446
|
}
|
|
444
|
-
|
|
445
|
-
return this.combinedAudioStream;
|
|
446
|
-
}
|
|
447
|
-
handleRemoteParticipants(participants) {
|
|
448
|
-
if (participants.length === 0) {
|
|
449
|
-
this.stopAudioMixer();
|
|
450
|
-
return;
|
|
451
|
-
}
|
|
452
|
-
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
453
|
-
this.ffmpegProcess = this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
454
|
-
}
|
|
455
|
-
for (const p of participants)
|
|
456
|
-
this.attachParticipantIfNeeded(p);
|
|
457
|
-
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
458
|
-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
459
|
-
for (const [slot, pid] of this.participantSlots) {
|
|
460
|
-
if (pid && !liveIds.has(pid))
|
|
461
|
-
this.detachParticipant(pid);
|
|
462
|
-
}
|
|
463
|
-
}
|
|
464
|
-
stopAudioMixer() {
|
|
465
|
-
if (this.ffmpegProcess) {
|
|
466
|
-
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
467
|
-
this.ffmpegProcess = null;
|
|
468
|
-
}
|
|
469
|
-
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
470
|
-
this.activeSlots = {};
|
|
471
|
-
// Recreate the media stream to avoid stale references
|
|
472
|
-
this.setupMediaStream();
|
|
473
|
-
}
|
|
474
|
-
slotForParticipant(participantId) {
|
|
447
|
+
slotForMixable(mixableId) {
|
|
475
448
|
var _a;
|
|
476
|
-
const found = (_a = [...this.
|
|
449
|
+
const found = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === mixableId)) === null || _a === void 0 ? void 0 : _a[0];
|
|
477
450
|
return found === undefined ? null : found;
|
|
478
451
|
}
|
|
479
|
-
|
|
452
|
+
slotsByType(mixableType) {
|
|
453
|
+
return [...this.mixableSlots.entries()].filter(([slotId]) => { var _a; return ((_a = this.activeSlots[slotId]) === null || _a === void 0 ? void 0 : _a.type) === mixableType; });
|
|
454
|
+
}
|
|
455
|
+
acquireSlot(mixableId) {
|
|
480
456
|
var _a;
|
|
481
|
-
const existing = this.
|
|
457
|
+
const existing = this.slotForMixable(mixableId);
|
|
482
458
|
if (existing !== null)
|
|
483
459
|
return existing;
|
|
484
|
-
const empty = (_a = [...this.
|
|
460
|
+
const empty = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
|
|
485
461
|
if (empty === undefined)
|
|
486
462
|
return null;
|
|
487
|
-
this.
|
|
463
|
+
this.mixableSlots.set(empty, mixableId);
|
|
488
464
|
return empty;
|
|
489
465
|
}
|
|
490
|
-
|
|
466
|
+
attachMixableIfNeeded(mixable) {
|
|
491
467
|
var _a;
|
|
492
|
-
const { id:
|
|
493
|
-
if (!
|
|
468
|
+
const { id: mixableId, stream: mixableStream, isAudioEnabled, type } = mixable;
|
|
469
|
+
if (!mixableId)
|
|
494
470
|
return;
|
|
495
|
-
if (!
|
|
496
|
-
this.
|
|
471
|
+
if (!mixableStream || !isAudioEnabled) {
|
|
472
|
+
this.detachMixable(mixableId);
|
|
497
473
|
return;
|
|
498
474
|
}
|
|
499
|
-
const audioTrack =
|
|
475
|
+
const audioTrack = mixableStream.getTracks().find((t) => t.kind === "audio");
|
|
500
476
|
if (!audioTrack) {
|
|
501
|
-
this.
|
|
477
|
+
this.detachMixable(mixableId);
|
|
502
478
|
return;
|
|
503
479
|
}
|
|
504
|
-
const slot = this.acquireSlot(
|
|
480
|
+
const slot = this.acquireSlot(mixableId);
|
|
505
481
|
if (slot === null)
|
|
506
482
|
return;
|
|
507
483
|
const existing = this.activeSlots[slot];
|
|
@@ -517,11 +493,11 @@ class AudioMixer extends EventEmitter {
|
|
|
517
493
|
this.activeSlots[slot] = undefined;
|
|
518
494
|
}
|
|
519
495
|
const { sink, writer, stop } = this.mixer.writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
|
|
520
|
-
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id };
|
|
521
|
-
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.
|
|
496
|
+
this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id, type };
|
|
497
|
+
(_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachMixable(mixableId));
|
|
522
498
|
}
|
|
523
|
-
|
|
524
|
-
const slot = this.
|
|
499
|
+
detachMixable(mixableId) {
|
|
500
|
+
const slot = this.slotForMixable(mixableId);
|
|
525
501
|
if (slot === null)
|
|
526
502
|
return;
|
|
527
503
|
const binding = this.activeSlots[slot];
|
|
@@ -536,7 +512,62 @@ class AudioMixer extends EventEmitter {
|
|
|
536
512
|
}
|
|
537
513
|
// Clear any queued audio data for this slot to prevent stale audio
|
|
538
514
|
this.mixer.clearSlotQueue(slot);
|
|
539
|
-
this.
|
|
515
|
+
this.mixableSlots.set(slot, "");
|
|
516
|
+
}
|
|
517
|
+
getCombinedAudioStream() {
|
|
518
|
+
return this.combinedAudioStream;
|
|
519
|
+
}
|
|
520
|
+
handleRemoteParticipants(participants) {
|
|
521
|
+
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
522
|
+
const typedSlots = this.slotsByType("participant");
|
|
523
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
524
|
+
for (const [slot, pid] of typedSlots) {
|
|
525
|
+
if (pid && !liveIds.has(pid))
|
|
526
|
+
this.detachMixable(pid);
|
|
527
|
+
}
|
|
528
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
529
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
530
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
531
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
532
|
+
}
|
|
533
|
+
for (const p of participants)
|
|
534
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, p), { type: "participant" }));
|
|
535
|
+
}
|
|
536
|
+
handleScreenshares(screenshares) {
|
|
537
|
+
const screensharesWithAudio = screenshares.filter((screenshare) => screenshare.hasAudioTrack &&
|
|
538
|
+
screenshare.stream &&
|
|
539
|
+
screenshare.stream.getTracks().filter(({ kind }) => kind === "audio").length > 0);
|
|
540
|
+
const liveIds = new Set(screensharesWithAudio.map((p) => p.id).filter(Boolean));
|
|
541
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
542
|
+
for (const [slot, sid] of this.slotsByType("screenshare")) {
|
|
543
|
+
if (sid && !liveIds.has(sid))
|
|
544
|
+
this.detachMixable(sid);
|
|
545
|
+
}
|
|
546
|
+
if (screensharesWithAudio.length === 0) {
|
|
547
|
+
return;
|
|
548
|
+
}
|
|
549
|
+
if (!this.ffmpegProcess && this.rtcAudioSource) {
|
|
550
|
+
this.ffmpegProcess = DEBUG_MIXER_OUTPUT
|
|
551
|
+
? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
|
|
552
|
+
: this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
|
|
553
|
+
}
|
|
554
|
+
const mixables = screensharesWithAudio.map(({ id, stream, hasAudioTrack }) => ({
|
|
555
|
+
id,
|
|
556
|
+
stream,
|
|
557
|
+
isAudioEnabled: hasAudioTrack,
|
|
558
|
+
}));
|
|
559
|
+
for (const s of mixables)
|
|
560
|
+
this.attachMixableIfNeeded(Object.assign(Object.assign({}, s), { type: "screenshare" }));
|
|
561
|
+
}
|
|
562
|
+
stopAudioMixer() {
|
|
563
|
+
if (this.ffmpegProcess) {
|
|
564
|
+
this.mixer.stopFFmpegProcess(this.ffmpegProcess);
|
|
565
|
+
this.ffmpegProcess = null;
|
|
566
|
+
}
|
|
567
|
+
this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
|
|
568
|
+
this.activeSlots = {};
|
|
569
|
+
// Recreate the media stream to avoid stale references
|
|
570
|
+
this.setupMediaStream();
|
|
540
571
|
}
|
|
541
572
|
}
|
|
542
573
|
|