@whereby.com/assistant-sdk 1.2.15 → 1.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -66,8 +66,8 @@ class AudioSink extends RTCAudioSink {
66
66
  }
67
67
 
68
68
  // Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
69
- // participants to these slots based on mute/unmute state.
70
- const PARTICIPANT_SLOTS = 20;
69
+ // participants/screenshares to these slots based on mute/unmute state.
70
+ const MIXER_SLOTS = 20;
71
71
  // Each sample is 2 bytes (16 bits) for PCM audio - s16le format
72
72
  // 48000 Hz is the standard sample rate for WebRTC audio
73
73
  const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
@@ -91,11 +91,11 @@ function createFfmpegMixer() {
91
91
  slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
92
92
  }
93
93
  ({
94
- enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
95
- enqSamples: new Array(PARTICIPANT_SLOTS).fill(0),
96
- wroteFrames: new Array(PARTICIPANT_SLOTS).fill(0),
97
- wroteSamples: new Array(PARTICIPANT_SLOTS).fill(0),
98
- lastFramesSeen: new Array(PARTICIPANT_SLOTS).fill(0),
94
+ enqFrames: new Array(MIXER_SLOTS).fill(0),
95
+ enqSamples: new Array(MIXER_SLOTS).fill(0),
96
+ wroteFrames: new Array(MIXER_SLOTS).fill(0),
97
+ wroteSamples: new Array(MIXER_SLOTS).fill(0),
98
+ lastFramesSeen: new Array(MIXER_SLOTS).fill(0),
99
99
  });
100
100
  let slots = [];
101
101
  let stopPacerFn = null;
@@ -152,13 +152,13 @@ function createFfmpegMixer() {
152
152
  * Call this once right after spawning FFmpeg:
153
153
  * ```ts
154
154
  * const ff = spawnFFmpegProcess();
155
- * startPacer(ff, PARTICIPANT_SLOTS);
155
+ * startPacer(ff, MIXER_SLOTS);
156
156
  * ```
157
157
  *
158
158
  * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
159
159
  *
160
160
  * @param ff Child process handle from spawn("ffmpeg", ...)
161
- * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
161
+ * @param slotCount Number of mixer input slots (0..N-1 → fd 3..3+N-1)
162
162
  */
163
163
  function startPacer(ff, slotCount, rtcAudioSource) {
164
164
  if (stopPacerFn) {
@@ -240,7 +240,7 @@ function createFfmpegMixer() {
240
240
  st.q.push(buf);
241
241
  }
242
242
  /**
243
- * Clear the audio queue for a specific slot when a participant leaves.
243
+ * Clear the audio queue for a specific slot when a participant leaves or screenshare stops.
244
244
  * This prevents stale audio data from continuing to play after disconnect.
245
245
  */
246
246
  function clearSlotQueue(slot) {
@@ -254,12 +254,12 @@ function createFfmpegMixer() {
254
254
  }
255
255
  }
256
256
  /**
257
- * Get the FFmpeg arguments for debugging, which writes each participant's audio to a separate WAV file
257
+ * Get the FFmpeg arguments for debugging, which writes each participant/screenshare's audio to a separate WAV file
258
258
  * and also mixes them into a single WAV file.
259
- * This is useful for inspecting the audio quality and timing of each participant.
259
+ * This is useful for inspecting the audio quality and timing of each participant/screenshare.
260
260
  */
261
261
  function getFFmpegArgumentsDebug() {
262
- const N = PARTICIPANT_SLOTS;
262
+ const N = MIXER_SLOTS;
263
263
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
264
264
  const ffArgs = [];
265
265
  for (let i = 0; i < N; i++) {
@@ -279,12 +279,12 @@ function createFfmpegMixer() {
279
279
  return ffArgs;
280
280
  }
281
281
  /**
282
- * Get the FFmpeg arguments for mixing audio from multiple participants.
282
+ * Get the FFmpeg arguments for mixing audio from multiple participants/screenshares.
283
283
  * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
284
284
  * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
285
285
  */
286
286
  function getFFmpegArguments() {
287
- const N = PARTICIPANT_SLOTS;
287
+ const N = MIXER_SLOTS;
288
288
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
289
289
  const ffArgs = [];
290
290
  for (let i = 0; i < N; i++) {
@@ -302,23 +302,23 @@ function createFfmpegMixer() {
302
302
  }
303
303
  /*
304
304
  * Spawn a new FFmpeg process for debugging purposes.
305
- * This will write each participant's audio to a separate WAV file and also mix them into a single WAV file.
305
+ * This will write each participant/screenshare's audio to a separate WAV file and also mix them into a single WAV file.
306
306
  * The output files will be named pre0.wav, pre1.wav, ..., and mixed.wav.
307
307
  * The process will log its output to stderr.
308
308
  * @return The spawned FFmpeg process.
309
309
  */
310
310
  function spawnFFmpegProcessDebug(rtcAudioSource) {
311
- const stdio = ["ignore", "ignore", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
311
+ const stdio = ["ignore", "ignore", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
312
312
  const args = getFFmpegArgumentsDebug();
313
313
  const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
314
- startPacer(ffmpegProcess, PARTICIPANT_SLOTS, rtcAudioSource);
314
+ startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
315
315
  ffmpegProcess.stderr.setEncoding("utf8");
316
316
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
317
317
  ffmpegProcess.on("error", () => console.error("FFmpeg process error (debug): is ffmpeg installed?"));
318
318
  return ffmpegProcess;
319
319
  }
320
320
  /**
321
- * Spawn a new FFmpeg process for mixing audio from multiple participants.
321
+ * Spawn a new FFmpeg process for mixing audio from multiple participants/screenshares.
322
322
  * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
323
323
  * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
324
324
  * The process will log its output to stderr.
@@ -326,10 +326,10 @@ function createFfmpegMixer() {
326
326
  * @return The spawned FFmpeg process.
327
327
  */
328
328
  function spawnFFmpegProcess(rtcAudioSource) {
329
- const stdio = ["pipe", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
329
+ const stdio = ["pipe", "pipe", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
330
330
  const args = getFFmpegArguments();
331
331
  const ffmpegProcess = child_process.spawn("ffmpeg", args, { stdio });
332
- startPacer(ffmpegProcess, PARTICIPANT_SLOTS, rtcAudioSource);
332
+ startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
333
333
  ffmpegProcess.stderr.setEncoding("utf8");
334
334
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
335
335
  ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
@@ -355,7 +355,7 @@ function createFfmpegMixer() {
355
355
  * that enqueues audio frames into the pacer.
356
356
  *
357
357
  * @param ffmpegProcess The FFmpeg process to which audio data will be written.
358
- * @param slot The participant slot number (0..N-1) to which this track belongs.
358
+ * @param slot The mixer slot number (0..N-1) to which this track belongs.
359
359
  * @param audioTrack The MediaStreamTrack containing the audio data.
360
360
  * @return An object containing the AudioSink, the writable stream, and a stop function.
361
361
  */
@@ -385,7 +385,7 @@ function createFfmpegMixer() {
385
385
  }
386
386
  /**
387
387
  * Stop the FFmpeg process and clean up all resources.
388
- * This function will unpipe the stdout, end all writable streams for each participant slot,
388
+ * This function will unpipe the stdout, end all writable streams for each mixer slot,
389
389
  * and kill the FFmpeg process.
390
390
  * @param ffmpegProcess The FFmpeg process to stop.
391
391
  */
@@ -399,7 +399,7 @@ function createFfmpegMixer() {
399
399
  catch (_c) {
400
400
  console.error("Failed to unpipe ffmpeg stdout");
401
401
  }
402
- for (let i = 0; i < PARTICIPANT_SLOTS; i++) {
402
+ for (let i = 0; i < MIXER_SLOTS; i++) {
403
403
  const w = ffmpegProcess.stdio[3 + i];
404
404
  try {
405
405
  w.end();
@@ -426,84 +426,60 @@ function createFfmpegMixer() {
426
426
  };
427
427
  }
428
428
 
429
+ var _a;
430
+ // Debug: set to true to enable debug output (and write audio to .wav files)
431
+ const DEBUG_MIXER_OUTPUT = (_a = process.env.DEBUG_MIXER_OUTPUT) !== null && _a !== void 0 ? _a : false;
429
432
  class AudioMixer extends EventEmitter.EventEmitter {
430
433
  constructor() {
431
434
  super();
432
435
  this.ffmpegProcess = null;
433
436
  this.combinedAudioStream = null;
434
437
  this.rtcAudioSource = null;
435
- this.participantSlots = new Map();
438
+ this.mixableSlots = new Map();
436
439
  this.activeSlots = {};
437
440
  this.mixer = createFfmpegMixer();
438
441
  this.setupMediaStream();
439
- this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
442
+ this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
440
443
  }
441
444
  setupMediaStream() {
442
445
  this.rtcAudioSource = new wrtc.nonstandard.RTCAudioSource();
443
446
  const audioTrack = this.rtcAudioSource.createTrack();
444
447
  this.combinedAudioStream = new wrtc.MediaStream([audioTrack]);
445
448
  }
446
- getCombinedAudioStream() {
447
- return this.combinedAudioStream;
448
- }
449
- handleRemoteParticipants(participants) {
450
- if (participants.length === 0) {
451
- this.stopAudioMixer();
452
- return;
453
- }
454
- if (!this.ffmpegProcess && this.rtcAudioSource) {
455
- this.ffmpegProcess = this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
456
- }
457
- for (const p of participants)
458
- this.attachParticipantIfNeeded(p);
459
- const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
460
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
461
- for (const [slot, pid] of this.participantSlots) {
462
- if (pid && !liveIds.has(pid))
463
- this.detachParticipant(pid);
464
- }
465
- }
466
- stopAudioMixer() {
467
- if (this.ffmpegProcess) {
468
- this.mixer.stopFFmpegProcess(this.ffmpegProcess);
469
- this.ffmpegProcess = null;
470
- }
471
- this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
472
- this.activeSlots = {};
473
- // Recreate the media stream to avoid stale references
474
- this.setupMediaStream();
475
- }
476
- slotForParticipant(participantId) {
449
+ slotForMixable(mixableId) {
477
450
  var _a;
478
- const found = (_a = [...this.participantSlots.entries()].find(([, id]) => id === participantId)) === null || _a === void 0 ? void 0 : _a[0];
451
+ const found = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === mixableId)) === null || _a === void 0 ? void 0 : _a[0];
479
452
  return found === undefined ? null : found;
480
453
  }
481
- acquireSlot(participantId) {
454
+ slotsByType(mixableType) {
455
+ return [...this.mixableSlots.entries()].filter(([slotId]) => { var _a; return ((_a = this.activeSlots[slotId]) === null || _a === void 0 ? void 0 : _a.type) === mixableType; });
456
+ }
457
+ acquireSlot(mixableId) {
482
458
  var _a;
483
- const existing = this.slotForParticipant(participantId);
459
+ const existing = this.slotForMixable(mixableId);
484
460
  if (existing !== null)
485
461
  return existing;
486
- const empty = (_a = [...this.participantSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
462
+ const empty = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
487
463
  if (empty === undefined)
488
464
  return null;
489
- this.participantSlots.set(empty, participantId);
465
+ this.mixableSlots.set(empty, mixableId);
490
466
  return empty;
491
467
  }
492
- attachParticipantIfNeeded(participant) {
468
+ attachMixableIfNeeded(mixable) {
493
469
  var _a;
494
- const { id: participantId, stream: participantStream, isAudioEnabled } = participant;
495
- if (!participantId)
470
+ const { id: mixableId, stream: mixableStream, isAudioEnabled, type } = mixable;
471
+ if (!mixableId)
496
472
  return;
497
- if (!participantStream || !isAudioEnabled) {
498
- this.detachParticipant(participantId);
473
+ if (!mixableStream || !isAudioEnabled) {
474
+ this.detachMixable(mixableId);
499
475
  return;
500
476
  }
501
- const audioTrack = participantStream.getTracks().find((t) => t.kind === "audio");
477
+ const audioTrack = mixableStream.getTracks().find((t) => t.kind === "audio");
502
478
  if (!audioTrack) {
503
- this.detachParticipant(participantId);
479
+ this.detachMixable(mixableId);
504
480
  return;
505
481
  }
506
- const slot = this.acquireSlot(participantId);
482
+ const slot = this.acquireSlot(mixableId);
507
483
  if (slot === null)
508
484
  return;
509
485
  const existing = this.activeSlots[slot];
@@ -519,11 +495,11 @@ class AudioMixer extends EventEmitter.EventEmitter {
519
495
  this.activeSlots[slot] = undefined;
520
496
  }
521
497
  const { sink, writer, stop } = this.mixer.writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
522
- this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id };
523
- (_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachParticipant(participantId));
498
+ this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id, type };
499
+ (_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachMixable(mixableId));
524
500
  }
525
- detachParticipant(participantId) {
526
- const slot = this.slotForParticipant(participantId);
501
+ detachMixable(mixableId) {
502
+ const slot = this.slotForMixable(mixableId);
527
503
  if (slot === null)
528
504
  return;
529
505
  const binding = this.activeSlots[slot];
@@ -538,7 +514,62 @@ class AudioMixer extends EventEmitter.EventEmitter {
538
514
  }
539
515
  // Clear any queued audio data for this slot to prevent stale audio
540
516
  this.mixer.clearSlotQueue(slot);
541
- this.participantSlots.set(slot, "");
517
+ this.mixableSlots.set(slot, "");
518
+ }
519
+ getCombinedAudioStream() {
520
+ return this.combinedAudioStream;
521
+ }
522
+ handleRemoteParticipants(participants) {
523
+ const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
524
+ const typedSlots = this.slotsByType("participant");
525
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
526
+ for (const [slot, pid] of typedSlots) {
527
+ if (pid && !liveIds.has(pid))
528
+ this.detachMixable(pid);
529
+ }
530
+ if (!this.ffmpegProcess && this.rtcAudioSource) {
531
+ this.ffmpegProcess = DEBUG_MIXER_OUTPUT
532
+ ? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
533
+ : this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
534
+ }
535
+ for (const p of participants)
536
+ this.attachMixableIfNeeded(Object.assign(Object.assign({}, p), { type: "participant" }));
537
+ }
538
+ handleScreenshares(screenshares) {
539
+ const screensharesWithAudio = screenshares.filter((screenshare) => screenshare.hasAudioTrack &&
540
+ screenshare.stream &&
541
+ screenshare.stream.getTracks().filter(({ kind }) => kind === "audio").length > 0);
542
+ const liveIds = new Set(screensharesWithAudio.map((p) => p.id).filter(Boolean));
543
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
544
+ for (const [slot, sid] of this.slotsByType("screenshare")) {
545
+ if (sid && !liveIds.has(sid))
546
+ this.detachMixable(sid);
547
+ }
548
+ if (screensharesWithAudio.length === 0) {
549
+ return;
550
+ }
551
+ if (!this.ffmpegProcess && this.rtcAudioSource) {
552
+ this.ffmpegProcess = DEBUG_MIXER_OUTPUT
553
+ ? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
554
+ : this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
555
+ }
556
+ const mixables = screensharesWithAudio.map(({ id, stream, hasAudioTrack }) => ({
557
+ id,
558
+ stream,
559
+ isAudioEnabled: hasAudioTrack,
560
+ }));
561
+ for (const s of mixables)
562
+ this.attachMixableIfNeeded(Object.assign(Object.assign({}, s), { type: "screenshare" }));
563
+ }
564
+ stopAudioMixer() {
565
+ if (this.ffmpegProcess) {
566
+ this.mixer.stopFFmpegProcess(this.ffmpegProcess);
567
+ this.ffmpegProcess = null;
568
+ }
569
+ this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
570
+ this.activeSlots = {};
571
+ // Recreate the media stream to avoid stale references
572
+ this.setupMediaStream();
542
573
  }
543
574
  }
544
575
 
package/dist/index.mjs CHANGED
@@ -64,8 +64,8 @@ class AudioSink extends RTCAudioSink {
64
64
  }
65
65
 
66
66
  // Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
67
- // participants to these slots based on mute/unmute state.
68
- const PARTICIPANT_SLOTS = 20;
67
+ // participants/screenshares to these slots based on mute/unmute state.
68
+ const MIXER_SLOTS = 20;
69
69
  // Each sample is 2 bytes (16 bits) for PCM audio - s16le format
70
70
  // 48000 Hz is the standard sample rate for WebRTC audio
71
71
  const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
@@ -89,11 +89,11 @@ function createFfmpegMixer() {
89
89
  slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
90
90
  }
91
91
  ({
92
- enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
93
- enqSamples: new Array(PARTICIPANT_SLOTS).fill(0),
94
- wroteFrames: new Array(PARTICIPANT_SLOTS).fill(0),
95
- wroteSamples: new Array(PARTICIPANT_SLOTS).fill(0),
96
- lastFramesSeen: new Array(PARTICIPANT_SLOTS).fill(0),
92
+ enqFrames: new Array(MIXER_SLOTS).fill(0),
93
+ enqSamples: new Array(MIXER_SLOTS).fill(0),
94
+ wroteFrames: new Array(MIXER_SLOTS).fill(0),
95
+ wroteSamples: new Array(MIXER_SLOTS).fill(0),
96
+ lastFramesSeen: new Array(MIXER_SLOTS).fill(0),
97
97
  });
98
98
  let slots = [];
99
99
  let stopPacerFn = null;
@@ -150,13 +150,13 @@ function createFfmpegMixer() {
150
150
  * Call this once right after spawning FFmpeg:
151
151
  * ```ts
152
152
  * const ff = spawnFFmpegProcess();
153
- * startPacer(ff, PARTICIPANT_SLOTS);
153
+ * startPacer(ff, MIXER_SLOTS);
154
154
  * ```
155
155
  *
156
156
  * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
157
157
  *
158
158
  * @param ff Child process handle from spawn("ffmpeg", ...)
159
- * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
159
+ * @param slotCount Number of mixer input slots (0..N-1 → fd 3..3+N-1)
160
160
  */
161
161
  function startPacer(ff, slotCount, rtcAudioSource) {
162
162
  if (stopPacerFn) {
@@ -238,7 +238,7 @@ function createFfmpegMixer() {
238
238
  st.q.push(buf);
239
239
  }
240
240
  /**
241
- * Clear the audio queue for a specific slot when a participant leaves.
241
+ * Clear the audio queue for a specific slot when a participant leaves or screenshare stops.
242
242
  * This prevents stale audio data from continuing to play after disconnect.
243
243
  */
244
244
  function clearSlotQueue(slot) {
@@ -252,12 +252,12 @@ function createFfmpegMixer() {
252
252
  }
253
253
  }
254
254
  /**
255
- * Get the FFmpeg arguments for debugging, which writes each participant's audio to a separate WAV file
255
+ * Get the FFmpeg arguments for debugging, which writes each participant/screenshare's audio to a separate WAV file
256
256
  * and also mixes them into a single WAV file.
257
- * This is useful for inspecting the audio quality and timing of each participant.
257
+ * This is useful for inspecting the audio quality and timing of each participant/screenshare.
258
258
  */
259
259
  function getFFmpegArgumentsDebug() {
260
- const N = PARTICIPANT_SLOTS;
260
+ const N = MIXER_SLOTS;
261
261
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
262
262
  const ffArgs = [];
263
263
  for (let i = 0; i < N; i++) {
@@ -277,12 +277,12 @@ function createFfmpegMixer() {
277
277
  return ffArgs;
278
278
  }
279
279
  /**
280
- * Get the FFmpeg arguments for mixing audio from multiple participants.
280
+ * Get the FFmpeg arguments for mixing audio from multiple participants/screenshares.
281
281
  * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
282
282
  * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
283
283
  */
284
284
  function getFFmpegArguments() {
285
- const N = PARTICIPANT_SLOTS;
285
+ const N = MIXER_SLOTS;
286
286
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
287
287
  const ffArgs = [];
288
288
  for (let i = 0; i < N; i++) {
@@ -300,23 +300,23 @@ function createFfmpegMixer() {
300
300
  }
301
301
  /*
302
302
  * Spawn a new FFmpeg process for debugging purposes.
303
- * This will write each participant's audio to a separate WAV file and also mix them into a single WAV file.
303
+ * This will write each participant/screenshare's audio to a separate WAV file and also mix them into a single WAV file.
304
304
  * The output files will be named pre0.wav, pre1.wav, ..., and mixed.wav.
305
305
  * The process will log its output to stderr.
306
306
  * @return The spawned FFmpeg process.
307
307
  */
308
308
  function spawnFFmpegProcessDebug(rtcAudioSource) {
309
- const stdio = ["ignore", "ignore", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
309
+ const stdio = ["ignore", "ignore", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
310
310
  const args = getFFmpegArgumentsDebug();
311
311
  const ffmpegProcess = spawn("ffmpeg", args, { stdio });
312
- startPacer(ffmpegProcess, PARTICIPANT_SLOTS, rtcAudioSource);
312
+ startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
313
313
  ffmpegProcess.stderr.setEncoding("utf8");
314
314
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
315
315
  ffmpegProcess.on("error", () => console.error("FFmpeg process error (debug): is ffmpeg installed?"));
316
316
  return ffmpegProcess;
317
317
  }
318
318
  /**
319
- * Spawn a new FFmpeg process for mixing audio from multiple participants.
319
+ * Spawn a new FFmpeg process for mixing audio from multiple participants/screenshares.
320
320
  * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
321
321
  * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
322
322
  * The process will log its output to stderr.
@@ -324,10 +324,10 @@ function createFfmpegMixer() {
324
324
  * @return The spawned FFmpeg process.
325
325
  */
326
326
  function spawnFFmpegProcess(rtcAudioSource) {
327
- const stdio = ["pipe", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
327
+ const stdio = ["pipe", "pipe", "pipe", ...Array(MIXER_SLOTS).fill("pipe")];
328
328
  const args = getFFmpegArguments();
329
329
  const ffmpegProcess = spawn("ffmpeg", args, { stdio });
330
- startPacer(ffmpegProcess, PARTICIPANT_SLOTS, rtcAudioSource);
330
+ startPacer(ffmpegProcess, MIXER_SLOTS, rtcAudioSource);
331
331
  ffmpegProcess.stderr.setEncoding("utf8");
332
332
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
333
333
  ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
@@ -353,7 +353,7 @@ function createFfmpegMixer() {
353
353
  * that enqueues audio frames into the pacer.
354
354
  *
355
355
  * @param ffmpegProcess The FFmpeg process to which audio data will be written.
356
- * @param slot The participant slot number (0..N-1) to which this track belongs.
356
+ * @param slot The mixer slot number (0..N-1) to which this track belongs.
357
357
  * @param audioTrack The MediaStreamTrack containing the audio data.
358
358
  * @return An object containing the AudioSink, the writable stream, and a stop function.
359
359
  */
@@ -383,7 +383,7 @@ function createFfmpegMixer() {
383
383
  }
384
384
  /**
385
385
  * Stop the FFmpeg process and clean up all resources.
386
- * This function will unpipe the stdout, end all writable streams for each participant slot,
386
+ * This function will unpipe the stdout, end all writable streams for each mixer slot,
387
387
  * and kill the FFmpeg process.
388
388
  * @param ffmpegProcess The FFmpeg process to stop.
389
389
  */
@@ -397,7 +397,7 @@ function createFfmpegMixer() {
397
397
  catch (_c) {
398
398
  console.error("Failed to unpipe ffmpeg stdout");
399
399
  }
400
- for (let i = 0; i < PARTICIPANT_SLOTS; i++) {
400
+ for (let i = 0; i < MIXER_SLOTS; i++) {
401
401
  const w = ffmpegProcess.stdio[3 + i];
402
402
  try {
403
403
  w.end();
@@ -424,84 +424,60 @@ function createFfmpegMixer() {
424
424
  };
425
425
  }
426
426
 
427
+ var _a;
428
+ // Debug: set to true to enable debug output (and write audio to .wav files)
429
+ const DEBUG_MIXER_OUTPUT = (_a = process.env.DEBUG_MIXER_OUTPUT) !== null && _a !== void 0 ? _a : false;
427
430
  class AudioMixer extends EventEmitter {
428
431
  constructor() {
429
432
  super();
430
433
  this.ffmpegProcess = null;
431
434
  this.combinedAudioStream = null;
432
435
  this.rtcAudioSource = null;
433
- this.participantSlots = new Map();
436
+ this.mixableSlots = new Map();
434
437
  this.activeSlots = {};
435
438
  this.mixer = createFfmpegMixer();
436
439
  this.setupMediaStream();
437
- this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
440
+ this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
438
441
  }
439
442
  setupMediaStream() {
440
443
  this.rtcAudioSource = new wrtc.nonstandard.RTCAudioSource();
441
444
  const audioTrack = this.rtcAudioSource.createTrack();
442
445
  this.combinedAudioStream = new wrtc.MediaStream([audioTrack]);
443
446
  }
444
- getCombinedAudioStream() {
445
- return this.combinedAudioStream;
446
- }
447
- handleRemoteParticipants(participants) {
448
- if (participants.length === 0) {
449
- this.stopAudioMixer();
450
- return;
451
- }
452
- if (!this.ffmpegProcess && this.rtcAudioSource) {
453
- this.ffmpegProcess = this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
454
- }
455
- for (const p of participants)
456
- this.attachParticipantIfNeeded(p);
457
- const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
458
- // eslint-disable-next-line @typescript-eslint/no-unused-vars
459
- for (const [slot, pid] of this.participantSlots) {
460
- if (pid && !liveIds.has(pid))
461
- this.detachParticipant(pid);
462
- }
463
- }
464
- stopAudioMixer() {
465
- if (this.ffmpegProcess) {
466
- this.mixer.stopFFmpegProcess(this.ffmpegProcess);
467
- this.ffmpegProcess = null;
468
- }
469
- this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
470
- this.activeSlots = {};
471
- // Recreate the media stream to avoid stale references
472
- this.setupMediaStream();
473
- }
474
- slotForParticipant(participantId) {
447
+ slotForMixable(mixableId) {
475
448
  var _a;
476
- const found = (_a = [...this.participantSlots.entries()].find(([, id]) => id === participantId)) === null || _a === void 0 ? void 0 : _a[0];
449
+ const found = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === mixableId)) === null || _a === void 0 ? void 0 : _a[0];
477
450
  return found === undefined ? null : found;
478
451
  }
479
- acquireSlot(participantId) {
452
+ slotsByType(mixableType) {
453
+ return [...this.mixableSlots.entries()].filter(([slotId]) => { var _a; return ((_a = this.activeSlots[slotId]) === null || _a === void 0 ? void 0 : _a.type) === mixableType; });
454
+ }
455
+ acquireSlot(mixableId) {
480
456
  var _a;
481
- const existing = this.slotForParticipant(participantId);
457
+ const existing = this.slotForMixable(mixableId);
482
458
  if (existing !== null)
483
459
  return existing;
484
- const empty = (_a = [...this.participantSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
460
+ const empty = (_a = [...this.mixableSlots.entries()].find(([, id]) => id === "")) === null || _a === void 0 ? void 0 : _a[0];
485
461
  if (empty === undefined)
486
462
  return null;
487
- this.participantSlots.set(empty, participantId);
463
+ this.mixableSlots.set(empty, mixableId);
488
464
  return empty;
489
465
  }
490
- attachParticipantIfNeeded(participant) {
466
+ attachMixableIfNeeded(mixable) {
491
467
  var _a;
492
- const { id: participantId, stream: participantStream, isAudioEnabled } = participant;
493
- if (!participantId)
468
+ const { id: mixableId, stream: mixableStream, isAudioEnabled, type } = mixable;
469
+ if (!mixableId)
494
470
  return;
495
- if (!participantStream || !isAudioEnabled) {
496
- this.detachParticipant(participantId);
471
+ if (!mixableStream || !isAudioEnabled) {
472
+ this.detachMixable(mixableId);
497
473
  return;
498
474
  }
499
- const audioTrack = participantStream.getTracks().find((t) => t.kind === "audio");
475
+ const audioTrack = mixableStream.getTracks().find((t) => t.kind === "audio");
500
476
  if (!audioTrack) {
501
- this.detachParticipant(participantId);
477
+ this.detachMixable(mixableId);
502
478
  return;
503
479
  }
504
- const slot = this.acquireSlot(participantId);
480
+ const slot = this.acquireSlot(mixableId);
505
481
  if (slot === null)
506
482
  return;
507
483
  const existing = this.activeSlots[slot];
@@ -517,11 +493,11 @@ class AudioMixer extends EventEmitter {
517
493
  this.activeSlots[slot] = undefined;
518
494
  }
519
495
  const { sink, writer, stop } = this.mixer.writeAudioDataToFFmpeg(this.ffmpegProcess, slot, audioTrack);
520
- this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id };
521
- (_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachParticipant(participantId));
496
+ this.activeSlots[slot] = { sink, writer, stop, trackId: audioTrack.id, type };
497
+ (_a = audioTrack.addEventListener) === null || _a === void 0 ? void 0 : _a.call(audioTrack, "ended", () => this.detachMixable(mixableId));
522
498
  }
523
- detachParticipant(participantId) {
524
- const slot = this.slotForParticipant(participantId);
499
+ detachMixable(mixableId) {
500
+ const slot = this.slotForMixable(mixableId);
525
501
  if (slot === null)
526
502
  return;
527
503
  const binding = this.activeSlots[slot];
@@ -536,7 +512,62 @@ class AudioMixer extends EventEmitter {
536
512
  }
537
513
  // Clear any queued audio data for this slot to prevent stale audio
538
514
  this.mixer.clearSlotQueue(slot);
539
- this.participantSlots.set(slot, "");
515
+ this.mixableSlots.set(slot, "");
516
+ }
517
+ getCombinedAudioStream() {
518
+ return this.combinedAudioStream;
519
+ }
520
+ handleRemoteParticipants(participants) {
521
+ const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
522
+ const typedSlots = this.slotsByType("participant");
523
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
524
+ for (const [slot, pid] of typedSlots) {
525
+ if (pid && !liveIds.has(pid))
526
+ this.detachMixable(pid);
527
+ }
528
+ if (!this.ffmpegProcess && this.rtcAudioSource) {
529
+ this.ffmpegProcess = DEBUG_MIXER_OUTPUT
530
+ ? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
531
+ : this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
532
+ }
533
+ for (const p of participants)
534
+ this.attachMixableIfNeeded(Object.assign(Object.assign({}, p), { type: "participant" }));
535
+ }
536
+ handleScreenshares(screenshares) {
537
+ const screensharesWithAudio = screenshares.filter((screenshare) => screenshare.hasAudioTrack &&
538
+ screenshare.stream &&
539
+ screenshare.stream.getTracks().filter(({ kind }) => kind === "audio").length > 0);
540
+ const liveIds = new Set(screensharesWithAudio.map((p) => p.id).filter(Boolean));
541
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
542
+ for (const [slot, sid] of this.slotsByType("screenshare")) {
543
+ if (sid && !liveIds.has(sid))
544
+ this.detachMixable(sid);
545
+ }
546
+ if (screensharesWithAudio.length === 0) {
547
+ return;
548
+ }
549
+ if (!this.ffmpegProcess && this.rtcAudioSource) {
550
+ this.ffmpegProcess = DEBUG_MIXER_OUTPUT
551
+ ? this.mixer.spawnFFmpegProcessDebug(this.rtcAudioSource)
552
+ : this.mixer.spawnFFmpegProcess(this.rtcAudioSource);
553
+ }
554
+ const mixables = screensharesWithAudio.map(({ id, stream, hasAudioTrack }) => ({
555
+ id,
556
+ stream,
557
+ isAudioEnabled: hasAudioTrack,
558
+ }));
559
+ for (const s of mixables)
560
+ this.attachMixableIfNeeded(Object.assign(Object.assign({}, s), { type: "screenshare" }));
561
+ }
562
+ stopAudioMixer() {
563
+ if (this.ffmpegProcess) {
564
+ this.mixer.stopFFmpegProcess(this.ffmpegProcess);
565
+ this.ffmpegProcess = null;
566
+ }
567
+ this.mixableSlots = new Map(Array.from({ length: MIXER_SLOTS }, (_, i) => [i, ""]));
568
+ this.activeSlots = {};
569
+ // Recreate the media stream to avoid stale references
570
+ this.setupMediaStream();
540
571
  }
541
572
  }
542
573