npm - @whereby.com/assistant-sdk - Versions diffs - 0.0.0-canary-20250911141956 → 0.0.0-canary-20250912142319 - Mend

@whereby.com/assistant-sdk 0.0.0-canary-20250911141956 → 0.0.0-canary-20250912142319

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/dist/index.cjs CHANGED

@@ -9,6 +9,26 @@ var express = require('express');
 var assert = require('assert');
 var bodyParser = require('body-parser');
 var os = require('os');
+var dotenv = require('dotenv');
+function _interopNamespaceDefault(e) {
+    var n = Object.create(null);
+    if (e) {
+        Object.keys(e).forEach(function (k) {
+            if (k !== 'default') {
+                var d = Object.getOwnPropertyDescriptor(e, k);
+                Object.defineProperty(n, k, d.get ? d : {
+                    enumerable: true,
+                    get: function () { return e[k]; }
+                });
+            }
+        });
+    }
+    n.default = e;
+    return Object.freeze(n);
+}
+var dotenv__namespace = /*#__PURE__*/_interopNamespaceDefault(dotenv);
 const ASSISTANT_JOIN_SUCCESS = "ASSISTANT_JOIN_SUCCESS";
@@ -68,9 +88,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
     }
 }
+// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
+// participants to these slots based on mute/unmute state.
 const PARTICIPANT_SLOTS = 20;
+// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
+// 48000 Hz is the standard sample rate for WebRTC audio
 const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
 const BYTES_PER_SAMPLE = 2;
+// 480 samples per 10ms frame at 48kHz
 const FRAME_10MS_SAMPLES = 480;
 const slotBuffers = new Map();
 function appendAndDrainTo480(slot, newSamples) {
@@ -82,10 +107,10 @@ function appendAndDrainTo480(slot, newSamples) {
     let offset = 0;
     while (merged.length - offset >= FRAME_10MS_SAMPLES) {
         const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
-        enqueueFrame(slot, chunk);
+        enqueueFrame(slot, chunk); // always 480
         offset += FRAME_10MS_SAMPLES;
     }
-    slotBuffers.set(slot, merged.subarray(offset));
+    slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
 }
 ({
     enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -97,6 +122,10 @@ function appendAndDrainTo480(slot, newSamples) {
 let slots = [];
 let stopPacerFn = null;
 let outputPacerState = null;
+/**
+ * Simple linear interpolation resampler to convert audio to 48kHz.
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
+ */
 function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
     const outputLength = Math.floor(inputFrames * ratio);
@@ -116,11 +145,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     }
     return output;
 }
+/**
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
+ */
 function enqueueOutputFrame(samples) {
     if (outputPacerState) {
         outputPacerState.frameQueue.push(samples);
     }
 }
+/**
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
+ *
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
+ * arrive jittery, bursty, or with slightly different clocks.
+ *
+ * Key behavior:
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
+ *   never stalls.
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
+ * - Honors Node stream backpressure (`write()` return false) without breaking
+ *   the timing grid.
+ *
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
+ * can mix them without slow-downs or drift.
+ *
+ * Call this once right after spawning FFmpeg:
+ * ```ts
+ * const ff = spawnFFmpegProcess();
+ * startPacer(ff, PARTICIPANT_SLOTS);
+ * ```
+ *
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
+ *
+ * @param ff        Child process handle from spawn("ffmpeg", ...)
+ * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
+ */
 function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     if (stopPacerFn) {
         stopPacerFn();
@@ -128,11 +189,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }
     const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
     const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
-    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
     const t0 = nowMs();
     slots = Array.from({ length: slotCount }, () => ({
         q: [],
-        lastFrames: FRAME_10MS_SAMPLES,
+        lastFrames: FRAME_10MS_SAMPLES, // keep constant
         nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
     }));
     outputPacerState = {
@@ -147,10 +208,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         for (let s = 0; s < slotCount; s++) {
             const st = slots[s];
             const w = writers[s];
-            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
             if (t >= st.nextDueMs) {
                 const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
                 if (!w.write(buf)) {
+                    // Just continue without adding drain listener - backpressure will naturally resolve
                     const late = t - st.nextDueMs;
                     const steps = Math.max(1, Math.ceil(late / frameMs));
                     st.nextDueMs += steps * frameMs;
@@ -163,9 +225,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         }
         if (!outputPacerState)
             return;
+        // Handle output pacer for RTCAudioSource
         const state = outputPacerState;
         if (t >= state.nextDueMs) {
-            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
+            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
             if (!state.didEmitReadyEvent) {
                 state.onAudioStreamReady();
                 state.didEmitReadyEvent = true;
@@ -181,12 +244,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }, 5);
     stopPacerFn = () => clearInterval(iv);
 }
+/**
+ * Stop the audio pacer loop and clear all input slots.
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
+ */
 function stopPacer() {
     if (stopPacerFn)
         stopPacerFn();
     stopPacerFn = null;
     slots = [];
 }
+/**
+ * Queue a live frame for a given slot (0..N-1).
+ * Auto-resnaps the slot's schedule if the frame size (480/960) changes.
+ */
 function enqueueFrame(slot, samples, numberOfFrames) {
     const st = slots[slot];
     if (!st)
@@ -194,6 +265,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
     const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
     st.q.push(buf);
 }
+/**
+ * Clear the audio queue for a specific slot when a participant leaves.
+ * This prevents stale audio data from continuing to play after disconnect.
+ */
 function clearSlotQueue(slot) {
     const st = slots[slot];
     if (st) {
@@ -203,6 +278,11 @@ function clearSlotQueue(slot) {
         st.nextDueMs = now + frameMs;
     }
 }
+/**
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ */
 function getFFmpegArguments() {
     const N = PARTICIPANT_SLOTS;
     const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -220,6 +300,14 @@ function getFFmpegArguments() {
     ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
     return ffArgs;
 }
+/**
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ * The process will log its output to stderr.
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
+ * @return The spawned FFmpeg process.
+ */
 function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
     const args = getFFmpegArguments();
@@ -229,7 +317,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
     ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
     let audioBuffer = Buffer.alloc(0);
-    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
+    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
     ffmpegProcess.stdout.on("data", (chunk) => {
         audioBuffer = Buffer.concat([audioBuffer, chunk]);
         while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -244,6 +332,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     });
     return ffmpegProcess;
 }
+/**
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
+ * This function creates an AudioSink for the track and sets up a data handler
+ * that enqueues audio frames into the pacer.
+ *
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
+ * @param audioTrack The MediaStreamTrack containing the audio data.
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
+ */
 function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     const writer = ffmpegProcess.stdio[3 + slot];
     const sink = new AudioSink(audioTrack);
@@ -268,6 +366,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     };
     return { sink, writer, stop };
 }
+/**
+ * Stop the FFmpeg process and clean up all resources.
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
+ * and kill the FFmpeg process.
+ * @param ffmpegProcess The FFmpeg process to stop.
+ */
 function stopFFmpegProcess(ffmpegProcess) {
     stopPacer();
     if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -321,6 +425,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
         for (const p of participants)
             this.attachParticipantIfNeeded(p);
         const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
         for (const [slot, pid] of this.participantSlots) {
             if (pid && !liveIds.has(pid))
                 this.detachParticipant(pid);
@@ -333,6 +438,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
         }
         this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
         this.activeSlots = {};
+        // Recreate the media stream to avoid stale references
         this.setupMediaStream();
     }
     slotForParticipant(participantId) {
@@ -398,6 +504,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
             }
             this.activeSlots[slot] = undefined;
         }
+        // Clear any queued audio data for this slot to prevent stale audio
         clearSlotQueue(slot);
         this.participantSlots.set(slot, "");
     }
@@ -521,10 +628,11 @@ class Assistant extends EventEmitter {
     }
 }
-const BIND_INTERFACE = "en0";
+dotenv__namespace.config();
+const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
 function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
     let wherebyDomain;
-    {
+    if (IS_LOCAL === "true") {
         const ifaceAddrs = os.networkInterfaces()[BIND_INTERFACE];
         if (!ifaceAddrs) {
             throw new Error(`Unknown interface ${BIND_INTERFACE}`);
@@ -535,6 +643,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
         }
         wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
     }
+    else {
+        wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
+    }
     return `https://${wherebyDomain}${roomPath}`;
 }
@@ -562,7 +673,7 @@ const webhookRouter = (webhookTriggers, emitter, assistantKey, startCombinedAudi
     return router;
 };
 class Trigger extends EventEmitter.EventEmitter {
-    constructor({ webhookTriggers = {}, port = 4999, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
+    constructor({ webhookTriggers = {}, port = 8080, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
         super();
         this.webhookTriggers = webhookTriggers;
         this.port = port;
@@ -575,6 +686,7 @@ class Trigger extends EventEmitter.EventEmitter {
         const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
         app.use(router);
         const server = app.listen(this.port, () => {
+            // console.log(`Bot trigger server now running on port[${this.port}]`);
         });
         process.on("SIGTERM", () => {
             server.close();

package/dist/index.mjs CHANGED

@@ -7,6 +7,7 @@ import express from 'express';
 import assert from 'assert';
 import bodyParser from 'body-parser';
 import { networkInterfaces } from 'os';
+import * as dotenv from 'dotenv';
 const ASSISTANT_JOIN_SUCCESS = "ASSISTANT_JOIN_SUCCESS";
@@ -66,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
     }
 }
+// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
+// participants to these slots based on mute/unmute state.
 const PARTICIPANT_SLOTS = 20;
+// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
+// 48000 Hz is the standard sample rate for WebRTC audio
 const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
 const BYTES_PER_SAMPLE = 2;
+// 480 samples per 10ms frame at 48kHz
 const FRAME_10MS_SAMPLES = 480;
 const slotBuffers = new Map();
 function appendAndDrainTo480(slot, newSamples) {
@@ -80,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
     let offset = 0;
     while (merged.length - offset >= FRAME_10MS_SAMPLES) {
         const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
-        enqueueFrame(slot, chunk);
+        enqueueFrame(slot, chunk); // always 480
         offset += FRAME_10MS_SAMPLES;
     }
-    slotBuffers.set(slot, merged.subarray(offset));
+    slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
 }
 ({
     enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -95,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
 let slots = [];
 let stopPacerFn = null;
 let outputPacerState = null;
+/**
+ * Simple linear interpolation resampler to convert audio to 48kHz.
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
+ */
 function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
     const outputLength = Math.floor(inputFrames * ratio);
@@ -114,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     }
     return output;
 }
+/**
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
+ */
 function enqueueOutputFrame(samples) {
     if (outputPacerState) {
         outputPacerState.frameQueue.push(samples);
     }
 }
+/**
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
+ *
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
+ * arrive jittery, bursty, or with slightly different clocks.
+ *
+ * Key behavior:
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
+ *   never stalls.
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
+ * - Honors Node stream backpressure (`write()` return false) without breaking
+ *   the timing grid.
+ *
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
+ * can mix them without slow-downs or drift.
+ *
+ * Call this once right after spawning FFmpeg:
+ * ```ts
+ * const ff = spawnFFmpegProcess();
+ * startPacer(ff, PARTICIPANT_SLOTS);
+ * ```
+ *
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
+ *
+ * @param ff        Child process handle from spawn("ffmpeg", ...)
+ * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
+ */
 function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     if (stopPacerFn) {
         stopPacerFn();
@@ -126,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }
     const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
     const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
-    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
     const t0 = nowMs();
     slots = Array.from({ length: slotCount }, () => ({
         q: [],
-        lastFrames: FRAME_10MS_SAMPLES,
+        lastFrames: FRAME_10MS_SAMPLES, // keep constant
         nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
     }));
     outputPacerState = {
@@ -145,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         for (let s = 0; s < slotCount; s++) {
             const st = slots[s];
             const w = writers[s];
-            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
             if (t >= st.nextDueMs) {
                 const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
                 if (!w.write(buf)) {
+                    // Just continue without adding drain listener - backpressure will naturally resolve
                     const late = t - st.nextDueMs;
                     const steps = Math.max(1, Math.ceil(late / frameMs));
                     st.nextDueMs += steps * frameMs;
@@ -161,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         }
         if (!outputPacerState)
             return;
+        // Handle output pacer for RTCAudioSource
         const state = outputPacerState;
         if (t >= state.nextDueMs) {
-            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
+            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
             if (!state.didEmitReadyEvent) {
                 state.onAudioStreamReady();
                 state.didEmitReadyEvent = true;
@@ -179,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }, 5);
     stopPacerFn = () => clearInterval(iv);
 }
+/**
+ * Stop the audio pacer loop and clear all input slots.
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
+ */
 function stopPacer() {
     if (stopPacerFn)
         stopPacerFn();
     stopPacerFn = null;
     slots = [];
 }
+/**
+ * Queue a live frame for a given slot (0..N-1).
+ * Auto-resnaps the slot's schedule if the frame size (480/960) changes.
+ */
 function enqueueFrame(slot, samples, numberOfFrames) {
     const st = slots[slot];
     if (!st)
@@ -192,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
     const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
     st.q.push(buf);
 }
+/**
+ * Clear the audio queue for a specific slot when a participant leaves.
+ * This prevents stale audio data from continuing to play after disconnect.
+ */
 function clearSlotQueue(slot) {
     const st = slots[slot];
     if (st) {
@@ -201,6 +257,11 @@ function clearSlotQueue(slot) {
         st.nextDueMs = now + frameMs;
     }
 }
+/**
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ */
 function getFFmpegArguments() {
     const N = PARTICIPANT_SLOTS;
     const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -218,6 +279,14 @@ function getFFmpegArguments() {
     ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
     return ffArgs;
 }
+/**
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ * The process will log its output to stderr.
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
+ * @return The spawned FFmpeg process.
+ */
 function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
     const args = getFFmpegArguments();
@@ -227,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
     ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
     let audioBuffer = Buffer.alloc(0);
-    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
+    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
     ffmpegProcess.stdout.on("data", (chunk) => {
         audioBuffer = Buffer.concat([audioBuffer, chunk]);
         while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -242,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     });
     return ffmpegProcess;
 }
+/**
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
+ * This function creates an AudioSink for the track and sets up a data handler
+ * that enqueues audio frames into the pacer.
+ *
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
+ * @param audioTrack The MediaStreamTrack containing the audio data.
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
+ */
 function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     const writer = ffmpegProcess.stdio[3 + slot];
     const sink = new AudioSink(audioTrack);
@@ -266,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     };
     return { sink, writer, stop };
 }
+/**
+ * Stop the FFmpeg process and clean up all resources.
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
+ * and kill the FFmpeg process.
+ * @param ffmpegProcess The FFmpeg process to stop.
+ */
 function stopFFmpegProcess(ffmpegProcess) {
     stopPacer();
     if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -319,6 +404,7 @@ class AudioMixer extends EventEmitter {
         for (const p of participants)
             this.attachParticipantIfNeeded(p);
         const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
         for (const [slot, pid] of this.participantSlots) {
             if (pid && !liveIds.has(pid))
                 this.detachParticipant(pid);
@@ -331,6 +417,7 @@ class AudioMixer extends EventEmitter {
         }
         this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
         this.activeSlots = {};
+        // Recreate the media stream to avoid stale references
         this.setupMediaStream();
     }
     slotForParticipant(participantId) {
@@ -396,6 +483,7 @@ class AudioMixer extends EventEmitter {
             }
             this.activeSlots[slot] = undefined;
         }
+        // Clear any queued audio data for this slot to prevent stale audio
         clearSlotQueue(slot);
         this.participantSlots.set(slot, "");
     }
@@ -519,10 +607,11 @@ class Assistant extends EventEmitter$1 {
     }
 }
-const BIND_INTERFACE = "en0";
+dotenv.config();
+const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
 function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
     let wherebyDomain;
-    {
+    if (IS_LOCAL === "true") {
         const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
         if (!ifaceAddrs) {
             throw new Error(`Unknown interface ${BIND_INTERFACE}`);
@@ -533,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
         }
         wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
     }
+    else {
+        wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
+    }
     return `https://${wherebyDomain}${roomPath}`;
 }
@@ -560,7 +652,7 @@ const webhookRouter = (webhookTriggers, emitter, assistantKey, startCombinedAudi
     return router;
 };
 class Trigger extends EventEmitter {
-    constructor({ webhookTriggers = {}, port = 4999, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
+    constructor({ webhookTriggers = {}, port = 8080, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
         super();
         this.webhookTriggers = webhookTriggers;
         this.port = port;
@@ -573,6 +665,7 @@ class Trigger extends EventEmitter {
         const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
         app.use(router);
         const server = app.listen(this.port, () => {
+            // console.log(`Bot trigger server now running on port[${this.port}]`);
         });
         process.on("SIGTERM", () => {
             server.close();

package/dist/legacy-esm.js CHANGED

@@ -7,6 +7,7 @@ import express from 'express';
 import assert from 'assert';
 import bodyParser from 'body-parser';
 import { networkInterfaces } from 'os';
+import * as dotenv from 'dotenv';
 const ASSISTANT_JOIN_SUCCESS = "ASSISTANT_JOIN_SUCCESS";
@@ -66,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
     }
 }
+// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
+// participants to these slots based on mute/unmute state.
 const PARTICIPANT_SLOTS = 20;
+// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
+// 48000 Hz is the standard sample rate for WebRTC audio
 const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
 const BYTES_PER_SAMPLE = 2;
+// 480 samples per 10ms frame at 48kHz
 const FRAME_10MS_SAMPLES = 480;
 const slotBuffers = new Map();
 function appendAndDrainTo480(slot, newSamples) {
@@ -80,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
     let offset = 0;
     while (merged.length - offset >= FRAME_10MS_SAMPLES) {
         const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
-        enqueueFrame(slot, chunk);
+        enqueueFrame(slot, chunk); // always 480
         offset += FRAME_10MS_SAMPLES;
     }
-    slotBuffers.set(slot, merged.subarray(offset));
+    slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
 }
 ({
     enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -95,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
 let slots = [];
 let stopPacerFn = null;
 let outputPacerState = null;
+/**
+ * Simple linear interpolation resampler to convert audio to 48kHz.
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
+ */
 function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
     const outputLength = Math.floor(inputFrames * ratio);
@@ -114,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     }
     return output;
 }
+/**
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
+ */
 function enqueueOutputFrame(samples) {
     if (outputPacerState) {
         outputPacerState.frameQueue.push(samples);
     }
 }
+/**
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
+ *
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
+ * arrive jittery, bursty, or with slightly different clocks.
+ *
+ * Key behavior:
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
+ *   never stalls.
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
+ * - Honors Node stream backpressure (`write()` return false) without breaking
+ *   the timing grid.
+ *
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
+ * can mix them without slow-downs or drift.
+ *
+ * Call this once right after spawning FFmpeg:
+ * ```ts
+ * const ff = spawnFFmpegProcess();
+ * startPacer(ff, PARTICIPANT_SLOTS);
+ * ```
+ *
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
+ *
+ * @param ff        Child process handle from spawn("ffmpeg", ...)
+ * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
+ */
 function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     if (stopPacerFn) {
         stopPacerFn();
@@ -126,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }
     const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
     const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
-    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
     const t0 = nowMs();
     slots = Array.from({ length: slotCount }, () => ({
         q: [],
-        lastFrames: FRAME_10MS_SAMPLES,
+        lastFrames: FRAME_10MS_SAMPLES, // keep constant
         nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
     }));
     outputPacerState = {
@@ -145,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         for (let s = 0; s < slotCount; s++) {
             const st = slots[s];
             const w = writers[s];
-            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
             if (t >= st.nextDueMs) {
                 const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
                 if (!w.write(buf)) {
+                    // Just continue without adding drain listener - backpressure will naturally resolve
                     const late = t - st.nextDueMs;
                     const steps = Math.max(1, Math.ceil(late / frameMs));
                     st.nextDueMs += steps * frameMs;
@@ -161,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         }
         if (!outputPacerState)
             return;
+        // Handle output pacer for RTCAudioSource
         const state = outputPacerState;
         if (t >= state.nextDueMs) {
-            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
+            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
             if (!state.didEmitReadyEvent) {
                 state.onAudioStreamReady();
                 state.didEmitReadyEvent = true;
@@ -179,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }, 5);
     stopPacerFn = () => clearInterval(iv);
 }
+/**
+ * Stop the audio pacer loop and clear all input slots.
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
+ */
 function stopPacer() {
     // Cancel the interval registered by startPacer, if one is active.
     if (stopPacerFn) {
         stopPacerFn();
     }
     // Drop every slot (and whatever frames are still queued in them),
     // then forget the cancel handle so a second call is a no-op.
     slots = [];
     stopPacerFn = null;
 }
+/**
+ * Queue a live frame for a given slot (0..N-1).
+ * The samples are wrapped in a Buffer (a view over the same memory, not a copy) and appended to the slot's queue; the slot's schedule is not changed here.
+ */
 function enqueueFrame(slot, samples, numberOfFrames) {
     const st = slots[slot];
     if (!st)
@@ -192,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
     const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
     st.q.push(buf);
 }
+/**
+ * Clear the audio queue for a specific slot when a participant leaves.
+ * This prevents stale audio data from continuing to play after disconnect.
+ */
 function clearSlotQueue(slot) {
     const st = slots[slot];
     if (st) {
@@ -201,6 +257,11 @@ function clearSlotQueue(slot) {
         st.nextDueMs = now + frameMs;
     }
 }
+/**
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ */
 function getFFmpegArguments() {
     const N = PARTICIPANT_SLOTS;
     const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -218,6 +279,14 @@ function getFFmpegArguments() {
     ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
     return ffArgs;
 }
+/**
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ * Anything FFmpeg writes to its stderr is forwarded to console.error with a "[ffmpeg]" prefix.
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
+ * @return The spawned FFmpeg process.
+ */
 function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
     const args = getFFmpegArguments();
@@ -227,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
     ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
     let audioBuffer = Buffer.alloc(0);
-    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
+    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
     ffmpegProcess.stdout.on("data", (chunk) => {
         audioBuffer = Buffer.concat([audioBuffer, chunk]);
         while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -242,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     });
     return ffmpegProcess;
 }
+/**
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
+ * This function creates an AudioSink for the track and sets up a data handler
+ * that enqueues audio frames into the pacer.
+ *
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
+ * @param audioTrack The MediaStreamTrack containing the audio data.
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
+ */
 function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     const writer = ffmpegProcess.stdio[3 + slot];
     const sink = new AudioSink(audioTrack);
@@ -266,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     };
     return { sink, writer, stop };
 }
+/**
+ * Stop the FFmpeg process and clean up all resources.
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
+ * and kill the FFmpeg process.
+ * @param ffmpegProcess The FFmpeg process to stop.
+ */
 function stopFFmpegProcess(ffmpegProcess) {
     stopPacer();
     if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -319,6 +404,7 @@ class AudioMixer extends EventEmitter {
         for (const p of participants)
             this.attachParticipantIfNeeded(p);
         const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
         for (const [slot, pid] of this.participantSlots) {
             if (pid && !liveIds.has(pid))
                 this.detachParticipant(pid);
@@ -331,6 +417,7 @@ class AudioMixer extends EventEmitter {
         }
         this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
         this.activeSlots = {};
+        // Recreate the media stream to avoid stale references
         this.setupMediaStream();
     }
     slotForParticipant(participantId) {
@@ -396,6 +483,7 @@ class AudioMixer extends EventEmitter {
             }
             this.activeSlots[slot] = undefined;
         }
+        // Clear any queued audio data for this slot to prevent stale audio
         clearSlotQueue(slot);
         this.participantSlots.set(slot, "");
     }
@@ -519,10 +607,11 @@ class Assistant extends EventEmitter$1 {
     }
 }
-const BIND_INTERFACE = "en0";
+dotenv.config();
+const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
 function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
     let wherebyDomain;
-    {
+    if (IS_LOCAL === "true") {
         const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
         if (!ifaceAddrs) {
             throw new Error(`Unknown interface ${BIND_INTERFACE}`);
@@ -533,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
         }
         wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
     }
+    else {
+        wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
+    }
     return `https://${wherebyDomain}${roomPath}`;
 }
@@ -560,7 +652,7 @@ const webhookRouter = (webhookTriggers, emitter, assistantKey, startCombinedAudi
     return router;
 };
 class Trigger extends EventEmitter {
-    constructor({ webhookTriggers = {}, port = 4999, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
+    constructor({ webhookTriggers = {}, port = 8080, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
         super();
         this.webhookTriggers = webhookTriggers;
         this.port = port;
@@ -573,6 +665,7 @@ class Trigger extends EventEmitter {
         const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
         app.use(router);
         const server = app.listen(this.port, () => {
+            // console.log(`Bot trigger server now running on port[${this.port}]`);
         });
         process.on("SIGTERM", () => {
             server.close();

package/dist/polyfills.cjs CHANGED Viewed

@@ -38,8 +38,10 @@ typeof SuppressedError === "function" ? SuppressedError : function (error, suppr
 function setWebsocketOrigin(roomUrl) {
     try {
+        // add pathname needed for parsing in rtcstats-server.
         const url = new URL(roomUrl);
         global.window.location.pathname = url.pathname;
+        // fix origin header needed for parsing in rtcstats-server.
         const defaultClientOptions = {
             origin: url.origin,
         };
@@ -55,24 +57,29 @@ function setWebsocketOrigin(roomUrl) {
     }
 }
 const wrtcMediaDevices = wrtc.mediaDevices;
-global.navigator = {
-    userAgent: "Node.js/20",
-    mediaDevices: {
-        getUserMedia: wrtc.getUserMedia,
-        addEventListener: wrtcMediaDevices.addEventListener,
-        removeEventListener: wrtcMediaDevices.removeEventListener,
-        enumerateDevices: () => __awaiter(void 0, void 0, void 0, function* () {
-            return new Promise((resolve) => resolve([
-                {
-                    deviceId: "default",
-                    groupId: uuid.v4(),
-                    kind: "audioinput",
-                    label: "Dummy audio device",
-                },
-            ]));
-        }),
+Object.defineProperty(global, "navigator", {
+    value: {
+        userAgent: "Node.js/20",
+        mediaDevices: {
+            getUserMedia: wrtc.getUserMedia,
+            addEventListener: wrtcMediaDevices.addEventListener,
+            removeEventListener: wrtcMediaDevices.removeEventListener,
+            enumerateDevices: () => __awaiter(void 0, void 0, void 0, function* () {
+                return new Promise((resolve) => resolve([
+                    {
+                        deviceId: "default",
+                        groupId: uuid.v4(),
+                        kind: "audioinput",
+                        label: "Dummy audio device",
+                    },
+                ]));
+            }),
+        },
     },
-};
+    writable: false,
+    enumerable: true,
+    configurable: true,
+});
 class DOMException {
     constructor(...args) {
         console.error("DOMException", args);
@@ -85,6 +92,10 @@ class RTCPeerConnection extends wrtc.RTCPeerConnection {
     }
     getStats(arg) {
         return __awaiter(this, void 0, void 0, function* () {
+            /**
+             * node-wrtc seems to expect an Object argument, and doesn't handle the null arg we pass, so we
+             * wrap the call and filter the arg
+             **/
             arg = arg instanceof Object ? arg : undefined;
             const stats = yield this.wrappedGetStats(arg);
             return stats;
@@ -107,6 +118,6 @@ global.RTCRtpSender = wrtc.RTCRtpSender;
 global.RTCRtpTransceiver = wrtc.RTCRtpTransceiver;
 global.RTCSctpTransport = wrtc.RTCSctpTransport;
 global.RTCSessionDescription = wrtc.RTCSessionDescription;
-global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval });
+global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval }); // make sure all the classes / setInterval are available on window for rtcstats
 exports.setWebsocketOrigin = setWebsocketOrigin;

package/dist/tools.cjs CHANGED Viewed

@@ -19,9 +19,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
     }
 }
+// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
+// participants to these slots based on mute/unmute state.
 const PARTICIPANT_SLOTS = 20;
+// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
+// 48000 Hz is the standard sample rate for WebRTC audio
 const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
 const BYTES_PER_SAMPLE = 2;
+// 480 samples per 10ms frame at 48kHz
 const FRAME_10MS_SAMPLES = 480;
 const slotBuffers = new Map();
 function appendAndDrainTo480(slot, newSamples) {
@@ -33,10 +38,10 @@ function appendAndDrainTo480(slot, newSamples) {
     let offset = 0;
     while (merged.length - offset >= FRAME_10MS_SAMPLES) {
         const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
-        enqueueFrame(slot, chunk);
+        enqueueFrame(slot, chunk); // always 480
         offset += FRAME_10MS_SAMPLES;
     }
-    slotBuffers.set(slot, merged.subarray(offset));
+    slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
 }
 ({
     enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -48,6 +53,10 @@ function appendAndDrainTo480(slot, newSamples) {
 let slots = [];
 let stopPacerFn = null;
 let outputPacerState = null;
+/**
+ * Simple linear interpolation resampler to convert audio to 48kHz.
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
+ */
 function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
     const outputLength = Math.floor(inputFrames * ratio);
@@ -67,11 +76,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
     }
     return output;
 }
+/**
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
+ */
 function enqueueOutputFrame(samples) {
     // Without output pacer state there is nowhere to queue the frame, so it is dropped.
     if (!outputPacerState) {
         return;
     }
     // Append to the FIFO that the pacer loop shifts frames from when output is due.
     const { frameQueue } = outputPacerState;
     frameQueue.push(samples);
 }
+/**
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
+ *
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
+ * arrive jittery, bursty, or with slightly different clocks.
+ *
+ * Key behavior:
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
+ *   never stalls.
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
+ * - Honors Node stream backpressure (`write()` return false) without breaking
+ *   the timing grid.
+ *
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
+ * can mix them without slow-downs or drift.
+ *
+ * Call this once right after spawning FFmpeg:
+ * ```ts
+ * const ff = spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady);
+ * startPacer(ff, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
+ * ```
+ *
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
+ *
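+ * @param ff                 Child process handle from spawn("ffmpeg", ...)
+ * @param slotCount          Number of participant input slots (0..N-1 → fd 3..3+N-1)
+ * @param rtcAudioSource     RTCAudioSource fed by the output pacer
+ * @param onAudioStreamReady Callback invoked once, when the first output frame comes due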
+ */
 function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     if (stopPacerFn) {
         stopPacerFn();
@@ -79,11 +120,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }
     const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
     const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
-    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+    const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
     const t0 = nowMs();
     slots = Array.from({ length: slotCount }, () => ({
         q: [],
-        lastFrames: FRAME_10MS_SAMPLES,
+        lastFrames: FRAME_10MS_SAMPLES, // keep constant
         nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
     }));
     outputPacerState = {
@@ -98,10 +139,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         for (let s = 0; s < slotCount; s++) {
             const st = slots[s];
             const w = writers[s];
-            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
+            const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
             if (t >= st.nextDueMs) {
                 const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
                 if (!w.write(buf)) {
+                    // Just continue without adding drain listener - backpressure will naturally resolve
                     const late = t - st.nextDueMs;
                     const steps = Math.max(1, Math.ceil(late / frameMs));
                     st.nextDueMs += steps * frameMs;
@@ -114,9 +156,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
         }
         if (!outputPacerState)
             return;
+        // Handle output pacer for RTCAudioSource
         const state = outputPacerState;
         if (t >= state.nextDueMs) {
-            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
+            const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
             if (!state.didEmitReadyEvent) {
                 state.onAudioStreamReady();
                 state.didEmitReadyEvent = true;
@@ -132,12 +175,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
     }, 5);
     stopPacerFn = () => clearInterval(iv);
 }
+/**
+ * Stop the audio pacer loop and clear all input slots.
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
+ */
 function stopPacer() {
     // Cancel the interval registered by startPacer, if one is active.
     if (stopPacerFn) {
         stopPacerFn();
     }
     // Drop every slot (and whatever frames are still queued in them),
     // then forget the cancel handle so a second call is a no-op.
     slots = [];
     stopPacerFn = null;
 }
+/**
+ * Queue a live frame for a given slot (0..N-1).
+ * The samples are wrapped in a Buffer (a view over the same memory, not a copy) and appended to the slot's queue; the slot's schedule is not changed here.
+ */
 function enqueueFrame(slot, samples, numberOfFrames) {
     const st = slots[slot];
     if (!st)
@@ -145,6 +196,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
     const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
     st.q.push(buf);
 }
+/**
+ * Clear the audio queue for a specific slot when a participant leaves.
+ * This prevents stale audio data from continuing to play after disconnect.
+ */
 function clearSlotQueue(slot) {
     const st = slots[slot];
     if (st) {
@@ -154,6 +209,11 @@ function clearSlotQueue(slot) {
         st.nextDueMs = now + frameMs;
     }
 }
+/**
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ */
 function getFFmpegArguments() {
     const N = PARTICIPANT_SLOTS;
     const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -171,6 +231,14 @@ function getFFmpegArguments() {
     ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
     return ffArgs;
 }
+/**
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
+ * Anything FFmpeg writes to its stderr is forwarded to console.error with a "[ffmpeg]" prefix.
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
+ * @return The spawned FFmpeg process.
+ */
 function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
     const args = getFFmpegArguments();
@@ -180,7 +248,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
     ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
     let audioBuffer = Buffer.alloc(0);
-    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
+    const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
     ffmpegProcess.stdout.on("data", (chunk) => {
         audioBuffer = Buffer.concat([audioBuffer, chunk]);
         while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -195,6 +263,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
     });
     return ffmpegProcess;
 }
+/**
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
+ * This function creates an AudioSink for the track and sets up a data handler
+ * that enqueues audio frames into the pacer.
+ *
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
+ * @param audioTrack The MediaStreamTrack containing the audio data.
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
+ */
 function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     const writer = ffmpegProcess.stdio[3 + slot];
     const sink = new AudioSink(audioTrack);
@@ -219,6 +297,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
     };
     return { sink, writer, stop };
 }
+/**
+ * Stop the FFmpeg process and clean up all resources.
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
+ * and kill the FFmpeg process.
+ * @param ffmpegProcess The FFmpeg process to stop.
+ */
 function stopFFmpegProcess(ffmpegProcess) {
     stopPacer();
     if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -272,6 +356,7 @@ class AudioMixer extends events.EventEmitter {
         for (const p of participants)
             this.attachParticipantIfNeeded(p);
         const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
         for (const [slot, pid] of this.participantSlots) {
             if (pid && !liveIds.has(pid))
                 this.detachParticipant(pid);
@@ -284,6 +369,7 @@ class AudioMixer extends events.EventEmitter {
         }
         this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
         this.activeSlots = {};
+        // Recreate the media stream to avoid stale references
         this.setupMediaStream();
     }
     slotForParticipant(participantId) {
@@ -349,6 +435,7 @@ class AudioMixer extends events.EventEmitter {
             }
             this.activeSlots[slot] = undefined;
         }
+        // Clear any queued audio data for this slot to prevent stale audio
         clearSlotQueue(slot);
         this.participantSlots.set(slot, "");
     }

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "@whereby.com/assistant-sdk",
   "description": "Assistant SDK for whereby.com",
   "author": "Whereby AS",
-  "version": "0.0.0-canary-20250911141956",
+  "version": "0.0.0-canary-20250912142319",
   "license": "MIT",
   "files": [
     "dist",
@@ -47,6 +47,8 @@
     }
   },
   "devDependencies": {
+    "body-parser": "2.2.0",
+    "express": "5.1.0",
     "eslint": "^9.29.0",
     "prettier": "^3.5.3",
     "typescript": "^5.8.3",
@@ -58,9 +60,10 @@
   },
   "dependencies": {
     "@roamhq/wrtc": "github:whereby/node-webrtc#patch/rtc_audio_source",
+    "dotenv": "^16.4.5",
     "uuid": "^11.0.3",
     "ws": "^8.18.0",
-    "@whereby.com/core": "0.0.0-canary-20250911141956"
+    "@whereby.com/core": "0.0.0-canary-20250912142319"
   },
   "prettier": "@whereby.com/prettier-config",
   "scripts": {