@whereby.com/assistant-sdk 0.0.0-canary-20250911141956 → 0.0.0-canary-20250912142319
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +122 -10
- package/dist/index.mjs +103 -10
- package/dist/legacy-esm.js +103 -10
- package/dist/polyfills.cjs +29 -18
- package/dist/tools.cjs +94 -7
- package/package.json +5 -2
package/dist/index.cjs
CHANGED
|
@@ -9,6 +9,26 @@ var express = require('express');
|
|
|
9
9
|
var assert = require('assert');
|
|
10
10
|
var bodyParser = require('body-parser');
|
|
11
11
|
var os = require('os');
|
|
12
|
+
var dotenv = require('dotenv');
|
|
13
|
+
|
|
14
|
+
function _interopNamespaceDefault(e) {
|
|
15
|
+
var n = Object.create(null);
|
|
16
|
+
if (e) {
|
|
17
|
+
Object.keys(e).forEach(function (k) {
|
|
18
|
+
if (k !== 'default') {
|
|
19
|
+
var d = Object.getOwnPropertyDescriptor(e, k);
|
|
20
|
+
Object.defineProperty(n, k, d.get ? d : {
|
|
21
|
+
enumerable: true,
|
|
22
|
+
get: function () { return e[k]; }
|
|
23
|
+
});
|
|
24
|
+
}
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
n.default = e;
|
|
28
|
+
return Object.freeze(n);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
var dotenv__namespace = /*#__PURE__*/_interopNamespaceDefault(dotenv);
|
|
12
32
|
|
|
13
33
|
const ASSISTANT_JOIN_SUCCESS = "ASSISTANT_JOIN_SUCCESS";
|
|
14
34
|
|
|
@@ -68,9 +88,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
68
88
|
}
|
|
69
89
|
}
|
|
70
90
|
|
|
91
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
92
|
+
// participants to these slots based on mute/unmute state.
|
|
71
93
|
const PARTICIPANT_SLOTS = 20;
|
|
94
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
95
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
72
96
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
73
97
|
const BYTES_PER_SAMPLE = 2;
|
|
98
|
+
// 480 samples per 10ms frame at 48kHz
|
|
74
99
|
const FRAME_10MS_SAMPLES = 480;
|
|
75
100
|
const slotBuffers = new Map();
|
|
76
101
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -82,10 +107,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
82
107
|
let offset = 0;
|
|
83
108
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
84
109
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
85
|
-
enqueueFrame(slot, chunk);
|
|
110
|
+
enqueueFrame(slot, chunk); // always 480
|
|
86
111
|
offset += FRAME_10MS_SAMPLES;
|
|
87
112
|
}
|
|
88
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
113
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
89
114
|
}
|
|
90
115
|
({
|
|
91
116
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -97,6 +122,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
97
122
|
let slots = [];
|
|
98
123
|
let stopPacerFn = null;
|
|
99
124
|
let outputPacerState = null;
|
|
125
|
+
/**
|
|
126
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
127
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
128
|
+
*/
|
|
100
129
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
101
130
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
102
131
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -116,11 +145,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
116
145
|
}
|
|
117
146
|
return output;
|
|
118
147
|
}
|
|
148
|
+
/**
|
|
149
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
150
|
+
*/
|
|
119
151
|
function enqueueOutputFrame(samples) {
|
|
120
152
|
if (outputPacerState) {
|
|
121
153
|
outputPacerState.frameQueue.push(samples);
|
|
122
154
|
}
|
|
123
155
|
}
|
|
156
|
+
/**
|
|
157
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
158
|
+
*
|
|
159
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
160
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
161
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
162
|
+
*
|
|
163
|
+
* Key behavior:
|
|
164
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
165
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
166
|
+
* never stalls.
|
|
167
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
168
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
169
|
+
* the timing grid.
|
|
170
|
+
*
|
|
171
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
172
|
+
* can mix them without slow-downs or drift.
|
|
173
|
+
*
|
|
174
|
+
* Call this once right after spawning FFmpeg:
|
|
175
|
+
* ```ts
|
|
176
|
+
* const ff = spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady);
|
|
177
|
+
* startPacer(ff, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
|
|
178
|
+
* ```
|
|
179
|
+
*
|
|
180
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
181
|
+
*
|
|
182
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
183
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
184
|
+
*/
|
|
124
185
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
125
186
|
if (stopPacerFn) {
|
|
126
187
|
stopPacerFn();
|
|
@@ -128,11 +189,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
128
189
|
}
|
|
129
190
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
130
191
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
131
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
192
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
132
193
|
const t0 = nowMs();
|
|
133
194
|
slots = Array.from({ length: slotCount }, () => ({
|
|
134
195
|
q: [],
|
|
135
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
196
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
136
197
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
137
198
|
}));
|
|
138
199
|
outputPacerState = {
|
|
@@ -147,10 +208,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
147
208
|
for (let s = 0; s < slotCount; s++) {
|
|
148
209
|
const st = slots[s];
|
|
149
210
|
const w = writers[s];
|
|
150
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
211
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
151
212
|
if (t >= st.nextDueMs) {
|
|
152
213
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
153
214
|
if (!w.write(buf)) {
|
|
215
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
154
216
|
const late = t - st.nextDueMs;
|
|
155
217
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
156
218
|
st.nextDueMs += steps * frameMs;
|
|
@@ -163,9 +225,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
163
225
|
}
|
|
164
226
|
if (!outputPacerState)
|
|
165
227
|
return;
|
|
228
|
+
// Handle output pacer for RTCAudioSource
|
|
166
229
|
const state = outputPacerState;
|
|
167
230
|
if (t >= state.nextDueMs) {
|
|
168
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
231
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
169
232
|
if (!state.didEmitReadyEvent) {
|
|
170
233
|
state.onAudioStreamReady();
|
|
171
234
|
state.didEmitReadyEvent = true;
|
|
@@ -181,12 +244,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
181
244
|
}, 5);
|
|
182
245
|
stopPacerFn = () => clearInterval(iv);
|
|
183
246
|
}
|
|
247
|
+
/**
|
|
248
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
249
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
250
|
+
*/
|
|
184
251
|
function stopPacer() {
|
|
185
252
|
if (stopPacerFn)
|
|
186
253
|
stopPacerFn();
|
|
187
254
|
stopPacerFn = null;
|
|
188
255
|
slots = [];
|
|
189
256
|
}
|
|
257
|
+
/**
|
|
258
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
259
|
+
* Appends the frame's bytes to the slot's queue; it does not re-snap the slot's schedule (`numberOfFrames` is currently unused).
|
|
260
|
+
*/
|
|
190
261
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
191
262
|
const st = slots[slot];
|
|
192
263
|
if (!st)
|
|
@@ -194,6 +265,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
194
265
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
195
266
|
st.q.push(buf);
|
|
196
267
|
}
|
|
268
|
+
/**
|
|
269
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
270
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
271
|
+
*/
|
|
197
272
|
function clearSlotQueue(slot) {
|
|
198
273
|
const st = slots[slot];
|
|
199
274
|
if (st) {
|
|
@@ -203,6 +278,11 @@ function clearSlotQueue(slot) {
|
|
|
203
278
|
st.nextDueMs = now + frameMs;
|
|
204
279
|
}
|
|
205
280
|
}
|
|
281
|
+
/**
|
|
282
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
283
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
284
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
285
|
+
*/
|
|
206
286
|
function getFFmpegArguments() {
|
|
207
287
|
const N = PARTICIPANT_SLOTS;
|
|
208
288
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -220,6 +300,14 @@ function getFFmpegArguments() {
|
|
|
220
300
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
221
301
|
return ffArgs;
|
|
222
302
|
}
|
|
303
|
+
/**
|
|
304
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
305
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
306
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
307
|
+
* The process will log its output to stderr.
|
|
308
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
309
|
+
* @return The spawned FFmpeg process.
|
|
310
|
+
*/
|
|
223
311
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
224
312
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
225
313
|
const args = getFFmpegArguments();
|
|
@@ -229,7 +317,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
229
317
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
230
318
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
231
319
|
let audioBuffer = Buffer.alloc(0);
|
|
232
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
320
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
233
321
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
234
322
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
235
323
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -244,6 +332,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
244
332
|
});
|
|
245
333
|
return ffmpegProcess;
|
|
246
334
|
}
|
|
335
|
+
/**
|
|
336
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
337
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
338
|
+
* that enqueues audio frames into the pacer.
|
|
339
|
+
*
|
|
340
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
341
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
342
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
343
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
344
|
+
*/
|
|
247
345
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
248
346
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
249
347
|
const sink = new AudioSink(audioTrack);
|
|
@@ -268,6 +366,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
268
366
|
};
|
|
269
367
|
return { sink, writer, stop };
|
|
270
368
|
}
|
|
369
|
+
/**
|
|
370
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
371
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
372
|
+
* and kill the FFmpeg process.
|
|
373
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
374
|
+
*/
|
|
271
375
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
272
376
|
stopPacer();
|
|
273
377
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -321,6 +425,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
321
425
|
for (const p of participants)
|
|
322
426
|
this.attachParticipantIfNeeded(p);
|
|
323
427
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
428
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
324
429
|
for (const [slot, pid] of this.participantSlots) {
|
|
325
430
|
if (pid && !liveIds.has(pid))
|
|
326
431
|
this.detachParticipant(pid);
|
|
@@ -333,6 +438,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
333
438
|
}
|
|
334
439
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
335
440
|
this.activeSlots = {};
|
|
441
|
+
// Recreate the media stream to avoid stale references
|
|
336
442
|
this.setupMediaStream();
|
|
337
443
|
}
|
|
338
444
|
slotForParticipant(participantId) {
|
|
@@ -398,6 +504,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
|
|
|
398
504
|
}
|
|
399
505
|
this.activeSlots[slot] = undefined;
|
|
400
506
|
}
|
|
507
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
401
508
|
clearSlotQueue(slot);
|
|
402
509
|
this.participantSlots.set(slot, "");
|
|
403
510
|
}
|
|
@@ -521,10 +628,11 @@ class Assistant extends EventEmitter {
|
|
|
521
628
|
}
|
|
522
629
|
}
|
|
523
630
|
|
|
524
|
-
|
|
631
|
+
dotenv__namespace.config();
|
|
632
|
+
const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
|
|
525
633
|
function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
526
634
|
let wherebyDomain;
|
|
527
|
-
{
|
|
635
|
+
if (IS_LOCAL === "true") {
|
|
528
636
|
const ifaceAddrs = os.networkInterfaces()[BIND_INTERFACE];
|
|
529
637
|
if (!ifaceAddrs) {
|
|
530
638
|
throw new Error(`Unknown interface ${BIND_INTERFACE}`);
|
|
@@ -535,6 +643,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
|
535
643
|
}
|
|
536
644
|
wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
|
|
537
645
|
}
|
|
646
|
+
else {
|
|
647
|
+
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
648
|
+
}
|
|
538
649
|
return `https://${wherebyDomain}${roomPath}`;
|
|
539
650
|
}
|
|
540
651
|
|
|
@@ -562,7 +673,7 @@ const webhookRouter = (webhookTriggers, emitter, assistantKey, startCombinedAudi
|
|
|
562
673
|
return router;
|
|
563
674
|
};
|
|
564
675
|
class Trigger extends EventEmitter.EventEmitter {
|
|
565
|
-
constructor({ webhookTriggers = {}, port =
|
|
676
|
+
constructor({ webhookTriggers = {}, port = 8080, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
|
|
566
677
|
super();
|
|
567
678
|
this.webhookTriggers = webhookTriggers;
|
|
568
679
|
this.port = port;
|
|
@@ -575,6 +686,7 @@ class Trigger extends EventEmitter.EventEmitter {
|
|
|
575
686
|
const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
|
|
576
687
|
app.use(router);
|
|
577
688
|
const server = app.listen(this.port, () => {
|
|
689
|
+
// console.log(`Bot trigger server now running on port[${this.port}]`);
|
|
578
690
|
});
|
|
579
691
|
process.on("SIGTERM", () => {
|
|
580
692
|
server.close();
|
package/dist/index.mjs
CHANGED
|
@@ -7,6 +7,7 @@ import express from 'express';
|
|
|
7
7
|
import assert from 'assert';
|
|
8
8
|
import bodyParser from 'body-parser';
|
|
9
9
|
import { networkInterfaces } from 'os';
|
|
10
|
+
import * as dotenv from 'dotenv';
|
|
10
11
|
|
|
11
12
|
const ASSISTANT_JOIN_SUCCESS = "ASSISTANT_JOIN_SUCCESS";
|
|
12
13
|
|
|
@@ -66,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
66
67
|
}
|
|
67
68
|
}
|
|
68
69
|
|
|
70
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
71
|
+
// participants to these slots based on mute/unmute state.
|
|
69
72
|
const PARTICIPANT_SLOTS = 20;
|
|
73
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format
|
|
74
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
70
75
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
71
76
|
const BYTES_PER_SAMPLE = 2;
|
|
77
|
+
// 480 samples per 10ms frame at 48kHz
|
|
72
78
|
const FRAME_10MS_SAMPLES = 480;
|
|
73
79
|
const slotBuffers = new Map();
|
|
74
80
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -80,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
80
86
|
let offset = 0;
|
|
81
87
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
82
88
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
83
|
-
enqueueFrame(slot, chunk);
|
|
89
|
+
enqueueFrame(slot, chunk); // always 480
|
|
84
90
|
offset += FRAME_10MS_SAMPLES;
|
|
85
91
|
}
|
|
86
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
92
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
87
93
|
}
|
|
88
94
|
({
|
|
89
95
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -95,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
95
101
|
let slots = [];
|
|
96
102
|
let stopPacerFn = null;
|
|
97
103
|
let outputPacerState = null;
|
|
104
|
+
/**
|
|
105
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
106
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
107
|
+
*/
|
|
98
108
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
99
109
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
100
110
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -114,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
114
124
|
}
|
|
115
125
|
return output;
|
|
116
126
|
}
|
|
127
|
+
/**
|
|
128
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
129
|
+
*/
|
|
117
130
|
function enqueueOutputFrame(samples) {
|
|
118
131
|
if (outputPacerState) {
|
|
119
132
|
outputPacerState.frameQueue.push(samples);
|
|
120
133
|
}
|
|
121
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
137
|
+
*
|
|
138
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
139
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
140
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
141
|
+
*
|
|
142
|
+
* Key behavior:
|
|
143
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
144
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
145
|
+
* never stalls.
|
|
146
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
147
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
148
|
+
* the timing grid.
|
|
149
|
+
*
|
|
150
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
151
|
+
* can mix them without slow-downs or drift.
|
|
152
|
+
*
|
|
153
|
+
* Call this once right after spawning FFmpeg:
|
|
154
|
+
* ```ts
|
|
155
|
+
* const ff = spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady);
|
|
156
|
+
* startPacer(ff, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
|
|
157
|
+
* ```
|
|
158
|
+
*
|
|
159
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
160
|
+
*
|
|
161
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
162
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
163
|
+
*/
|
|
122
164
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
123
165
|
if (stopPacerFn) {
|
|
124
166
|
stopPacerFn();
|
|
@@ -126,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
126
168
|
}
|
|
127
169
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
128
170
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
129
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
171
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
130
172
|
const t0 = nowMs();
|
|
131
173
|
slots = Array.from({ length: slotCount }, () => ({
|
|
132
174
|
q: [],
|
|
133
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
175
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
134
176
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
135
177
|
}));
|
|
136
178
|
outputPacerState = {
|
|
@@ -145,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
145
187
|
for (let s = 0; s < slotCount; s++) {
|
|
146
188
|
const st = slots[s];
|
|
147
189
|
const w = writers[s];
|
|
148
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
190
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
149
191
|
if (t >= st.nextDueMs) {
|
|
150
192
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
151
193
|
if (!w.write(buf)) {
|
|
194
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
152
195
|
const late = t - st.nextDueMs;
|
|
153
196
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
154
197
|
st.nextDueMs += steps * frameMs;
|
|
@@ -161,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
161
204
|
}
|
|
162
205
|
if (!outputPacerState)
|
|
163
206
|
return;
|
|
207
|
+
// Handle output pacer for RTCAudioSource
|
|
164
208
|
const state = outputPacerState;
|
|
165
209
|
if (t >= state.nextDueMs) {
|
|
166
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
210
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
167
211
|
if (!state.didEmitReadyEvent) {
|
|
168
212
|
state.onAudioStreamReady();
|
|
169
213
|
state.didEmitReadyEvent = true;
|
|
@@ -179,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
179
223
|
}, 5);
|
|
180
224
|
stopPacerFn = () => clearInterval(iv);
|
|
181
225
|
}
|
|
226
|
+
/**
|
|
227
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
228
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
229
|
+
*/
|
|
182
230
|
function stopPacer() {
|
|
183
231
|
if (stopPacerFn)
|
|
184
232
|
stopPacerFn();
|
|
185
233
|
stopPacerFn = null;
|
|
186
234
|
slots = [];
|
|
187
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
238
|
+
* Appends the frame's bytes to the slot's queue; it does not re-snap the slot's schedule (`numberOfFrames` is currently unused).
|
|
239
|
+
*/
|
|
188
240
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
189
241
|
const st = slots[slot];
|
|
190
242
|
if (!st)
|
|
@@ -192,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
192
244
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
193
245
|
st.q.push(buf);
|
|
194
246
|
}
|
|
247
|
+
/**
|
|
248
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
249
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
250
|
+
*/
|
|
195
251
|
function clearSlotQueue(slot) {
|
|
196
252
|
const st = slots[slot];
|
|
197
253
|
if (st) {
|
|
@@ -201,6 +257,11 @@ function clearSlotQueue(slot) {
|
|
|
201
257
|
st.nextDueMs = now + frameMs;
|
|
202
258
|
}
|
|
203
259
|
}
|
|
260
|
+
/**
|
|
261
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
262
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
263
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
264
|
+
*/
|
|
204
265
|
function getFFmpegArguments() {
|
|
205
266
|
const N = PARTICIPANT_SLOTS;
|
|
206
267
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -218,6 +279,14 @@ function getFFmpegArguments() {
|
|
|
218
279
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
219
280
|
return ffArgs;
|
|
220
281
|
}
|
|
282
|
+
/**
|
|
283
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
284
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
285
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
286
|
+
* The process will log its output to stderr.
|
|
287
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
288
|
+
* @return The spawned FFmpeg process.
|
|
289
|
+
*/
|
|
221
290
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
222
291
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
223
292
|
const args = getFFmpegArguments();
|
|
@@ -227,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
227
296
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
228
297
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
229
298
|
let audioBuffer = Buffer.alloc(0);
|
|
230
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
299
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
231
300
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
232
301
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
233
302
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -242,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
242
311
|
});
|
|
243
312
|
return ffmpegProcess;
|
|
244
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
316
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
317
|
+
* that enqueues audio frames into the pacer.
|
|
318
|
+
*
|
|
319
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
320
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
321
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
322
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
323
|
+
*/
|
|
245
324
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
246
325
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
247
326
|
const sink = new AudioSink(audioTrack);
|
|
@@ -266,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
266
345
|
};
|
|
267
346
|
return { sink, writer, stop };
|
|
268
347
|
}
|
|
348
|
+
/**
|
|
349
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
350
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
351
|
+
* and kill the FFmpeg process.
|
|
352
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
353
|
+
*/
|
|
269
354
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
270
355
|
stopPacer();
|
|
271
356
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -319,6 +404,7 @@ class AudioMixer extends EventEmitter {
|
|
|
319
404
|
for (const p of participants)
|
|
320
405
|
this.attachParticipantIfNeeded(p);
|
|
321
406
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
407
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
322
408
|
for (const [slot, pid] of this.participantSlots) {
|
|
323
409
|
if (pid && !liveIds.has(pid))
|
|
324
410
|
this.detachParticipant(pid);
|
|
@@ -331,6 +417,7 @@ class AudioMixer extends EventEmitter {
|
|
|
331
417
|
}
|
|
332
418
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
333
419
|
this.activeSlots = {};
|
|
420
|
+
// Recreate the media stream to avoid stale references
|
|
334
421
|
this.setupMediaStream();
|
|
335
422
|
}
|
|
336
423
|
slotForParticipant(participantId) {
|
|
@@ -396,6 +483,7 @@ class AudioMixer extends EventEmitter {
|
|
|
396
483
|
}
|
|
397
484
|
this.activeSlots[slot] = undefined;
|
|
398
485
|
}
|
|
486
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
399
487
|
clearSlotQueue(slot);
|
|
400
488
|
this.participantSlots.set(slot, "");
|
|
401
489
|
}
|
|
@@ -519,10 +607,11 @@ class Assistant extends EventEmitter$1 {
|
|
|
519
607
|
}
|
|
520
608
|
}
|
|
521
609
|
|
|
522
|
-
|
|
610
|
+
dotenv.config();
|
|
611
|
+
const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
|
|
523
612
|
function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
524
613
|
let wherebyDomain;
|
|
525
|
-
{
|
|
614
|
+
if (IS_LOCAL === "true") {
|
|
526
615
|
const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
|
|
527
616
|
if (!ifaceAddrs) {
|
|
528
617
|
throw new Error(`Unknown interface ${BIND_INTERFACE}`);
|
|
@@ -533,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
|
533
622
|
}
|
|
534
623
|
wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
|
|
535
624
|
}
|
|
625
|
+
else {
|
|
626
|
+
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
627
|
+
}
|
|
536
628
|
return `https://${wherebyDomain}${roomPath}`;
|
|
537
629
|
}
|
|
538
630
|
|
|
@@ -560,7 +652,7 @@ const webhookRouter = (webhookTriggers, emitter, assistantKey, startCombinedAudi
|
|
|
560
652
|
return router;
|
|
561
653
|
};
|
|
562
654
|
class Trigger extends EventEmitter {
|
|
563
|
-
constructor({ webhookTriggers = {}, port =
|
|
655
|
+
constructor({ webhookTriggers = {}, port = 8080, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
|
|
564
656
|
super();
|
|
565
657
|
this.webhookTriggers = webhookTriggers;
|
|
566
658
|
this.port = port;
|
|
@@ -573,6 +665,7 @@ class Trigger extends EventEmitter {
|
|
|
573
665
|
const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
|
|
574
666
|
app.use(router);
|
|
575
667
|
const server = app.listen(this.port, () => {
|
|
668
|
+
// console.log(`Bot trigger server now running on port[${this.port}]`);
|
|
576
669
|
});
|
|
577
670
|
process.on("SIGTERM", () => {
|
|
578
671
|
server.close();
|
package/dist/legacy-esm.js
CHANGED
|
@@ -7,6 +7,7 @@ import express from 'express';
|
|
|
7
7
|
import assert from 'assert';
|
|
8
8
|
import bodyParser from 'body-parser';
|
|
9
9
|
import { networkInterfaces } from 'os';
|
|
10
|
+
import * as dotenv from 'dotenv';
|
|
10
11
|
|
|
11
12
|
const ASSISTANT_JOIN_SUCCESS = "ASSISTANT_JOIN_SUCCESS";
|
|
12
13
|
|
|
@@ -66,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
66
67
|
}
|
|
67
68
|
}
|
|
68
69
|
|
|
70
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
71
|
+
// participants to these slots based on mute/unmute state.
|
|
69
72
|
const PARTICIPANT_SLOTS = 20;
|
|
73
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format (see BYTES_PER_SAMPLE below)
|
|
74
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
70
75
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
71
76
|
const BYTES_PER_SAMPLE = 2;
|
|
77
|
+
// 480 samples per 10ms frame at 48kHz
|
|
72
78
|
const FRAME_10MS_SAMPLES = 480;
|
|
73
79
|
const slotBuffers = new Map();
|
|
74
80
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -80,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
80
86
|
let offset = 0;
|
|
81
87
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
82
88
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
83
|
-
enqueueFrame(slot, chunk);
|
|
89
|
+
enqueueFrame(slot, chunk); // always 480
|
|
84
90
|
offset += FRAME_10MS_SAMPLES;
|
|
85
91
|
}
|
|
86
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
92
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
87
93
|
}
|
|
88
94
|
({
|
|
89
95
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -95,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
95
101
|
let slots = [];
|
|
96
102
|
let stopPacerFn = null;
|
|
97
103
|
let outputPacerState = null;
|
|
104
|
+
/**
|
|
105
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
106
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
107
|
+
*/
|
|
98
108
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
99
109
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
100
110
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -114,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
114
124
|
}
|
|
115
125
|
return output;
|
|
116
126
|
}
|
|
127
|
+
/**
|
|
128
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
129
|
+
*/
|
|
117
130
|
function enqueueOutputFrame(samples) {
|
|
118
131
|
if (outputPacerState) {
|
|
119
132
|
outputPacerState.frameQueue.push(samples);
|
|
120
133
|
}
|
|
121
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
137
|
+
*
|
|
138
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
139
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
140
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
141
|
+
*
|
|
142
|
+
* Key behavior:
|
|
143
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
144
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
145
|
+
* never stalls.
|
|
146
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
147
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
148
|
+
* the timing grid.
|
|
149
|
+
*
|
|
150
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
151
|
+
* can mix them without slow-downs or drift.
|
|
152
|
+
*
|
|
153
|
+
* Call this once right after spawning FFmpeg:
|
|
154
|
+
* ```ts
|
|
155
|
+
* const ff = spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady);
|
|
156
|
+
* startPacer(ff, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
|
|
157
|
+
* ```
|
|
158
|
+
*
|
|
159
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
160
|
+
*
|
|
161
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
162
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
163
|
+
*/
|
|
122
164
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
123
165
|
if (stopPacerFn) {
|
|
124
166
|
stopPacerFn();
|
|
@@ -126,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
126
168
|
}
|
|
127
169
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
128
170
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
129
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
171
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
130
172
|
const t0 = nowMs();
|
|
131
173
|
slots = Array.from({ length: slotCount }, () => ({
|
|
132
174
|
q: [],
|
|
133
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
175
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
134
176
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
135
177
|
}));
|
|
136
178
|
outputPacerState = {
|
|
@@ -145,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
145
187
|
for (let s = 0; s < slotCount; s++) {
|
|
146
188
|
const st = slots[s];
|
|
147
189
|
const w = writers[s];
|
|
148
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
190
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
149
191
|
if (t >= st.nextDueMs) {
|
|
150
192
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
151
193
|
if (!w.write(buf)) {
|
|
194
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
152
195
|
const late = t - st.nextDueMs;
|
|
153
196
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
154
197
|
st.nextDueMs += steps * frameMs;
|
|
@@ -161,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
161
204
|
}
|
|
162
205
|
if (!outputPacerState)
|
|
163
206
|
return;
|
|
207
|
+
// Handle output pacer for RTCAudioSource
|
|
164
208
|
const state = outputPacerState;
|
|
165
209
|
if (t >= state.nextDueMs) {
|
|
166
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
210
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
167
211
|
if (!state.didEmitReadyEvent) {
|
|
168
212
|
state.onAudioStreamReady();
|
|
169
213
|
state.didEmitReadyEvent = true;
|
|
@@ -179,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
179
223
|
}, 5);
|
|
180
224
|
stopPacerFn = () => clearInterval(iv);
|
|
181
225
|
}
|
|
226
|
+
/**
|
|
227
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
228
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
229
|
+
*/
|
|
182
230
|
function stopPacer() {
|
|
183
231
|
if (stopPacerFn)
|
|
184
232
|
stopPacerFn();
|
|
185
233
|
stopPacerFn = null;
|
|
186
234
|
slots = [];
|
|
187
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
238
|
+
* The samples are wrapped in a Buffer view (no copy) and appended to the slot's queue, which the pacer drains.
|
|
239
|
+
*/
|
|
188
240
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
189
241
|
const st = slots[slot];
|
|
190
242
|
if (!st)
|
|
@@ -192,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
192
244
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
193
245
|
st.q.push(buf);
|
|
194
246
|
}
|
|
247
|
+
/**
|
|
248
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
249
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
250
|
+
*/
|
|
195
251
|
function clearSlotQueue(slot) {
|
|
196
252
|
const st = slots[slot];
|
|
197
253
|
if (st) {
|
|
@@ -201,6 +257,11 @@ function clearSlotQueue(slot) {
|
|
|
201
257
|
st.nextDueMs = now + frameMs;
|
|
202
258
|
}
|
|
203
259
|
}
|
|
260
|
+
/**
|
|
261
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
262
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
263
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
264
|
+
*/
|
|
204
265
|
function getFFmpegArguments() {
|
|
205
266
|
const N = PARTICIPANT_SLOTS;
|
|
206
267
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -218,6 +279,14 @@ function getFFmpegArguments() {
|
|
|
218
279
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
219
280
|
return ffArgs;
|
|
220
281
|
}
|
|
282
|
+
/**
|
|
283
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
284
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
285
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
286
|
+
* FFmpeg's stderr output is forwarded to console.error, prefixed with "[ffmpeg]".
|
|
287
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
288
|
+
* @return The spawned FFmpeg process.
|
|
289
|
+
*/
|
|
221
290
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
222
291
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
223
292
|
const args = getFFmpegArguments();
|
|
@@ -227,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
227
296
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
228
297
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
229
298
|
let audioBuffer = Buffer.alloc(0);
|
|
230
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
299
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
231
300
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
232
301
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
233
302
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -242,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
242
311
|
});
|
|
243
312
|
return ffmpegProcess;
|
|
244
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
316
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
317
|
+
* that enqueues audio frames into the pacer.
|
|
318
|
+
*
|
|
319
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
320
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
321
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
322
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
323
|
+
*/
|
|
245
324
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
246
325
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
247
326
|
const sink = new AudioSink(audioTrack);
|
|
@@ -266,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
266
345
|
};
|
|
267
346
|
return { sink, writer, stop };
|
|
268
347
|
}
|
|
348
|
+
/**
|
|
349
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
350
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
351
|
+
* and kill the FFmpeg process.
|
|
352
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
353
|
+
*/
|
|
269
354
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
270
355
|
stopPacer();
|
|
271
356
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -319,6 +404,7 @@ class AudioMixer extends EventEmitter {
|
|
|
319
404
|
for (const p of participants)
|
|
320
405
|
this.attachParticipantIfNeeded(p);
|
|
321
406
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
407
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
322
408
|
for (const [slot, pid] of this.participantSlots) {
|
|
323
409
|
if (pid && !liveIds.has(pid))
|
|
324
410
|
this.detachParticipant(pid);
|
|
@@ -331,6 +417,7 @@ class AudioMixer extends EventEmitter {
|
|
|
331
417
|
}
|
|
332
418
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
333
419
|
this.activeSlots = {};
|
|
420
|
+
// Recreate the media stream to avoid stale references
|
|
334
421
|
this.setupMediaStream();
|
|
335
422
|
}
|
|
336
423
|
slotForParticipant(participantId) {
|
|
@@ -396,6 +483,7 @@ class AudioMixer extends EventEmitter {
|
|
|
396
483
|
}
|
|
397
484
|
this.activeSlots[slot] = undefined;
|
|
398
485
|
}
|
|
486
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
399
487
|
clearSlotQueue(slot);
|
|
400
488
|
this.participantSlots.set(slot, "");
|
|
401
489
|
}
|
|
@@ -519,10 +607,11 @@ class Assistant extends EventEmitter$1 {
|
|
|
519
607
|
}
|
|
520
608
|
}
|
|
521
609
|
|
|
522
|
-
|
|
610
|
+
dotenv.config();
|
|
611
|
+
const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
|
|
523
612
|
function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
524
613
|
let wherebyDomain;
|
|
525
|
-
{
|
|
614
|
+
if (IS_LOCAL === "true") {
|
|
526
615
|
const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
|
|
527
616
|
if (!ifaceAddrs) {
|
|
528
617
|
throw new Error(`Unknown interface ${BIND_INTERFACE}`);
|
|
@@ -533,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
|
|
|
533
622
|
}
|
|
534
623
|
wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
|
|
535
624
|
}
|
|
625
|
+
else {
|
|
626
|
+
wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
|
|
627
|
+
}
|
|
536
628
|
return `https://${wherebyDomain}${roomPath}`;
|
|
537
629
|
}
|
|
538
630
|
|
|
@@ -560,7 +652,7 @@ const webhookRouter = (webhookTriggers, emitter, assistantKey, startCombinedAudi
|
|
|
560
652
|
return router;
|
|
561
653
|
};
|
|
562
654
|
class Trigger extends EventEmitter {
|
|
563
|
-
constructor({ webhookTriggers = {}, port =
|
|
655
|
+
constructor({ webhookTriggers = {}, port = 8080, assistantKey, startCombinedAudioStream, startLocalMedia, }) {
|
|
564
656
|
super();
|
|
565
657
|
this.webhookTriggers = webhookTriggers;
|
|
566
658
|
this.port = port;
|
|
@@ -573,6 +665,7 @@ class Trigger extends EventEmitter {
|
|
|
573
665
|
const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
|
|
574
666
|
app.use(router);
|
|
575
667
|
const server = app.listen(this.port, () => {
|
|
668
|
+
// console.log(`Bot trigger server now running on port[${this.port}]`);
|
|
576
669
|
});
|
|
577
670
|
process.on("SIGTERM", () => {
|
|
578
671
|
server.close();
|
package/dist/polyfills.cjs
CHANGED
|
@@ -38,8 +38,10 @@ typeof SuppressedError === "function" ? SuppressedError : function (error, suppr
|
|
|
38
38
|
|
|
39
39
|
function setWebsocketOrigin(roomUrl) {
|
|
40
40
|
try {
|
|
41
|
+
// add pathname needed for parsing in rtcstats-server.
|
|
41
42
|
const url = new URL(roomUrl);
|
|
42
43
|
global.window.location.pathname = url.pathname;
|
|
44
|
+
// fix origin header needed for parsing in rtcstats-server.
|
|
43
45
|
const defaultClientOptions = {
|
|
44
46
|
origin: url.origin,
|
|
45
47
|
};
|
|
@@ -55,24 +57,29 @@ function setWebsocketOrigin(roomUrl) {
|
|
|
55
57
|
}
|
|
56
58
|
}
|
|
57
59
|
const wrtcMediaDevices = wrtc.mediaDevices;
|
|
58
|
-
global
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
60
|
+
Object.defineProperty(global, "navigator", {
|
|
61
|
+
value: {
|
|
62
|
+
userAgent: "Node.js/20",
|
|
63
|
+
mediaDevices: {
|
|
64
|
+
getUserMedia: wrtc.getUserMedia,
|
|
65
|
+
addEventListener: wrtcMediaDevices.addEventListener,
|
|
66
|
+
removeEventListener: wrtcMediaDevices.removeEventListener,
|
|
67
|
+
enumerateDevices: () => __awaiter(void 0, void 0, void 0, function* () {
|
|
68
|
+
return new Promise((resolve) => resolve([
|
|
69
|
+
{
|
|
70
|
+
deviceId: "default",
|
|
71
|
+
groupId: uuid.v4(),
|
|
72
|
+
kind: "audioinput",
|
|
73
|
+
label: "Dummy audio device",
|
|
74
|
+
},
|
|
75
|
+
]));
|
|
76
|
+
}),
|
|
77
|
+
},
|
|
74
78
|
},
|
|
75
|
-
|
|
79
|
+
writable: false,
|
|
80
|
+
enumerable: true,
|
|
81
|
+
configurable: true,
|
|
82
|
+
});
|
|
76
83
|
class DOMException {
|
|
77
84
|
constructor(...args) {
|
|
78
85
|
console.error("DOMException", args);
|
|
@@ -85,6 +92,10 @@ class RTCPeerConnection extends wrtc.RTCPeerConnection {
|
|
|
85
92
|
}
|
|
86
93
|
getStats(arg) {
|
|
87
94
|
return __awaiter(this, void 0, void 0, function* () {
|
|
95
|
+
/**
|
|
96
|
+
* node-wrtc seems to expect an Object argument, and doesn't handle the null arg we pass, so we
|
|
97
|
+
* wrap the call and filter the arg
|
|
98
|
+
**/
|
|
88
99
|
arg = arg instanceof Object ? arg : undefined;
|
|
89
100
|
const stats = yield this.wrappedGetStats(arg);
|
|
90
101
|
return stats;
|
|
@@ -107,6 +118,6 @@ global.RTCRtpSender = wrtc.RTCRtpSender;
|
|
|
107
118
|
global.RTCRtpTransceiver = wrtc.RTCRtpTransceiver;
|
|
108
119
|
global.RTCSctpTransport = wrtc.RTCSctpTransport;
|
|
109
120
|
global.RTCSessionDescription = wrtc.RTCSessionDescription;
|
|
110
|
-
global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval });
|
|
121
|
+
global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval }); // make sure all the classes / setInterval are available on window for rtcstats
|
|
111
122
|
|
|
112
123
|
exports.setWebsocketOrigin = setWebsocketOrigin;
|
package/dist/tools.cjs
CHANGED
|
@@ -19,9 +19,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
|
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
+
// Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
|
|
23
|
+
// participants to these slots based on mute/unmute state.
|
|
22
24
|
const PARTICIPANT_SLOTS = 20;
|
|
25
|
+
// Each sample is 2 bytes (16 bits) for PCM audio - s16le format (see BYTES_PER_SAMPLE below)
|
|
26
|
+
// 48000 Hz is the standard sample rate for WebRTC audio
|
|
23
27
|
const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
|
|
24
28
|
const BYTES_PER_SAMPLE = 2;
|
|
29
|
+
// 480 samples per 10ms frame at 48kHz
|
|
25
30
|
const FRAME_10MS_SAMPLES = 480;
|
|
26
31
|
const slotBuffers = new Map();
|
|
27
32
|
function appendAndDrainTo480(slot, newSamples) {
|
|
@@ -33,10 +38,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
33
38
|
let offset = 0;
|
|
34
39
|
while (merged.length - offset >= FRAME_10MS_SAMPLES) {
|
|
35
40
|
const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
|
|
36
|
-
enqueueFrame(slot, chunk);
|
|
41
|
+
enqueueFrame(slot, chunk); // always 480
|
|
37
42
|
offset += FRAME_10MS_SAMPLES;
|
|
38
43
|
}
|
|
39
|
-
slotBuffers.set(slot, merged.subarray(offset));
|
|
44
|
+
slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
|
|
40
45
|
}
|
|
41
46
|
({
|
|
42
47
|
enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
|
|
@@ -48,6 +53,10 @@ function appendAndDrainTo480(slot, newSamples) {
|
|
|
48
53
|
let slots = [];
|
|
49
54
|
let stopPacerFn = null;
|
|
50
55
|
let outputPacerState = null;
|
|
56
|
+
/**
|
|
57
|
+
* Simple linear interpolation resampler to convert audio to 48kHz.
|
|
58
|
+
* This handles the common case of 16kHz -> 48kHz (3x upsampling).
|
|
59
|
+
*/
|
|
51
60
|
function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
52
61
|
const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
|
|
53
62
|
const outputLength = Math.floor(inputFrames * ratio);
|
|
@@ -67,11 +76,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
|
|
|
67
76
|
}
|
|
68
77
|
return output;
|
|
69
78
|
}
|
|
79
|
+
/**
|
|
80
|
+
* Enqueue an audio frame for paced delivery to the RTCAudioSource.
|
|
81
|
+
*/
|
|
70
82
|
function enqueueOutputFrame(samples) {
|
|
71
83
|
if (outputPacerState) {
|
|
72
84
|
outputPacerState.frameQueue.push(samples);
|
|
73
85
|
}
|
|
74
86
|
}
|
|
87
|
+
/**
|
|
88
|
+
* Start the audio pacer loop for all input slots in an FFmpeg process.
|
|
89
|
+
*
|
|
90
|
+
* The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
|
|
91
|
+
* real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
|
|
92
|
+
* arrive jittery, bursty, or with slightly different clocks.
|
|
93
|
+
*
|
|
94
|
+
* Key behavior:
|
|
95
|
+
* - Writes exactly one frame per period, on a shared wall-clock grid.
|
|
96
|
+
* - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
|
|
97
|
+
* never stalls.
|
|
98
|
+
* - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
|
|
99
|
+
* - Honors Node stream backpressure (`write()` return false) without breaking
|
|
100
|
+
* the timing grid.
|
|
101
|
+
*
|
|
102
|
+
* This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
|
|
103
|
+
* can mix them without slow-downs or drift.
|
|
104
|
+
*
|
|
105
|
+
* Call this once right after spawning FFmpeg:
|
|
106
|
+
* ```ts
|
|
107
|
+
* const ff = spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady);
|
|
108
|
+
* startPacer(ff, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
|
|
109
|
+
* ```
|
|
110
|
+
*
|
|
111
|
+
* When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
|
|
112
|
+
*
|
|
113
|
+
* @param ff Child process handle from spawn("ffmpeg", ...)
|
|
114
|
+
* @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
|
|
115
|
+
*/
|
|
75
116
|
function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
76
117
|
if (stopPacerFn) {
|
|
77
118
|
stopPacerFn();
|
|
@@ -79,11 +120,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
79
120
|
}
|
|
80
121
|
const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
|
|
81
122
|
const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
|
|
82
|
-
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
123
|
+
const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
|
|
83
124
|
const t0 = nowMs();
|
|
84
125
|
slots = Array.from({ length: slotCount }, () => ({
|
|
85
126
|
q: [],
|
|
86
|
-
lastFrames: FRAME_10MS_SAMPLES,
|
|
127
|
+
lastFrames: FRAME_10MS_SAMPLES, // keep constant
|
|
87
128
|
nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
|
|
88
129
|
}));
|
|
89
130
|
outputPacerState = {
|
|
@@ -98,10 +139,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
98
139
|
for (let s = 0; s < slotCount; s++) {
|
|
99
140
|
const st = slots[s];
|
|
100
141
|
const w = writers[s];
|
|
101
|
-
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
|
|
142
|
+
const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
|
|
102
143
|
if (t >= st.nextDueMs) {
|
|
103
144
|
const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
|
|
104
145
|
if (!w.write(buf)) {
|
|
146
|
+
// Just continue without adding drain listener - backpressure will naturally resolve
|
|
105
147
|
const late = t - st.nextDueMs;
|
|
106
148
|
const steps = Math.max(1, Math.ceil(late / frameMs));
|
|
107
149
|
st.nextDueMs += steps * frameMs;
|
|
@@ -114,9 +156,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
114
156
|
}
|
|
115
157
|
if (!outputPacerState)
|
|
116
158
|
return;
|
|
159
|
+
// Handle output pacer for RTCAudioSource
|
|
117
160
|
const state = outputPacerState;
|
|
118
161
|
if (t >= state.nextDueMs) {
|
|
119
|
-
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
|
|
162
|
+
const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
|
|
120
163
|
if (!state.didEmitReadyEvent) {
|
|
121
164
|
state.onAudioStreamReady();
|
|
122
165
|
state.didEmitReadyEvent = true;
|
|
@@ -132,12 +175,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
|
|
|
132
175
|
}, 5);
|
|
133
176
|
stopPacerFn = () => clearInterval(iv);
|
|
134
177
|
}
|
|
178
|
+
/**
|
|
179
|
+
* Stop the audio pacer loop and clear all input slots.
|
|
180
|
+
* Call this before killing the FFmpeg process to ensure clean shutdown.
|
|
181
|
+
*/
|
|
135
182
|
function stopPacer() {
|
|
136
183
|
if (stopPacerFn)
|
|
137
184
|
stopPacerFn();
|
|
138
185
|
stopPacerFn = null;
|
|
139
186
|
slots = [];
|
|
140
187
|
}
|
|
188
|
+
/**
|
|
189
|
+
* Queue a live frame for a given slot (0..N-1).
|
|
190
|
+
* The samples are wrapped in a Buffer view (no copy) and appended to the slot's queue, which the pacer drains.
|
|
191
|
+
*/
|
|
141
192
|
function enqueueFrame(slot, samples, numberOfFrames) {
|
|
142
193
|
const st = slots[slot];
|
|
143
194
|
if (!st)
|
|
@@ -145,6 +196,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
|
|
|
145
196
|
const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
146
197
|
st.q.push(buf);
|
|
147
198
|
}
|
|
199
|
+
/**
|
|
200
|
+
* Clear the audio queue for a specific slot when a participant leaves.
|
|
201
|
+
* This prevents stale audio data from continuing to play after disconnect.
|
|
202
|
+
*/
|
|
148
203
|
function clearSlotQueue(slot) {
|
|
149
204
|
const st = slots[slot];
|
|
150
205
|
if (st) {
|
|
@@ -154,6 +209,11 @@ function clearSlotQueue(slot) {
|
|
|
154
209
|
st.nextDueMs = now + frameMs;
|
|
155
210
|
}
|
|
156
211
|
}
|
|
212
|
+
/**
|
|
213
|
+
* Get the FFmpeg arguments for mixing audio from multiple participants.
|
|
214
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
215
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
216
|
+
*/
|
|
157
217
|
function getFFmpegArguments() {
|
|
158
218
|
const N = PARTICIPANT_SLOTS;
|
|
159
219
|
const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
|
|
@@ -171,6 +231,14 @@ function getFFmpegArguments() {
|
|
|
171
231
|
ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
|
|
172
232
|
return ffArgs;
|
|
173
233
|
}
|
|
234
|
+
/**
|
|
235
|
+
* Spawn a new FFmpeg process for mixing audio from multiple participants.
|
|
236
|
+
* This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
|
|
237
|
+
* The output is in PCM 16-bit little-endian format at 48kHz sample rate.
|
|
238
|
+
* FFmpeg's stderr output is forwarded to console.error, prefixed with "[ffmpeg]".
|
|
239
|
+
* @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
|
|
240
|
+
* @return The spawned FFmpeg process.
|
|
241
|
+
*/
|
|
174
242
|
function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
175
243
|
const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
|
|
176
244
|
const args = getFFmpegArguments();
|
|
@@ -180,7 +248,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
180
248
|
ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
|
|
181
249
|
ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
|
|
182
250
|
let audioBuffer = Buffer.alloc(0);
|
|
183
|
-
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
|
|
251
|
+
const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
|
|
184
252
|
ffmpegProcess.stdout.on("data", (chunk) => {
|
|
185
253
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
|
186
254
|
while (audioBuffer.length >= FRAME_SIZE_BYTES) {
|
|
@@ -195,6 +263,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
|
|
|
195
263
|
});
|
|
196
264
|
return ffmpegProcess;
|
|
197
265
|
}
|
|
266
|
+
/**
|
|
267
|
+
* Write audio data from a MediaStreamTrack to the FFmpeg process.
|
|
268
|
+
* This function creates an AudioSink for the track and sets up a data handler
|
|
269
|
+
* that enqueues audio frames into the pacer.
|
|
270
|
+
*
|
|
271
|
+
* @param ffmpegProcess The FFmpeg process to which audio data will be written.
|
|
272
|
+
* @param slot The participant slot number (0..N-1) to which this track belongs.
|
|
273
|
+
* @param audioTrack The MediaStreamTrack containing the audio data.
|
|
274
|
+
* @return An object containing the AudioSink, the writable stream, and a stop function.
|
|
275
|
+
*/
|
|
198
276
|
function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
199
277
|
const writer = ffmpegProcess.stdio[3 + slot];
|
|
200
278
|
const sink = new AudioSink(audioTrack);
|
|
@@ -219,6 +297,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
|
|
|
219
297
|
};
|
|
220
298
|
return { sink, writer, stop };
|
|
221
299
|
}
|
|
300
|
+
/**
|
|
301
|
+
* Stop the FFmpeg process and clean up all resources.
|
|
302
|
+
* This function will unpipe the stdout, end all writable streams for each participant slot,
|
|
303
|
+
* and kill the FFmpeg process.
|
|
304
|
+
* @param ffmpegProcess The FFmpeg process to stop.
|
|
305
|
+
*/
|
|
222
306
|
function stopFFmpegProcess(ffmpegProcess) {
|
|
223
307
|
stopPacer();
|
|
224
308
|
if (ffmpegProcess && !ffmpegProcess.killed) {
|
|
@@ -272,6 +356,7 @@ class AudioMixer extends events.EventEmitter {
|
|
|
272
356
|
for (const p of participants)
|
|
273
357
|
this.attachParticipantIfNeeded(p);
|
|
274
358
|
const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
|
|
359
|
+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
275
360
|
for (const [slot, pid] of this.participantSlots) {
|
|
276
361
|
if (pid && !liveIds.has(pid))
|
|
277
362
|
this.detachParticipant(pid);
|
|
@@ -284,6 +369,7 @@ class AudioMixer extends events.EventEmitter {
|
|
|
284
369
|
}
|
|
285
370
|
this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
|
|
286
371
|
this.activeSlots = {};
|
|
372
|
+
// Recreate the media stream to avoid stale references
|
|
287
373
|
this.setupMediaStream();
|
|
288
374
|
}
|
|
289
375
|
slotForParticipant(participantId) {
|
|
@@ -349,6 +435,7 @@ class AudioMixer extends events.EventEmitter {
|
|
|
349
435
|
}
|
|
350
436
|
this.activeSlots[slot] = undefined;
|
|
351
437
|
}
|
|
438
|
+
// Clear any queued audio data for this slot to prevent stale audio
|
|
352
439
|
clearSlotQueue(slot);
|
|
353
440
|
this.participantSlots.set(slot, "");
|
|
354
441
|
}
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@whereby.com/assistant-sdk",
|
|
3
3
|
"description": "Assistant SDK for whereby.com",
|
|
4
4
|
"author": "Whereby AS",
|
|
5
|
-
"version": "0.0.0-canary-
|
|
5
|
+
"version": "0.0.0-canary-20250912142319",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"files": [
|
|
8
8
|
"dist",
|
|
@@ -47,6 +47,8 @@
|
|
|
47
47
|
}
|
|
48
48
|
},
|
|
49
49
|
"devDependencies": {
|
|
50
|
+
"body-parser": "2.2.0",
|
|
51
|
+
"express": "5.1.0",
|
|
50
52
|
"eslint": "^9.29.0",
|
|
51
53
|
"prettier": "^3.5.3",
|
|
52
54
|
"typescript": "^5.8.3",
|
|
@@ -58,9 +60,10 @@
|
|
|
58
60
|
},
|
|
59
61
|
"dependencies": {
|
|
60
62
|
"@roamhq/wrtc": "github:whereby/node-webrtc#patch/rtc_audio_source",
|
|
63
|
+
"dotenv": "^16.4.5",
|
|
61
64
|
"uuid": "^11.0.3",
|
|
62
65
|
"ws": "^8.18.0",
|
|
63
|
-
"@whereby.com/core": "0.0.0-canary-
|
|
66
|
+
"@whereby.com/core": "0.0.0-canary-20250912142319"
|
|
64
67
|
},
|
|
65
68
|
"prettier": "@whereby.com/prettier-config",
|
|
66
69
|
"scripts": {
|