@whereby.com/assistant-sdk 0.0.0-canary-20250912102624 → 0.0.0-canary-20250912144626

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -88,9 +88,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
88
88
  }
89
89
  }
90
90
 
91
+ // Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
92
+ // participants to these slots based on mute/unmute state.
91
93
  const PARTICIPANT_SLOTS = 20;
94
+ // Each sample is 2 bytes (16 bits) for PCM audio - s16le format
95
+ // 48000 Hz is the standard sample rate for WebRTC audio
92
96
  const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
93
97
  const BYTES_PER_SAMPLE = 2;
98
+ // 480 samples per 10ms frame at 48kHz
94
99
  const FRAME_10MS_SAMPLES = 480;
95
100
  const slotBuffers = new Map();
96
101
  function appendAndDrainTo480(slot, newSamples) {
@@ -102,10 +107,10 @@ function appendAndDrainTo480(slot, newSamples) {
102
107
  let offset = 0;
103
108
  while (merged.length - offset >= FRAME_10MS_SAMPLES) {
104
109
  const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
105
- enqueueFrame(slot, chunk);
110
+ enqueueFrame(slot, chunk); // always 480
106
111
  offset += FRAME_10MS_SAMPLES;
107
112
  }
108
- slotBuffers.set(slot, merged.subarray(offset));
113
+ slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
109
114
  }
110
115
  ({
111
116
  enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -117,6 +122,10 @@ function appendAndDrainTo480(slot, newSamples) {
117
122
  let slots = [];
118
123
  let stopPacerFn = null;
119
124
  let outputPacerState = null;
125
+ /**
126
+ * Simple linear interpolation resampler to convert audio to 48kHz.
127
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
128
+ */
120
129
  function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
121
130
  const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
122
131
  const outputLength = Math.floor(inputFrames * ratio);
@@ -136,11 +145,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
136
145
  }
137
146
  return output;
138
147
  }
148
+ /**
149
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
150
+ */
139
151
  function enqueueOutputFrame(samples) {
140
152
  if (outputPacerState) {
141
153
  outputPacerState.frameQueue.push(samples);
142
154
  }
143
155
  }
156
+ /**
157
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
158
+ *
159
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
160
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
161
+ * arrive jittery, bursty, or with slightly different clocks.
162
+ *
163
+ * Key behavior:
164
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
165
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
166
+ * never stalls.
167
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
168
+ * - Honors Node stream backpressure (`write()` return false) without breaking
169
+ * the timing grid.
170
+ *
171
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
172
+ * can mix them without slow-downs or drift.
173
+ *
174
+ * Call this once right after spawning FFmpeg:
175
+ * ```ts
176
+ * const ff = spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady);
176
+ * startPacer(ff, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
178
+ * ```
179
+ *
180
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
181
+ *
182
+ * @param ff Child process handle from spawn("ffmpeg", ...)
183
+ * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
184
+ */
144
185
  function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
145
186
  if (stopPacerFn) {
146
187
  stopPacerFn();
@@ -148,11 +189,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
148
189
  }
149
190
  const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
150
191
  const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
151
- const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
192
+ const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
152
193
  const t0 = nowMs();
153
194
  slots = Array.from({ length: slotCount }, () => ({
154
195
  q: [],
155
- lastFrames: FRAME_10MS_SAMPLES,
196
+ lastFrames: FRAME_10MS_SAMPLES, // keep constant
156
197
  nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
157
198
  }));
158
199
  outputPacerState = {
@@ -167,10 +208,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
167
208
  for (let s = 0; s < slotCount; s++) {
168
209
  const st = slots[s];
169
210
  const w = writers[s];
170
- const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
211
+ const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
171
212
  if (t >= st.nextDueMs) {
172
213
  const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
173
214
  if (!w.write(buf)) {
215
+ // Just continue without adding drain listener - backpressure will naturally resolve
174
216
  const late = t - st.nextDueMs;
175
217
  const steps = Math.max(1, Math.ceil(late / frameMs));
176
218
  st.nextDueMs += steps * frameMs;
@@ -183,9 +225,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
183
225
  }
184
226
  if (!outputPacerState)
185
227
  return;
228
+ // Handle output pacer for RTCAudioSource
186
229
  const state = outputPacerState;
187
230
  if (t >= state.nextDueMs) {
188
- const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
231
+ const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
189
232
  if (!state.didEmitReadyEvent) {
190
233
  state.onAudioStreamReady();
191
234
  state.didEmitReadyEvent = true;
@@ -201,12 +244,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
201
244
  }, 5);
202
245
  stopPacerFn = () => clearInterval(iv);
203
246
  }
247
+ /**
248
+ * Stop the audio pacer loop and clear all input slots.
249
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
250
+ */
204
251
  function stopPacer() {
205
252
  if (stopPacerFn)
206
253
  stopPacerFn();
207
254
  stopPacerFn = null;
208
255
  slots = [];
209
256
  }
257
+ /**
258
+ * Queue a live frame for a given slot (0..N-1).
259
+ * The samples are wrapped in a Buffer and appended to the slot's queue as-is; the slot's schedule is not changed here.
260
+ */
210
261
  function enqueueFrame(slot, samples, numberOfFrames) {
211
262
  const st = slots[slot];
212
263
  if (!st)
@@ -214,6 +265,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
214
265
  const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
215
266
  st.q.push(buf);
216
267
  }
268
+ /**
269
+ * Clear the audio queue for a specific slot when a participant leaves.
270
+ * This prevents stale audio data from continuing to play after disconnect.
271
+ */
217
272
  function clearSlotQueue(slot) {
218
273
  const st = slots[slot];
219
274
  if (st) {
@@ -223,6 +278,11 @@ function clearSlotQueue(slot) {
223
278
  st.nextDueMs = now + frameMs;
224
279
  }
225
280
  }
281
+ /**
282
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
283
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
284
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
285
+ */
226
286
  function getFFmpegArguments() {
227
287
  const N = PARTICIPANT_SLOTS;
228
288
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -240,6 +300,14 @@ function getFFmpegArguments() {
240
300
  ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
241
301
  return ffArgs;
242
302
  }
303
+ /**
304
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
305
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
306
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
307
+ * The process will log its output to stderr.
308
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
309
+ * @return The spawned FFmpeg process.
310
+ */
243
311
  function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
244
312
  const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
245
313
  const args = getFFmpegArguments();
@@ -249,7 +317,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
249
317
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
250
318
  ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
251
319
  let audioBuffer = Buffer.alloc(0);
252
- const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
320
+ const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
253
321
  ffmpegProcess.stdout.on("data", (chunk) => {
254
322
  audioBuffer = Buffer.concat([audioBuffer, chunk]);
255
323
  while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -264,6 +332,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
264
332
  });
265
333
  return ffmpegProcess;
266
334
  }
335
+ /**
336
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
337
+ * This function creates an AudioSink for the track and sets up a data handler
338
+ * that enqueues audio frames into the pacer.
339
+ *
340
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
341
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
342
+ * @param audioTrack The MediaStreamTrack containing the audio data.
343
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
344
+ */
267
345
  function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
268
346
  const writer = ffmpegProcess.stdio[3 + slot];
269
347
  const sink = new AudioSink(audioTrack);
@@ -288,6 +366,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
288
366
  };
289
367
  return { sink, writer, stop };
290
368
  }
369
+ /**
370
+ * Stop the FFmpeg process and clean up all resources.
371
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
372
+ * and kill the FFmpeg process.
373
+ * @param ffmpegProcess The FFmpeg process to stop.
374
+ */
291
375
  function stopFFmpegProcess(ffmpegProcess) {
292
376
  stopPacer();
293
377
  if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -341,6 +425,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
341
425
  for (const p of participants)
342
426
  this.attachParticipantIfNeeded(p);
343
427
  const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
428
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
344
429
  for (const [slot, pid] of this.participantSlots) {
345
430
  if (pid && !liveIds.has(pid))
346
431
  this.detachParticipant(pid);
@@ -353,6 +438,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
353
438
  }
354
439
  this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
355
440
  this.activeSlots = {};
441
+ // Recreate the media stream to avoid stale references
356
442
  this.setupMediaStream();
357
443
  }
358
444
  slotForParticipant(participantId) {
@@ -418,6 +504,7 @@ class AudioMixer extends EventEmitter.EventEmitter {
418
504
  }
419
505
  this.activeSlots[slot] = undefined;
420
506
  }
507
+ // Clear any queued audio data for this slot to prevent stale audio
421
508
  clearSlotQueue(slot);
422
509
  this.participantSlots.set(slot, "");
423
510
  }
@@ -545,10 +632,7 @@ dotenv__namespace.config();
545
632
  const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
546
633
  function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
547
634
  let wherebyDomain;
548
- if (!IS_LOCAL) {
549
- wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
550
- }
551
- else {
635
+ if (IS_LOCAL === "true") {
552
636
  const ifaceAddrs = os.networkInterfaces()[BIND_INTERFACE];
553
637
  if (!ifaceAddrs) {
554
638
  throw new Error(`Unknown interface ${BIND_INTERFACE}`);
@@ -559,6 +643,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
559
643
  }
560
644
  wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
561
645
  }
646
+ else {
647
+ wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
648
+ }
562
649
  return `https://${wherebyDomain}${roomPath}`;
563
650
  }
564
651
 
@@ -599,6 +686,7 @@ class Trigger extends EventEmitter.EventEmitter {
599
686
  const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
600
687
  app.use(router);
601
688
  const server = app.listen(this.port, () => {
689
+ // console.log(`Bot trigger server now running on port[${this.port}]`);
602
690
  });
603
691
  process.on("SIGTERM", () => {
604
692
  server.close();
package/dist/index.d.cts CHANGED
@@ -1,9 +1,41 @@
1
+ import * as wrtc from '@roamhq/wrtc';
2
+ import wrtc__default from '@roamhq/wrtc';
1
3
  import { RoomConnectionClient, RemoteParticipantState, ChatMessage } from '@whereby.com/core';
2
4
  export { RemoteParticipantState } from '@whereby.com/core';
3
- import wrtc from '@roamhq/wrtc';
4
5
  import EventEmitter, { EventEmitter as EventEmitter$1 } from 'events';
5
6
  import { PassThrough } from 'stream';
6
7
 
8
+ declare global {
9
+ interface MediaStream extends wrtc.MediaStream {
10
+ }
11
+ interface MediaStreamTrack extends wrtc.MediaStreamTrack {
12
+ }
13
+ interface RTCDataChannel extends wrtc.RTCDataChannel {
14
+ }
15
+ interface RTCDataChannelEvent extends wrtc.RTCDataChannelEvent {
16
+ }
17
+ interface RTCDtlsTransport extends wrtc.RTCDtlsTransport {
18
+ }
19
+ interface RTCIceCandidate extends wrtc.RTCIceCandidate {
20
+ }
21
+ interface RTCIceTransport extends wrtc.RTCIceTransport {
22
+ }
23
+ interface RTCPeerConnection extends wrtc.RTCPeerConnection {
24
+ }
25
+ interface RTCPeerConnectionIceEvent extends wrtc.RTCPeerConnectionIceEvent {
26
+ }
27
+ interface RTCRtpReceiver extends wrtc.RTCRtpReceiver {
28
+ }
29
+ interface RTCRtpSender extends wrtc.RTCRtpSender {
30
+ }
31
+ interface RTCRtpTransceiver extends wrtc.RTCRtpTransceiver {
32
+ }
33
+ interface RTCSctpTransport extends wrtc.RTCSctpTransport {
34
+ }
35
+ interface RTCSessionDescription extends wrtc.RTCSessionDescription {
36
+ }
37
+ }
38
+
7
39
  declare const AUDIO_STREAM_READY = "AUDIO_STREAM_READY";
8
40
  type AssistantEvents = {
9
41
  [AUDIO_STREAM_READY]: [{
@@ -29,7 +61,7 @@ declare class Assistant extends EventEmitter<AssistantEvents> {
29
61
  joinRoom(roomUrl: string): Promise<void>;
30
62
  startLocalMedia(): void;
31
63
  getLocalMediaStream(): MediaStream | null;
32
- getLocalAudioSource(): wrtc.nonstandard.RTCAudioSource | null;
64
+ getLocalAudioSource(): wrtc__default.nonstandard.RTCAudioSource | null;
33
65
  getRoomConnection(): RoomConnectionClient;
34
66
  getCombinedAudioStream(): MediaStream | null;
35
67
  getRemoteParticipants(): RemoteParticipantState[];
@@ -122,7 +154,7 @@ declare class Trigger extends EventEmitter$1<TriggerEvents> {
122
154
  declare class AudioSource extends PassThrough {
123
155
  constructor();
124
156
  }
125
- declare class AudioSink extends wrtc.nonstandard.RTCAudioSink {
157
+ declare class AudioSink extends wrtc__default.nonstandard.RTCAudioSink {
126
158
  private _sink;
127
159
  constructor(track: MediaStreamTrack);
128
160
  subscribe(cb: (d: {
package/dist/index.d.mts CHANGED
@@ -1,9 +1,41 @@
1
+ import * as wrtc from '@roamhq/wrtc';
2
+ import wrtc__default from '@roamhq/wrtc';
1
3
  import { RoomConnectionClient, RemoteParticipantState, ChatMessage } from '@whereby.com/core';
2
4
  export { RemoteParticipantState } from '@whereby.com/core';
3
- import wrtc from '@roamhq/wrtc';
4
5
  import EventEmitter, { EventEmitter as EventEmitter$1 } from 'events';
5
6
  import { PassThrough } from 'stream';
6
7
 
8
+ declare global {
9
+ interface MediaStream extends wrtc.MediaStream {
10
+ }
11
+ interface MediaStreamTrack extends wrtc.MediaStreamTrack {
12
+ }
13
+ interface RTCDataChannel extends wrtc.RTCDataChannel {
14
+ }
15
+ interface RTCDataChannelEvent extends wrtc.RTCDataChannelEvent {
16
+ }
17
+ interface RTCDtlsTransport extends wrtc.RTCDtlsTransport {
18
+ }
19
+ interface RTCIceCandidate extends wrtc.RTCIceCandidate {
20
+ }
21
+ interface RTCIceTransport extends wrtc.RTCIceTransport {
22
+ }
23
+ interface RTCPeerConnection extends wrtc.RTCPeerConnection {
24
+ }
25
+ interface RTCPeerConnectionIceEvent extends wrtc.RTCPeerConnectionIceEvent {
26
+ }
27
+ interface RTCRtpReceiver extends wrtc.RTCRtpReceiver {
28
+ }
29
+ interface RTCRtpSender extends wrtc.RTCRtpSender {
30
+ }
31
+ interface RTCRtpTransceiver extends wrtc.RTCRtpTransceiver {
32
+ }
33
+ interface RTCSctpTransport extends wrtc.RTCSctpTransport {
34
+ }
35
+ interface RTCSessionDescription extends wrtc.RTCSessionDescription {
36
+ }
37
+ }
38
+
7
39
  declare const AUDIO_STREAM_READY = "AUDIO_STREAM_READY";
8
40
  type AssistantEvents = {
9
41
  [AUDIO_STREAM_READY]: [{
@@ -29,7 +61,7 @@ declare class Assistant extends EventEmitter<AssistantEvents> {
29
61
  joinRoom(roomUrl: string): Promise<void>;
30
62
  startLocalMedia(): void;
31
63
  getLocalMediaStream(): MediaStream | null;
32
- getLocalAudioSource(): wrtc.nonstandard.RTCAudioSource | null;
64
+ getLocalAudioSource(): wrtc__default.nonstandard.RTCAudioSource | null;
33
65
  getRoomConnection(): RoomConnectionClient;
34
66
  getCombinedAudioStream(): MediaStream | null;
35
67
  getRemoteParticipants(): RemoteParticipantState[];
@@ -122,7 +154,7 @@ declare class Trigger extends EventEmitter$1<TriggerEvents> {
122
154
  declare class AudioSource extends PassThrough {
123
155
  constructor();
124
156
  }
125
- declare class AudioSink extends wrtc.nonstandard.RTCAudioSink {
157
+ declare class AudioSink extends wrtc__default.nonstandard.RTCAudioSink {
126
158
  private _sink;
127
159
  constructor(track: MediaStreamTrack);
128
160
  subscribe(cb: (d: {
package/dist/index.d.ts CHANGED
@@ -1,9 +1,41 @@
1
+ import * as wrtc from '@roamhq/wrtc';
2
+ import wrtc__default from '@roamhq/wrtc';
1
3
  import { RoomConnectionClient, RemoteParticipantState, ChatMessage } from '@whereby.com/core';
2
4
  export { RemoteParticipantState } from '@whereby.com/core';
3
- import wrtc from '@roamhq/wrtc';
4
5
  import EventEmitter, { EventEmitter as EventEmitter$1 } from 'events';
5
6
  import { PassThrough } from 'stream';
6
7
 
8
+ declare global {
9
+ interface MediaStream extends wrtc.MediaStream {
10
+ }
11
+ interface MediaStreamTrack extends wrtc.MediaStreamTrack {
12
+ }
13
+ interface RTCDataChannel extends wrtc.RTCDataChannel {
14
+ }
15
+ interface RTCDataChannelEvent extends wrtc.RTCDataChannelEvent {
16
+ }
17
+ interface RTCDtlsTransport extends wrtc.RTCDtlsTransport {
18
+ }
19
+ interface RTCIceCandidate extends wrtc.RTCIceCandidate {
20
+ }
21
+ interface RTCIceTransport extends wrtc.RTCIceTransport {
22
+ }
23
+ interface RTCPeerConnection extends wrtc.RTCPeerConnection {
24
+ }
25
+ interface RTCPeerConnectionIceEvent extends wrtc.RTCPeerConnectionIceEvent {
26
+ }
27
+ interface RTCRtpReceiver extends wrtc.RTCRtpReceiver {
28
+ }
29
+ interface RTCRtpSender extends wrtc.RTCRtpSender {
30
+ }
31
+ interface RTCRtpTransceiver extends wrtc.RTCRtpTransceiver {
32
+ }
33
+ interface RTCSctpTransport extends wrtc.RTCSctpTransport {
34
+ }
35
+ interface RTCSessionDescription extends wrtc.RTCSessionDescription {
36
+ }
37
+ }
38
+
7
39
  declare const AUDIO_STREAM_READY = "AUDIO_STREAM_READY";
8
40
  type AssistantEvents = {
9
41
  [AUDIO_STREAM_READY]: [{
@@ -29,7 +61,7 @@ declare class Assistant extends EventEmitter<AssistantEvents> {
29
61
  joinRoom(roomUrl: string): Promise<void>;
30
62
  startLocalMedia(): void;
31
63
  getLocalMediaStream(): MediaStream | null;
32
- getLocalAudioSource(): wrtc.nonstandard.RTCAudioSource | null;
64
+ getLocalAudioSource(): wrtc__default.nonstandard.RTCAudioSource | null;
33
65
  getRoomConnection(): RoomConnectionClient;
34
66
  getCombinedAudioStream(): MediaStream | null;
35
67
  getRemoteParticipants(): RemoteParticipantState[];
@@ -122,7 +154,7 @@ declare class Trigger extends EventEmitter$1<TriggerEvents> {
122
154
  declare class AudioSource extends PassThrough {
123
155
  constructor();
124
156
  }
125
- declare class AudioSink extends wrtc.nonstandard.RTCAudioSink {
157
+ declare class AudioSink extends wrtc__default.nonstandard.RTCAudioSink {
126
158
  private _sink;
127
159
  constructor(track: MediaStreamTrack);
128
160
  subscribe(cb: (d: {
package/dist/index.mjs CHANGED
@@ -67,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
67
67
  }
68
68
  }
69
69
 
70
+ // Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
71
+ // participants to these slots based on mute/unmute state.
70
72
  const PARTICIPANT_SLOTS = 20;
73
+ // Each sample is 2 bytes (16 bits) for PCM audio - s16le format
74
+ // 48000 Hz is the standard sample rate for WebRTC audio
71
75
  const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
72
76
  const BYTES_PER_SAMPLE = 2;
77
+ // 480 samples per 10ms frame at 48kHz
73
78
  const FRAME_10MS_SAMPLES = 480;
74
79
  const slotBuffers = new Map();
75
80
  function appendAndDrainTo480(slot, newSamples) {
@@ -81,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
81
86
  let offset = 0;
82
87
  while (merged.length - offset >= FRAME_10MS_SAMPLES) {
83
88
  const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
84
- enqueueFrame(slot, chunk);
89
+ enqueueFrame(slot, chunk); // always 480
85
90
  offset += FRAME_10MS_SAMPLES;
86
91
  }
87
- slotBuffers.set(slot, merged.subarray(offset));
92
+ slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
88
93
  }
89
94
  ({
90
95
  enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -96,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
96
101
  let slots = [];
97
102
  let stopPacerFn = null;
98
103
  let outputPacerState = null;
104
+ /**
105
+ * Simple linear interpolation resampler to convert audio to 48kHz.
106
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
107
+ */
99
108
  function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
100
109
  const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
101
110
  const outputLength = Math.floor(inputFrames * ratio);
@@ -115,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
115
124
  }
116
125
  return output;
117
126
  }
127
+ /**
128
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
129
+ */
118
130
  function enqueueOutputFrame(samples) {
119
131
  if (outputPacerState) {
120
132
  outputPacerState.frameQueue.push(samples);
121
133
  }
122
134
  }
135
+ /**
136
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
137
+ *
138
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
139
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
140
+ * arrive jittery, bursty, or with slightly different clocks.
141
+ *
142
+ * Key behavior:
143
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
144
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
145
+ * never stalls.
146
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
147
+ * - Honors Node stream backpressure (`write()` return false) without breaking
148
+ * the timing grid.
149
+ *
150
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
151
+ * can mix them without slow-downs or drift.
152
+ *
153
+ * Call this once right after spawning FFmpeg:
154
+ * ```ts
155
+ * const ff = spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady);
155
+ * startPacer(ff, PARTICIPANT_SLOTS, rtcAudioSource, onAudioStreamReady);
157
+ * ```
158
+ *
159
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
160
+ *
161
+ * @param ff Child process handle from spawn("ffmpeg", ...)
162
+ * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
163
+ */
123
164
  function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
124
165
  if (stopPacerFn) {
125
166
  stopPacerFn();
@@ -127,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
127
168
  }
128
169
  const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
129
170
  const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
130
- const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
171
+ const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
131
172
  const t0 = nowMs();
132
173
  slots = Array.from({ length: slotCount }, () => ({
133
174
  q: [],
134
- lastFrames: FRAME_10MS_SAMPLES,
175
+ lastFrames: FRAME_10MS_SAMPLES, // keep constant
135
176
  nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
136
177
  }));
137
178
  outputPacerState = {
@@ -146,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
146
187
  for (let s = 0; s < slotCount; s++) {
147
188
  const st = slots[s];
148
189
  const w = writers[s];
149
- const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
190
+ const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
150
191
  if (t >= st.nextDueMs) {
151
192
  const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
152
193
  if (!w.write(buf)) {
194
+ // Just continue without adding drain listener - backpressure will naturally resolve
153
195
  const late = t - st.nextDueMs;
154
196
  const steps = Math.max(1, Math.ceil(late / frameMs));
155
197
  st.nextDueMs += steps * frameMs;
@@ -162,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
162
204
  }
163
205
  if (!outputPacerState)
164
206
  return;
207
+ // Handle output pacer for RTCAudioSource
165
208
  const state = outputPacerState;
166
209
  if (t >= state.nextDueMs) {
167
- const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
210
+ const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
168
211
  if (!state.didEmitReadyEvent) {
169
212
  state.onAudioStreamReady();
170
213
  state.didEmitReadyEvent = true;
@@ -180,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
180
223
  }, 5);
181
224
  stopPacerFn = () => clearInterval(iv);
182
225
  }
226
+ /**
227
+ * Stop the audio pacer loop and clear all input slots.
228
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
229
+ */
183
230
  function stopPacer() {
184
231
  if (stopPacerFn)
185
232
  stopPacerFn();
186
233
  stopPacerFn = null;
187
234
  slots = [];
188
235
  }
236
+ /**
237
+ * Queue a live frame for a given slot (0..N-1).
238
+ * The samples are wrapped in a Buffer and appended to the slot's queue as-is; the slot's schedule is not changed here.
239
+ */
189
240
  function enqueueFrame(slot, samples, numberOfFrames) {
190
241
  const st = slots[slot];
191
242
  if (!st)
@@ -193,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
193
244
  const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
194
245
  st.q.push(buf);
195
246
  }
247
+ /**
248
+ * Clear the audio queue for a specific slot when a participant leaves.
249
+ * This prevents stale audio data from continuing to play after disconnect.
250
+ */
196
251
  function clearSlotQueue(slot) {
197
252
  const st = slots[slot];
198
253
  if (st) {
@@ -202,6 +257,11 @@ function clearSlotQueue(slot) {
202
257
  st.nextDueMs = now + frameMs;
203
258
  }
204
259
  }
260
+ /**
261
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
262
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
263
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
264
+ */
205
265
  function getFFmpegArguments() {
206
266
  const N = PARTICIPANT_SLOTS;
207
267
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -219,6 +279,14 @@ function getFFmpegArguments() {
219
279
  ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
220
280
  return ffArgs;
221
281
  }
282
+ /**
283
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
284
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
285
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
286
+ * The process will log its output to stderr.
287
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
288
+ * @return The spawned FFmpeg process.
289
+ */
222
290
  function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
223
291
  const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
224
292
  const args = getFFmpegArguments();
@@ -228,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
228
296
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
229
297
  ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
230
298
  let audioBuffer = Buffer.alloc(0);
231
- const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
299
+ const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
232
300
  ffmpegProcess.stdout.on("data", (chunk) => {
233
301
  audioBuffer = Buffer.concat([audioBuffer, chunk]);
234
302
  while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -243,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
243
311
  });
244
312
  return ffmpegProcess;
245
313
  }
314
+ /**
315
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
316
+ * This function creates an AudioSink for the track and sets up a data handler
317
+ * that enqueues audio frames into the pacer.
318
+ *
319
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
320
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
321
+ * @param audioTrack The MediaStreamTrack containing the audio data.
322
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
323
+ */
246
324
  function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
247
325
  const writer = ffmpegProcess.stdio[3 + slot];
248
326
  const sink = new AudioSink(audioTrack);
@@ -267,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
267
345
  };
268
346
  return { sink, writer, stop };
269
347
  }
348
+ /**
349
+ * Stop the FFmpeg process and clean up all resources.
350
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
351
+ * and kill the FFmpeg process.
352
+ * @param ffmpegProcess The FFmpeg process to stop.
353
+ */
270
354
  function stopFFmpegProcess(ffmpegProcess) {
271
355
  stopPacer();
272
356
  if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -320,6 +404,7 @@ class AudioMixer extends EventEmitter {
320
404
  for (const p of participants)
321
405
  this.attachParticipantIfNeeded(p);
322
406
  const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
407
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
323
408
  for (const [slot, pid] of this.participantSlots) {
324
409
  if (pid && !liveIds.has(pid))
325
410
  this.detachParticipant(pid);
@@ -332,6 +417,7 @@ class AudioMixer extends EventEmitter {
332
417
  }
333
418
  this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
334
419
  this.activeSlots = {};
420
+ // Recreate the media stream to avoid stale references
335
421
  this.setupMediaStream();
336
422
  }
337
423
  slotForParticipant(participantId) {
@@ -397,6 +483,7 @@ class AudioMixer extends EventEmitter {
397
483
  }
398
484
  this.activeSlots[slot] = undefined;
399
485
  }
486
+ // Clear any queued audio data for this slot to prevent stale audio
400
487
  clearSlotQueue(slot);
401
488
  this.participantSlots.set(slot, "");
402
489
  }
@@ -524,10 +611,7 @@ dotenv.config();
524
611
  const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
525
612
  function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
526
613
  let wherebyDomain;
527
- if (!IS_LOCAL) {
528
- wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
529
- }
530
- else {
614
+ if (IS_LOCAL === "true") {
531
615
  const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
532
616
  if (!ifaceAddrs) {
533
617
  throw new Error(`Unknown interface ${BIND_INTERFACE}`);
@@ -538,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
538
622
  }
539
623
  wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
540
624
  }
625
+ else {
626
+ wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
627
+ }
541
628
  return `https://${wherebyDomain}${roomPath}`;
542
629
  }
543
630
 
@@ -578,6 +665,7 @@ class Trigger extends EventEmitter {
578
665
  const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
579
666
  app.use(router);
580
667
  const server = app.listen(this.port, () => {
668
+ // console.log(`Bot trigger server now running on port[${this.port}]`);
581
669
  });
582
670
  process.on("SIGTERM", () => {
583
671
  server.close();
@@ -67,9 +67,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
67
67
  }
68
68
  }
69
69
 
70
+ // Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
71
+ // participants to these slots based on mute/unmute state.
70
72
  const PARTICIPANT_SLOTS = 20;
73
+ // Each sample is 2 bytes (16 bits) for PCM audio - s16le format
74
+ // 48000 Hz is the standard sample rate for WebRTC audio
71
75
  const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
72
76
  const BYTES_PER_SAMPLE = 2;
77
+ // 480 samples per 10ms frame at 48kHz
73
78
  const FRAME_10MS_SAMPLES = 480;
74
79
  const slotBuffers = new Map();
75
80
  function appendAndDrainTo480(slot, newSamples) {
@@ -81,10 +86,10 @@ function appendAndDrainTo480(slot, newSamples) {
81
86
  let offset = 0;
82
87
  while (merged.length - offset >= FRAME_10MS_SAMPLES) {
83
88
  const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
84
- enqueueFrame(slot, chunk);
89
+ enqueueFrame(slot, chunk); // always 480
85
90
  offset += FRAME_10MS_SAMPLES;
86
91
  }
87
- slotBuffers.set(slot, merged.subarray(offset));
92
+ slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
88
93
  }
89
94
  ({
90
95
  enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -96,6 +101,10 @@ function appendAndDrainTo480(slot, newSamples) {
96
101
  let slots = [];
97
102
  let stopPacerFn = null;
98
103
  let outputPacerState = null;
104
+ /**
105
+ * Simple linear interpolation resampler to convert audio to 48kHz.
106
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
107
+ */
99
108
  function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
100
109
  const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
101
110
  const outputLength = Math.floor(inputFrames * ratio);
@@ -115,11 +124,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
115
124
  }
116
125
  return output;
117
126
  }
127
+ /**
128
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
129
+ */
118
130
  function enqueueOutputFrame(samples) {
119
131
  if (outputPacerState) {
120
132
  outputPacerState.frameQueue.push(samples);
121
133
  }
122
134
  }
135
+ /**
136
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
137
+ *
138
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
139
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
140
+ * arrive jittery, bursty, or with slightly different clocks.
141
+ *
142
+ * Key behavior:
143
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
144
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
145
+ * never stalls.
146
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
147
+ * - Honors Node stream backpressure (`write()` return false) without breaking
148
+ * the timing grid.
149
+ *
150
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
151
+ * can mix them without slow-downs or drift.
152
+ *
153
+ * Call this once right after spawning FFmpeg:
154
+ * ```ts
155
+ * const ff = spawnFFmpegProcess();
156
+ * startPacer(ff, PARTICIPANT_SLOTS);
157
+ * ```
158
+ *
159
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
160
+ *
161
+ * @param ff Child process handle from spawn("ffmpeg", ...)
162
+ * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
163
+ */
123
164
  function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
124
165
  if (stopPacerFn) {
125
166
  stopPacerFn();
@@ -127,11 +168,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
127
168
  }
128
169
  const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
129
170
  const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
130
- const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
171
+ const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
131
172
  const t0 = nowMs();
132
173
  slots = Array.from({ length: slotCount }, () => ({
133
174
  q: [],
134
- lastFrames: FRAME_10MS_SAMPLES,
175
+ lastFrames: FRAME_10MS_SAMPLES, // keep constant
135
176
  nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
136
177
  }));
137
178
  outputPacerState = {
@@ -146,10 +187,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
146
187
  for (let s = 0; s < slotCount; s++) {
147
188
  const st = slots[s];
148
189
  const w = writers[s];
149
- const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
190
+ const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
150
191
  if (t >= st.nextDueMs) {
151
192
  const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
152
193
  if (!w.write(buf)) {
194
+ // Just continue without adding drain listener - backpressure will naturally resolve
153
195
  const late = t - st.nextDueMs;
154
196
  const steps = Math.max(1, Math.ceil(late / frameMs));
155
197
  st.nextDueMs += steps * frameMs;
@@ -162,9 +204,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
162
204
  }
163
205
  if (!outputPacerState)
164
206
  return;
207
+ // Handle output pacer for RTCAudioSource
165
208
  const state = outputPacerState;
166
209
  if (t >= state.nextDueMs) {
167
- const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
210
+ const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
168
211
  if (!state.didEmitReadyEvent) {
169
212
  state.onAudioStreamReady();
170
213
  state.didEmitReadyEvent = true;
@@ -180,12 +223,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
180
223
  }, 5);
181
224
  stopPacerFn = () => clearInterval(iv);
182
225
  }
226
+ /**
227
+ * Stop the audio pacer loop and clear all input slots.
228
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
229
+ */
183
230
  function stopPacer() {
184
231
  if (stopPacerFn)
185
232
  stopPacerFn();
186
233
  stopPacerFn = null;
187
234
  slots = [];
188
235
  }
236
+ /**
237
+ * Queue a live frame for a given slot (0..N-1).
238
+ * Auto-resnaps the slot's schedule if the frame size (480/960) changes.
239
+ */
189
240
  function enqueueFrame(slot, samples, numberOfFrames) {
190
241
  const st = slots[slot];
191
242
  if (!st)
@@ -193,6 +244,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
193
244
  const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
194
245
  st.q.push(buf);
195
246
  }
247
+ /**
248
+ * Clear the audio queue for a specific slot when a participant leaves.
249
+ * This prevents stale audio data from continuing to play after disconnect.
250
+ */
196
251
  function clearSlotQueue(slot) {
197
252
  const st = slots[slot];
198
253
  if (st) {
@@ -202,6 +257,11 @@ function clearSlotQueue(slot) {
202
257
  st.nextDueMs = now + frameMs;
203
258
  }
204
259
  }
260
+ /**
261
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
262
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
263
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
264
+ */
205
265
  function getFFmpegArguments() {
206
266
  const N = PARTICIPANT_SLOTS;
207
267
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -219,6 +279,14 @@ function getFFmpegArguments() {
219
279
  ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
220
280
  return ffArgs;
221
281
  }
282
+ /**
283
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
284
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
285
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
286
+ * The process will log its output to stderr.
287
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
288
+ * @return The spawned FFmpeg process.
289
+ */
222
290
  function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
223
291
  const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
224
292
  const args = getFFmpegArguments();
@@ -228,7 +296,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
228
296
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
229
297
  ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
230
298
  let audioBuffer = Buffer.alloc(0);
231
- const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
299
+ const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
232
300
  ffmpegProcess.stdout.on("data", (chunk) => {
233
301
  audioBuffer = Buffer.concat([audioBuffer, chunk]);
234
302
  while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -243,6 +311,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
243
311
  });
244
312
  return ffmpegProcess;
245
313
  }
314
+ /**
315
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
316
+ * This function creates an AudioSink for the track and sets up a data handler
317
+ * that enqueues audio frames into the pacer.
318
+ *
319
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
320
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
321
+ * @param audioTrack The MediaStreamTrack containing the audio data.
322
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
323
+ */
246
324
  function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
247
325
  const writer = ffmpegProcess.stdio[3 + slot];
248
326
  const sink = new AudioSink(audioTrack);
@@ -267,6 +345,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
267
345
  };
268
346
  return { sink, writer, stop };
269
347
  }
348
+ /**
349
+ * Stop the FFmpeg process and clean up all resources.
350
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
351
+ * and kill the FFmpeg process.
352
+ * @param ffmpegProcess The FFmpeg process to stop.
353
+ */
270
354
  function stopFFmpegProcess(ffmpegProcess) {
271
355
  stopPacer();
272
356
  if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -320,6 +404,7 @@ class AudioMixer extends EventEmitter {
320
404
  for (const p of participants)
321
405
  this.attachParticipantIfNeeded(p);
322
406
  const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
407
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
323
408
  for (const [slot, pid] of this.participantSlots) {
324
409
  if (pid && !liveIds.has(pid))
325
410
  this.detachParticipant(pid);
@@ -332,6 +417,7 @@ class AudioMixer extends EventEmitter {
332
417
  }
333
418
  this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
334
419
  this.activeSlots = {};
420
+ // Recreate the media stream to avoid stale references
335
421
  this.setupMediaStream();
336
422
  }
337
423
  slotForParticipant(participantId) {
@@ -397,6 +483,7 @@ class AudioMixer extends EventEmitter {
397
483
  }
398
484
  this.activeSlots[slot] = undefined;
399
485
  }
486
+ // Clear any queued audio data for this slot to prevent stale audio
400
487
  clearSlotQueue(slot);
401
488
  this.participantSlots.set(slot, "");
402
489
  }
@@ -524,10 +611,7 @@ dotenv.config();
524
611
  const { IS_LOCAL = "false", BIND_INTERFACE = "en0" } = process.env;
525
612
  function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
526
613
  let wherebyDomain;
527
- if (!IS_LOCAL) {
528
- wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
529
- }
530
- else {
614
+ if (IS_LOCAL === "true") {
531
615
  const ifaceAddrs = networkInterfaces()[BIND_INTERFACE];
532
616
  if (!ifaceAddrs) {
533
617
  throw new Error(`Unknown interface ${BIND_INTERFACE}`);
@@ -538,6 +622,9 @@ function buildRoomUrl(roomPath, wherebySubdomain, baseDomain = "whereby.com") {
538
622
  }
539
623
  wherebyDomain = `${wherebySubdomain}-ip-${bindAddr.address.replace(/[.]/g, "-")}.hereby.dev:4443`;
540
624
  }
625
+ else {
626
+ wherebyDomain = `${wherebySubdomain}.${baseDomain}`;
627
+ }
541
628
  return `https://${wherebyDomain}${roomPath}`;
542
629
  }
543
630
 
@@ -578,6 +665,7 @@ class Trigger extends EventEmitter {
578
665
  const router = webhookRouter(this.webhookTriggers, this, this.assistantKey, this.startCombinedAudioStream, this.startLocalMedia);
579
666
  app.use(router);
580
667
  const server = app.listen(this.port, () => {
668
+ // console.log(`Bot trigger server now running on port[${this.port}]`);
581
669
  });
582
670
  process.on("SIGTERM", () => {
583
671
  server.close();
@@ -38,8 +38,10 @@ typeof SuppressedError === "function" ? SuppressedError : function (error, suppr
38
38
 
39
39
  function setWebsocketOrigin(roomUrl) {
40
40
  try {
41
+ // add pathname needed for parsing in rtcstats-server.
41
42
  const url = new URL(roomUrl);
42
43
  global.window.location.pathname = url.pathname;
44
+ // fix origin header needed for parsing in rtcstats-server.
43
45
  const defaultClientOptions = {
44
46
  origin: url.origin,
45
47
  };
@@ -90,6 +92,10 @@ class RTCPeerConnection extends wrtc.RTCPeerConnection {
90
92
  }
91
93
  getStats(arg) {
92
94
  return __awaiter(this, void 0, void 0, function* () {
95
+ /**
96
+ * node-wrtc seems to expect an Object argument, and doesn't handle the null arg we pass, so we
97
+ * wrap the call and filter the arg
98
+ **/
93
99
  arg = arg instanceof Object ? arg : undefined;
94
100
  const stats = yield this.wrappedGetStats(arg);
95
101
  return stats;
@@ -112,6 +118,6 @@ global.RTCRtpSender = wrtc.RTCRtpSender;
112
118
  global.RTCRtpTransceiver = wrtc.RTCRtpTransceiver;
113
119
  global.RTCSctpTransport = wrtc.RTCSctpTransport;
114
120
  global.RTCSessionDescription = wrtc.RTCSessionDescription;
115
- global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval });
121
+ global.window = Object.assign(Object.assign({}, global), { location: { pathname: "" }, screen: { width: 0 }, setInterval: global.setInterval }); // make sure all the classes / setInterval are available on window for rtcstats
116
122
 
117
123
  exports.setWebsocketOrigin = setWebsocketOrigin;
package/dist/tools.cjs CHANGED
@@ -19,9 +19,14 @@ class AudioSink extends wrtc.nonstandard.RTCAudioSink {
19
19
  }
20
20
  }
21
21
 
22
+ // Number of pipes in the ffmpeg process. We predefine a fixed number of slots, and then we dynamically assign
23
+ // participants to these slots based on mute/unmute state.
22
24
  const PARTICIPANT_SLOTS = 20;
25
+ // Each sample is 2 bytes (16 bits) for PCM audio - s16le format
26
+ // 48000 Hz is the standard sample rate for WebRTC audio
23
27
  const STREAM_INPUT_SAMPLE_RATE_IN_HZ = 48000;
24
28
  const BYTES_PER_SAMPLE = 2;
29
+ // 480 samples per 10ms frame at 48kHz
25
30
  const FRAME_10MS_SAMPLES = 480;
26
31
  const slotBuffers = new Map();
27
32
  function appendAndDrainTo480(slot, newSamples) {
@@ -33,10 +38,10 @@ function appendAndDrainTo480(slot, newSamples) {
33
38
  let offset = 0;
34
39
  while (merged.length - offset >= FRAME_10MS_SAMPLES) {
35
40
  const chunk = merged.subarray(offset, offset + FRAME_10MS_SAMPLES);
36
- enqueueFrame(slot, chunk);
41
+ enqueueFrame(slot, chunk); // always 480
37
42
  offset += FRAME_10MS_SAMPLES;
38
43
  }
39
- slotBuffers.set(slot, merged.subarray(offset));
44
+ slotBuffers.set(slot, merged.subarray(offset)); // keep remainder
40
45
  }
41
46
  ({
42
47
  enqFrames: new Array(PARTICIPANT_SLOTS).fill(0),
@@ -48,6 +53,10 @@ function appendAndDrainTo480(slot, newSamples) {
48
53
  let slots = [];
49
54
  let stopPacerFn = null;
50
55
  let outputPacerState = null;
56
+ /**
57
+ * Simple linear interpolation resampler to convert audio to 48kHz.
58
+ * This handles the common case of 16kHz -> 48kHz (3x upsampling).
59
+ */
51
60
  function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
52
61
  const ratio = STREAM_INPUT_SAMPLE_RATE_IN_HZ / inputSampleRate;
53
62
  const outputLength = Math.floor(inputFrames * ratio);
@@ -67,11 +76,43 @@ function resampleTo48kHz(inputSamples, inputSampleRate, inputFrames) {
67
76
  }
68
77
  return output;
69
78
  }
79
+ /**
80
+ * Enqueue an audio frame for paced delivery to the RTCAudioSource.
81
+ */
70
82
  function enqueueOutputFrame(samples) {
71
83
  if (outputPacerState) {
72
84
  outputPacerState.frameQueue.push(samples);
73
85
  }
74
86
  }
87
+ /**
88
+ * Start the audio pacer loop for all input slots in an FFmpeg process.
89
+ *
90
+ * The pacer ensures each slot (pipe:3..3+N-1) is written to at a steady
91
+ * real-time rate (e.g. 10 ms = 480 samples @ 48kHz), even if WebRTC frames
92
+ * arrive jittery, bursty, or with slightly different clocks.
93
+ *
94
+ * Key behavior:
95
+ * - Writes exactly one frame per period, on a shared wall-clock grid.
96
+ * - Uses silence (zero-filled frame) if a slot's queue is empty, so timing
97
+ * never stalls.
98
+ * - Resnaps the schedule if a slot switches between 10 ms / 20 ms frames.
99
+ * - Honors Node stream backpressure (`write()` return false) without breaking
100
+ * the timing grid.
101
+ *
102
+ * This keeps all FFmpeg inputs phase-aligned and stable, so aresample/amix
103
+ * can mix them without slow-downs or drift.
104
+ *
105
+ * Call this once right after spawning FFmpeg:
106
+ * ```ts
107
+ * const ff = spawnFFmpegProcess();
108
+ * startPacer(ff, PARTICIPANT_SLOTS);
109
+ * ```
110
+ *
111
+ * When tearing down the mixer, always call `stopPacer()` before killing FFmpeg.
112
+ *
113
+ * @param ff Child process handle from spawn("ffmpeg", ...)
114
+ * @param slotCount Number of participant input slots (0..N-1 → fd 3..3+N-1)
115
+ */
75
116
  function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
76
117
  if (stopPacerFn) {
77
118
  stopPacerFn();
@@ -79,11 +120,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
79
120
  }
80
121
  const writers = Array.from({ length: slotCount }, (_, i) => ff.stdio[3 + i]);
81
122
  const nowMs = () => Number(process.hrtime.bigint()) / 1e6;
82
- const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
123
+ const outputFrameMs = (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms
83
124
  const t0 = nowMs();
84
125
  slots = Array.from({ length: slotCount }, () => ({
85
126
  q: [],
86
- lastFrames: FRAME_10MS_SAMPLES,
127
+ lastFrames: FRAME_10MS_SAMPLES, // keep constant
87
128
  nextDueMs: t0 + (FRAME_10MS_SAMPLES / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000,
88
129
  }));
89
130
  outputPacerState = {
@@ -98,10 +139,11 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
98
139
  for (let s = 0; s < slotCount; s++) {
99
140
  const st = slots[s];
100
141
  const w = writers[s];
101
- const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000;
142
+ const frameMs = (st.lastFrames / STREAM_INPUT_SAMPLE_RATE_IN_HZ) * 1000; // 10ms if 480, 20ms if 960
102
143
  if (t >= st.nextDueMs) {
103
144
  const buf = st.q.length ? st.q.shift() : Buffer.alloc(st.lastFrames * BYTES_PER_SAMPLE);
104
145
  if (!w.write(buf)) {
146
+ // Just continue without adding drain listener - backpressure will naturally resolve
105
147
  const late = t - st.nextDueMs;
106
148
  const steps = Math.max(1, Math.ceil(late / frameMs));
107
149
  st.nextDueMs += steps * frameMs;
@@ -114,9 +156,10 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
114
156
  }
115
157
  if (!outputPacerState)
116
158
  return;
159
+ // Handle output pacer for RTCAudioSource
117
160
  const state = outputPacerState;
118
161
  if (t >= state.nextDueMs) {
119
- const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES);
162
+ const samples = state.frameQueue.length > 0 ? state.frameQueue.shift() : new Int16Array(FRAME_10MS_SAMPLES); // silence
120
163
  if (!state.didEmitReadyEvent) {
121
164
  state.onAudioStreamReady();
122
165
  state.didEmitReadyEvent = true;
@@ -132,12 +175,20 @@ function startPacer(ff, slotCount, rtcAudioSource, onAudioStreamReady) {
132
175
  }, 5);
133
176
  stopPacerFn = () => clearInterval(iv);
134
177
  }
178
+ /**
179
+ * Stop the audio pacer loop and clear all input slots.
180
+ * Call this before killing the FFmpeg process to ensure clean shutdown.
181
+ */
135
182
  function stopPacer() {
136
183
  if (stopPacerFn)
137
184
  stopPacerFn();
138
185
  stopPacerFn = null;
139
186
  slots = [];
140
187
  }
188
+ /**
189
+ * Queue a live frame for a given slot (0..N-1).
190
+ * Auto-resnaps the slot's schedule if the frame size (480/960) changes.
191
+ */
141
192
  function enqueueFrame(slot, samples, numberOfFrames) {
142
193
  const st = slots[slot];
143
194
  if (!st)
@@ -145,6 +196,10 @@ function enqueueFrame(slot, samples, numberOfFrames) {
145
196
  const buf = Buffer.from(samples.buffer, samples.byteOffset, samples.byteLength);
146
197
  st.q.push(buf);
147
198
  }
199
+ /**
200
+ * Clear the audio queue for a specific slot when a participant leaves.
201
+ * This prevents stale audio data from continuing to play after disconnect.
202
+ */
148
203
  function clearSlotQueue(slot) {
149
204
  const st = slots[slot];
150
205
  if (st) {
@@ -154,6 +209,11 @@ function clearSlotQueue(slot) {
154
209
  st.nextDueMs = now + frameMs;
155
210
  }
156
211
  }
212
+ /**
213
+ * Get the FFmpeg arguments for mixing audio from multiple participants.
214
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
215
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
216
+ */
157
217
  function getFFmpegArguments() {
158
218
  const N = PARTICIPANT_SLOTS;
159
219
  const SR = STREAM_INPUT_SAMPLE_RATE_IN_HZ;
@@ -171,6 +231,14 @@ function getFFmpegArguments() {
171
231
  ffArgs.push("-hide_banner", "-nostats", "-loglevel", "error", "-filter_complex", filter, "-map", "[mix]", "-f", "s16le", "-ar", String(SR), "-ac", "1", "-c:a", "pcm_s16le", "pipe:1");
172
232
  return ffArgs;
173
233
  }
234
+ /**
235
+ * Spawn a new FFmpeg process for mixing audio from multiple participants.
236
+ * This will read from the input pipes (3..3+N-1) and output a single mixed audio stream.
237
+ * The output is in PCM 16-bit little-endian format at 48kHz sample rate.
238
+ * The process will log its output to stderr.
239
+ * @param rtcAudioSource The RTCAudioSource to which the mixed audio will be sent.
240
+ * @return The spawned FFmpeg process.
241
+ */
174
242
  function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
175
243
  const stdio = ["ignore", "pipe", "pipe", ...Array(PARTICIPANT_SLOTS).fill("pipe")];
176
244
  const args = getFFmpegArguments();
@@ -180,7 +248,7 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
180
248
  ffmpegProcess.stderr.on("data", (d) => console.error("[ffmpeg]", String(d).trim()));
181
249
  ffmpegProcess.on("error", () => console.error("FFmpeg process error: is ffmpeg installed?"));
182
250
  let audioBuffer = Buffer.alloc(0);
183
- const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE;
251
+ const FRAME_SIZE_BYTES = FRAME_10MS_SAMPLES * BYTES_PER_SAMPLE; // 480 samples * 2 bytes = 960 bytes
184
252
  ffmpegProcess.stdout.on("data", (chunk) => {
185
253
  audioBuffer = Buffer.concat([audioBuffer, chunk]);
186
254
  while (audioBuffer.length >= FRAME_SIZE_BYTES) {
@@ -195,6 +263,16 @@ function spawnFFmpegProcess(rtcAudioSource, onAudioStreamReady) {
195
263
  });
196
264
  return ffmpegProcess;
197
265
  }
266
+ /**
267
+ * Write audio data from a MediaStreamTrack to the FFmpeg process.
268
+ * This function creates an AudioSink for the track and sets up a data handler
269
+ * that enqueues audio frames into the pacer.
270
+ *
271
+ * @param ffmpegProcess The FFmpeg process to which audio data will be written.
272
+ * @param slot The participant slot number (0..N-1) to which this track belongs.
273
+ * @param audioTrack The MediaStreamTrack containing the audio data.
274
+ * @return An object containing the AudioSink, the writable stream, and a stop function.
275
+ */
198
276
  function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
199
277
  const writer = ffmpegProcess.stdio[3 + slot];
200
278
  const sink = new AudioSink(audioTrack);
@@ -219,6 +297,12 @@ function writeAudioDataToFFmpeg(ffmpegProcess, slot, audioTrack) {
219
297
  };
220
298
  return { sink, writer, stop };
221
299
  }
300
+ /**
301
+ * Stop the FFmpeg process and clean up all resources.
302
+ * This function will unpipe the stdout, end all writable streams for each participant slot,
303
+ * and kill the FFmpeg process.
304
+ * @param ffmpegProcess The FFmpeg process to stop.
305
+ */
222
306
  function stopFFmpegProcess(ffmpegProcess) {
223
307
  stopPacer();
224
308
  if (ffmpegProcess && !ffmpegProcess.killed) {
@@ -272,6 +356,7 @@ class AudioMixer extends events.EventEmitter {
272
356
  for (const p of participants)
273
357
  this.attachParticipantIfNeeded(p);
274
358
  const liveIds = new Set(participants.map((p) => p.id).filter(Boolean));
359
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
275
360
  for (const [slot, pid] of this.participantSlots) {
276
361
  if (pid && !liveIds.has(pid))
277
362
  this.detachParticipant(pid);
@@ -284,6 +369,7 @@ class AudioMixer extends events.EventEmitter {
284
369
  }
285
370
  this.participantSlots = new Map(Array.from({ length: PARTICIPANT_SLOTS }, (_, i) => [i, ""]));
286
371
  this.activeSlots = {};
372
+ // Recreate the media stream to avoid stale references
287
373
  this.setupMediaStream();
288
374
  }
289
375
  slotForParticipant(participantId) {
@@ -349,6 +435,7 @@ class AudioMixer extends events.EventEmitter {
349
435
  }
350
436
  this.activeSlots[slot] = undefined;
351
437
  }
438
+ // Clear any queued audio data for this slot to prevent stale audio
352
439
  clearSlotQueue(slot);
353
440
  this.participantSlots.set(slot, "");
354
441
  }
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@whereby.com/assistant-sdk",
3
3
  "description": "Assistant SDK for whereby.com",
4
4
  "author": "Whereby AS",
5
- "version": "0.0.0-canary-20250912102624",
5
+ "version": "0.0.0-canary-20250912144626",
6
6
  "license": "MIT",
7
7
  "files": [
8
8
  "dist",
@@ -63,7 +63,7 @@
63
63
  "dotenv": "^16.4.5",
64
64
  "uuid": "^11.0.3",
65
65
  "ws": "^8.18.0",
66
- "@whereby.com/core": "0.0.0-canary-20250912102624"
66
+ "@whereby.com/core": "0.0.0-canary-20250912144626"
67
67
  },
68
68
  "prettier": "@whereby.com/prettier-config",
69
69
  "scripts": {