@tensamin/audio 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,23 @@
1
1
  // src/vad/vad-node.ts
2
- var energyVadWorkletCode = `
2
+ var createEnergyVadWorkletCode = (vadConfig) => {
3
+ const energyParams = vadConfig?.energyVad || {};
4
+ const smoothing = energyParams.smoothing ?? 0.95;
5
+ const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
6
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
7
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
8
+ const minSNR = energyParams.minSNR ?? 2;
9
+ const snrRange = energyParams.snrRange ?? 8;
10
+ return `
3
11
  class EnergyVadProcessor extends AudioWorkletProcessor {
4
12
  constructor() {
5
13
  super();
6
- this.smoothing = 0.95;
14
+ this.smoothing = ${smoothing};
7
15
  this.energy = 0;
8
- this.noiseFloor = 0.001;
16
+ this.noiseFloor = ${initialNoiseFloor};
17
+ this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
18
+ this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
19
+ this.minSNR = ${minSNR};
20
+ this.snrRange = ${snrRange};
9
21
  }
10
22
 
11
23
  process(inputs, outputs, parameters) {
@@ -13,32 +25,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
13
25
  if (!input || !input.length) return true;
14
26
  const channel = input[0];
15
27
 
16
- // Calculate RMS
28
+ // Calculate RMS (Root Mean Square) energy
17
29
  let sum = 0;
18
30
  for (let i = 0; i < channel.length; i++) {
19
31
  sum += channel[i] * channel[i];
20
32
  }
21
33
  const rms = Math.sqrt(sum / channel.length);
22
34
 
23
- // Simple adaptive noise floor (very basic)
35
+ // Adaptive noise floor estimation
36
+ // When signal is quiet, adapt quickly to find new noise floor
37
+ // When signal is loud (speech), adapt slowly to avoid raising noise floor
24
38
  if (rms < this.noiseFloor) {
25
- this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
39
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
26
40
  } else {
27
- this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
41
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
28
42
  }
29
43
 
30
- // Calculate "probability" based on SNR
31
- // This is a heuristic mapping from energy to 0-1
44
+ // Calculate Signal-to-Noise Ratio (SNR)
32
45
  const snr = rms / (this.noiseFloor + 1e-6);
33
- const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
46
+
47
+ // Map SNR to probability (0-1)
48
+ // Probability is 0 when SNR <= minSNR
49
+ // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
50
+ // Probability is 1 when SNR >= (minSNR + snrRange)
51
+ const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
34
52
 
35
- this.port.postMessage({ probability });
53
+ this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
36
54
 
37
55
  return true;
38
56
  }
39
57
  }
40
58
  registerProcessor('energy-vad-processor', EnergyVadProcessor);
41
59
  `;
60
+ };
42
61
  var EnergyVADPlugin = class {
43
62
  name = "energy-vad";
44
63
  async createNode(context, config, onDecision) {
@@ -47,7 +66,8 @@ var EnergyVADPlugin = class {
47
66
  const pass = context.createGain();
48
67
  return pass;
49
68
  }
50
- const blob = new Blob([energyVadWorkletCode], {
69
+ const workletCode = createEnergyVadWorkletCode(config);
70
+ const blob = new Blob([workletCode], {
51
71
  type: "application/javascript"
52
72
  });
53
73
  const url = URL.createObjectURL(blob);
@@ -3,7 +3,7 @@ import {
3
3
  } from "./chunk-XO6B3D4A.mjs";
4
4
  import {
5
5
  EnergyVADPlugin
6
- } from "./chunk-R5JVHKWA.mjs";
6
+ } from "./chunk-NMHKX64G.mjs";
7
7
 
8
8
  // src/extensibility/plugins.ts
9
9
  var nsPlugins = /* @__PURE__ */ new Map();
@@ -102,13 +102,25 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
102
102
  };
103
103
 
104
104
  // src/vad/vad-node.ts
105
- var energyVadWorkletCode = `
105
+ var createEnergyVadWorkletCode = (vadConfig) => {
106
+ const energyParams = vadConfig?.energyVad || {};
107
+ const smoothing = energyParams.smoothing ?? 0.95;
108
+ const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
109
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
110
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
111
+ const minSNR = energyParams.minSNR ?? 2;
112
+ const snrRange = energyParams.snrRange ?? 8;
113
+ return `
106
114
  class EnergyVadProcessor extends AudioWorkletProcessor {
107
115
  constructor() {
108
116
  super();
109
- this.smoothing = 0.95;
117
+ this.smoothing = ${smoothing};
110
118
  this.energy = 0;
111
- this.noiseFloor = 0.001;
119
+ this.noiseFloor = ${initialNoiseFloor};
120
+ this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
121
+ this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
122
+ this.minSNR = ${minSNR};
123
+ this.snrRange = ${snrRange};
112
124
  }
113
125
 
114
126
  process(inputs, outputs, parameters) {
@@ -116,32 +128,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
116
128
  if (!input || !input.length) return true;
117
129
  const channel = input[0];
118
130
 
119
- // Calculate RMS
131
+ // Calculate RMS (Root Mean Square) energy
120
132
  let sum = 0;
121
133
  for (let i = 0; i < channel.length; i++) {
122
134
  sum += channel[i] * channel[i];
123
135
  }
124
136
  const rms = Math.sqrt(sum / channel.length);
125
137
 
126
- // Simple adaptive noise floor (very basic)
138
+ // Adaptive noise floor estimation
139
+ // When signal is quiet, adapt quickly to find new noise floor
140
+ // When signal is loud (speech), adapt slowly to avoid raising noise floor
127
141
  if (rms < this.noiseFloor) {
128
- this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
142
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
129
143
  } else {
130
- this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
144
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
131
145
  }
132
146
 
133
- // Calculate "probability" based on SNR
134
- // This is a heuristic mapping from energy to 0-1
147
+ // Calculate Signal-to-Noise Ratio (SNR)
135
148
  const snr = rms / (this.noiseFloor + 1e-6);
136
- const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
149
+
150
+ // Map SNR to probability (0-1)
151
+ // Probability is 0 when SNR <= minSNR
152
+ // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
153
+ // Probability is 1 when SNR >= (minSNR + snrRange)
154
+ const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
137
155
 
138
- this.port.postMessage({ probability });
156
+ this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
139
157
 
140
158
  return true;
141
159
  }
142
160
  }
143
161
  registerProcessor('energy-vad-processor', EnergyVadProcessor);
144
162
  `;
163
+ };
145
164
  var EnergyVADPlugin = class {
146
165
  name = "energy-vad";
147
166
  async createNode(context, config, onDecision) {
@@ -150,7 +169,8 @@ var EnergyVADPlugin = class {
150
169
  const pass = context.createGain();
151
170
  return pass;
152
171
  }
153
- const blob = new Blob([energyVadWorkletCode], {
172
+ const workletCode = createEnergyVadWorkletCode(config);
173
+ const blob = new Blob([workletCode], {
154
174
  type: "application/javascript"
155
175
  });
156
176
  const url = URL.createObjectURL(blob);
@@ -3,9 +3,9 @@ import {
3
3
  getVADPlugin,
4
4
  registerNoiseSuppressionPlugin,
5
5
  registerVADPlugin
6
- } from "../chunk-6P2RDBW5.mjs";
6
+ } from "../chunk-YOSTLLCS.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-R5JVHKWA.mjs";
8
+ import "../chunk-NMHKX64G.mjs";
9
9
  export {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin,
package/dist/index.js CHANGED
@@ -154,13 +154,25 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
154
154
  };
155
155
 
156
156
  // src/vad/vad-node.ts
157
- var energyVadWorkletCode = `
157
+ var createEnergyVadWorkletCode = (vadConfig) => {
158
+ const energyParams = vadConfig?.energyVad || {};
159
+ const smoothing = energyParams.smoothing ?? 0.95;
160
+ const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
161
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
162
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
163
+ const minSNR = energyParams.minSNR ?? 2;
164
+ const snrRange = energyParams.snrRange ?? 8;
165
+ return `
158
166
  class EnergyVadProcessor extends AudioWorkletProcessor {
159
167
  constructor() {
160
168
  super();
161
- this.smoothing = 0.95;
169
+ this.smoothing = ${smoothing};
162
170
  this.energy = 0;
163
- this.noiseFloor = 0.001;
171
+ this.noiseFloor = ${initialNoiseFloor};
172
+ this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
173
+ this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
174
+ this.minSNR = ${minSNR};
175
+ this.snrRange = ${snrRange};
164
176
  }
165
177
 
166
178
  process(inputs, outputs, parameters) {
@@ -168,32 +180,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
168
180
  if (!input || !input.length) return true;
169
181
  const channel = input[0];
170
182
 
171
- // Calculate RMS
183
+ // Calculate RMS (Root Mean Square) energy
172
184
  let sum = 0;
173
185
  for (let i = 0; i < channel.length; i++) {
174
186
  sum += channel[i] * channel[i];
175
187
  }
176
188
  const rms = Math.sqrt(sum / channel.length);
177
189
 
178
- // Simple adaptive noise floor (very basic)
190
+ // Adaptive noise floor estimation
191
+ // When signal is quiet, adapt quickly to find new noise floor
192
+ // When signal is loud (speech), adapt slowly to avoid raising noise floor
179
193
  if (rms < this.noiseFloor) {
180
- this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
194
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
181
195
  } else {
182
- this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
196
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
183
197
  }
184
198
 
185
- // Calculate "probability" based on SNR
186
- // This is a heuristic mapping from energy to 0-1
199
+ // Calculate Signal-to-Noise Ratio (SNR)
187
200
  const snr = rms / (this.noiseFloor + 1e-6);
188
- const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
201
+
202
+ // Map SNR to probability (0-1)
203
+ // Probability is 0 when SNR <= minSNR
204
+ // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
205
+ // Probability is 1 when SNR >= (minSNR + snrRange)
206
+ const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
189
207
 
190
- this.port.postMessage({ probability });
208
+ this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
191
209
 
192
210
  return true;
193
211
  }
194
212
  }
195
213
  registerProcessor('energy-vad-processor', EnergyVadProcessor);
196
214
  `;
215
+ };
197
216
  var EnergyVADPlugin = class {
198
217
  name = "energy-vad";
199
218
  async createNode(context, config, onDecision) {
@@ -202,7 +221,8 @@ var EnergyVADPlugin = class {
202
221
  const pass = context.createGain();
203
222
  return pass;
204
223
  }
205
- const blob = new Blob([energyVadWorkletCode], {
224
+ const workletCode = createEnergyVadWorkletCode(config);
225
+ const blob = new Blob([workletCode], {
206
226
  type: "application/javascript"
207
227
  });
208
228
  const url = URL.createObjectURL(blob);
@@ -288,31 +308,60 @@ var VADStateMachine = class {
288
308
  currentState = "silent";
289
309
  lastSpeechTime = 0;
290
310
  speechStartTime = 0;
311
+ lastSilenceTime = 0;
291
312
  frameDurationMs = 20;
292
313
  // Assumed frame duration, updated by calls
293
314
  constructor(config) {
294
315
  this.config = {
295
316
  enabled: config?.enabled ?? true,
296
317
  pluginName: config?.pluginName ?? "energy-vad",
297
- startThreshold: config?.startThreshold ?? 0.5,
298
- stopThreshold: config?.stopThreshold ?? 0.4,
299
- hangoverMs: config?.hangoverMs ?? 300,
300
- preRollMs: config?.preRollMs ?? 200
318
+ // Voice-optimized defaults
319
+ startThreshold: config?.startThreshold ?? 0.6,
320
+ // Higher threshold to avoid noise
321
+ stopThreshold: config?.stopThreshold ?? 0.45,
322
+ // Balanced for voice
323
+ hangoverMs: config?.hangoverMs ?? 400,
324
+ // Smooth for natural speech
325
+ preRollMs: config?.preRollMs ?? 250,
326
+ // Generous pre-roll
327
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
328
+ minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
329
+ energyVad: {
330
+ smoothing: config?.energyVad?.smoothing ?? 0.95,
331
+ initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
332
+ noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
333
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
334
+ minSNR: config?.energyVad?.minSNR ?? 2,
335
+ snrRange: config?.energyVad?.snrRange ?? 8
336
+ }
301
337
  };
338
+ this.lastSilenceTime = Date.now();
302
339
  }
303
340
  updateConfig(config) {
304
341
  this.config = { ...this.config, ...config };
305
342
  }
306
343
  processFrame(probability, timestamp) {
307
- const { startThreshold, stopThreshold, hangoverMs } = this.config;
344
+ const {
345
+ startThreshold,
346
+ stopThreshold,
347
+ hangoverMs,
348
+ minSpeechDurationMs,
349
+ minSilenceDurationMs
350
+ } = this.config;
308
351
  let newState = this.currentState;
309
352
  if (this.currentState === "silent" || this.currentState === "speech_ending") {
310
353
  if (probability >= startThreshold) {
311
- newState = "speech_starting";
312
- this.speechStartTime = timestamp;
313
- this.lastSpeechTime = timestamp;
354
+ const silenceDuration = timestamp - this.lastSilenceTime;
355
+ if (silenceDuration >= minSilenceDurationMs) {
356
+ newState = "speech_starting";
357
+ this.speechStartTime = timestamp;
358
+ this.lastSpeechTime = timestamp;
359
+ } else {
360
+ newState = "silent";
361
+ }
314
362
  } else {
315
363
  newState = "silent";
364
+ this.lastSilenceTime = timestamp;
316
365
  }
317
366
  } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
318
367
  if (probability >= stopThreshold) {
@@ -320,10 +369,15 @@ var VADStateMachine = class {
320
369
  this.lastSpeechTime = timestamp;
321
370
  } else {
322
371
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
372
+ const speechDuration = timestamp - this.speechStartTime;
323
373
  if (timeSinceSpeech < hangoverMs) {
324
374
  newState = "speaking";
375
+ } else if (speechDuration < minSpeechDurationMs) {
376
+ newState = "silent";
377
+ this.lastSilenceTime = timestamp;
325
378
  } else {
326
379
  newState = "speech_ending";
380
+ this.lastSilenceTime = timestamp;
327
381
  }
328
382
  }
329
383
  }
@@ -342,7 +396,9 @@ var VADStateMachine = class {
342
396
  async function createAudioPipeline(sourceTrack, config = {}) {
343
397
  const context = getAudioContext();
344
398
  registerPipeline();
345
- const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl);
399
+ const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
400
+ config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
401
+ );
346
402
  const vadEnabled = config.vad?.enabled !== false;
347
403
  const fullConfig = {
348
404
  noiseSuppression: {
@@ -351,13 +407,38 @@ async function createAudioPipeline(sourceTrack, config = {}) {
351
407
  },
352
408
  vad: {
353
409
  enabled: vadEnabled,
410
+ // Voice-optimized defaults (will be overridden by config)
411
+ startThreshold: 0.6,
412
+ stopThreshold: 0.45,
413
+ hangoverMs: 400,
414
+ preRollMs: 250,
415
+ minSpeechDurationMs: 100,
416
+ minSilenceDurationMs: 150,
417
+ energyVad: {
418
+ smoothing: 0.95,
419
+ initialNoiseFloor: 1e-3,
420
+ noiseFloorAdaptRateQuiet: 0.01,
421
+ noiseFloorAdaptRateLoud: 1e-3,
422
+ minSNR: 2,
423
+ snrRange: 8
424
+ },
354
425
  ...config.vad
355
426
  },
356
427
  output: {
357
428
  speechGain: 1,
358
- silenceGain: vadEnabled ? 0 : 1,
359
- // If no VAD, always output audio
360
- gainRampTime: 0.02,
429
+ silenceGain: 0,
430
+ // Full mute for voice-only
431
+ gainRampTime: 0.015,
432
+ // Fast but smooth transitions
433
+ smoothTransitions: true,
434
+ maxGainDb: 6,
435
+ enableCompression: false,
436
+ compression: {
437
+ threshold: -24,
438
+ ratio: 3,
439
+ attack: 3e-3,
440
+ release: 0.05
441
+ },
361
442
  ...config.output
362
443
  },
363
444
  livekit: { manageTrackMute: false, ...config.livekit }
@@ -368,7 +449,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
368
449
  output: fullConfig.output
369
450
  });
370
451
  if (!sourceTrack || sourceTrack.kind !== "audio") {
371
- throw new Error("createAudioPipeline requires a valid audio MediaStreamTrack");
452
+ throw new Error(
453
+ "createAudioPipeline requires a valid audio MediaStreamTrack"
454
+ );
372
455
  }
373
456
  if (sourceTrack.readyState === "ended") {
374
457
  throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
@@ -382,10 +465,7 @@ async function createAudioPipeline(sourceTrack, config = {}) {
382
465
  const nsPlugin = getNoiseSuppressionPlugin(
383
466
  fullConfig.noiseSuppression?.pluginName
384
467
  );
385
- nsNode = await nsPlugin.createNode(
386
- context,
387
- fullConfig.noiseSuppression
388
- );
468
+ nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
389
469
  } catch (error) {
390
470
  const err = error instanceof Error ? error : new Error(String(error));
391
471
  console.error("Failed to create noise suppression node:", err);
@@ -395,25 +475,21 @@ async function createAudioPipeline(sourceTrack, config = {}) {
395
475
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
396
476
  try {
397
477
  const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
398
- vadNode = await vadPlugin.createNode(
399
- context,
400
- fullConfig.vad,
401
- (prob) => {
402
- try {
403
- const timestamp = context.currentTime * 1e3;
404
- const newState = vadStateMachine.processFrame(prob, timestamp);
405
- if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
406
- emitter.emit("vadChange", newState);
407
- lastVadState = newState;
408
- updateGain(newState);
409
- }
410
- } catch (vadError) {
411
- const err = vadError instanceof Error ? vadError : new Error(String(vadError));
412
- console.error("Error in VAD callback:", err);
413
- emitter.emit("error", err);
478
+ vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
479
+ try {
480
+ const timestamp = context.currentTime * 1e3;
481
+ const newState = vadStateMachine.processFrame(prob, timestamp);
482
+ if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
483
+ emitter.emit("vadChange", newState);
484
+ lastVadState = newState;
485
+ updateGain(newState);
414
486
  }
487
+ } catch (vadError) {
488
+ const err = vadError instanceof Error ? vadError : new Error(String(vadError));
489
+ console.error("Error in VAD callback:", err);
490
+ emitter.emit("error", err);
415
491
  }
416
- );
492
+ });
417
493
  } catch (error) {
418
494
  const err = error instanceof Error ? error : new Error(String(error));
419
495
  console.error("Failed to create VAD node:", err);
@@ -430,15 +506,31 @@ async function createAudioPipeline(sourceTrack, config = {}) {
430
506
  nsNode.connect(splitter);
431
507
  splitter.connect(vadNode);
432
508
  const delayNode = context.createDelay(1);
433
- const preRollSeconds = (fullConfig.vad?.preRollMs ?? 200) / 1e3;
509
+ const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
434
510
  delayNode.delayTime.value = preRollSeconds;
435
511
  const gainNode = context.createGain();
436
512
  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
513
+ let compressor = null;
514
+ if (fullConfig.output?.enableCompression) {
515
+ compressor = context.createDynamicsCompressor();
516
+ const comp = fullConfig.output.compression;
517
+ compressor.threshold.value = comp.threshold ?? -24;
518
+ compressor.ratio.value = comp.ratio ?? 3;
519
+ compressor.attack.value = comp.attack ?? 3e-3;
520
+ compressor.release.value = comp.release ?? 0.05;
521
+ compressor.knee.value = 10;
522
+ }
437
523
  const destination = context.createMediaStreamDestination();
438
524
  try {
439
525
  splitter.connect(delayNode);
440
526
  delayNode.connect(gainNode);
441
- gainNode.connect(destination);
527
+ if (compressor) {
528
+ gainNode.connect(compressor);
529
+ compressor.connect(destination);
530
+ console.log("Compression enabled:", fullConfig.output?.compression);
531
+ } else {
532
+ gainNode.connect(destination);
533
+ }
442
534
  } catch (error) {
443
535
  const err = error instanceof Error ? error : new Error(String(error));
444
536
  console.error("Failed to wire audio pipeline:", err);
@@ -447,10 +539,24 @@ async function createAudioPipeline(sourceTrack, config = {}) {
447
539
  }
448
540
  function updateGain(state) {
449
541
  try {
450
- const { speechGain, silenceGain, gainRampTime } = fullConfig.output;
451
- const targetGain = state.isSpeaking ? speechGain ?? 1 : silenceGain ?? 0;
542
+ const {
543
+ speechGain = 1,
544
+ silenceGain = 0,
545
+ gainRampTime = 0.015,
546
+ smoothTransitions = true,
547
+ maxGainDb = 6
548
+ } = fullConfig.output;
549
+ const maxGainLinear = Math.pow(10, maxGainDb / 20);
550
+ const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
551
+ const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
452
552
  const now = context.currentTime;
453
- gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime ?? 0.02);
553
+ if (smoothTransitions) {
554
+ gainNode.gain.cancelScheduledValues(now);
555
+ gainNode.gain.setValueAtTime(gainNode.gain.value, now);
556
+ gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
557
+ } else {
558
+ gainNode.gain.setValueAtTime(targetGain, now);
559
+ }
454
560
  } catch (error) {
455
561
  const err = error instanceof Error ? error : new Error(String(error));
456
562
  console.error("Failed to update gain:", err);
@@ -506,6 +612,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
506
612
  vadNode.disconnect();
507
613
  delayNode.disconnect();
508
614
  gainNode.disconnect();
615
+ if (compressor) {
616
+ compressor.disconnect();
617
+ }
509
618
  destination.stream.getTracks().forEach((t) => t.stop());
510
619
  unregisterPipeline();
511
620
  } catch (error) {
@@ -522,7 +631,47 @@ async function createAudioPipeline(sourceTrack, config = {}) {
522
631
  try {
523
632
  if (newConfig.vad) {
524
633
  vadStateMachine.updateConfig(newConfig.vad);
634
+ Object.assign(fullConfig.vad, newConfig.vad);
635
+ if (newConfig.vad.preRollMs !== void 0) {
636
+ const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
637
+ delayNode.delayTime.setValueAtTime(
638
+ preRollSeconds2,
639
+ context.currentTime
640
+ );
641
+ }
642
+ }
643
+ if (newConfig.output) {
644
+ Object.assign(fullConfig.output, newConfig.output);
645
+ updateGain(lastVadState);
646
+ if (compressor && newConfig.output.compression) {
647
+ const comp = newConfig.output.compression;
648
+ if (comp.threshold !== void 0) {
649
+ compressor.threshold.setValueAtTime(
650
+ comp.threshold,
651
+ context.currentTime
652
+ );
653
+ }
654
+ if (comp.ratio !== void 0) {
655
+ compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
656
+ }
657
+ if (comp.attack !== void 0) {
658
+ compressor.attack.setValueAtTime(
659
+ comp.attack,
660
+ context.currentTime
661
+ );
662
+ }
663
+ if (comp.release !== void 0) {
664
+ compressor.release.setValueAtTime(
665
+ comp.release,
666
+ context.currentTime
667
+ );
668
+ }
669
+ }
670
+ }
671
+ if (newConfig.livekit) {
672
+ Object.assign(fullConfig.livekit, newConfig.livekit);
525
673
  }
674
+ console.log("Pipeline config updated:", newConfig);
526
675
  } catch (error) {
527
676
  const err = error instanceof Error ? error : new Error(String(error));
528
677
  console.error("Failed to update config:", err);
package/dist/index.mjs CHANGED
@@ -1,13 +1,13 @@
1
1
  import "./chunk-WBQAMGXK.mjs";
2
2
  import {
3
3
  attachProcessingToTrack
4
- } from "./chunk-XMTQPMQ6.mjs";
4
+ } from "./chunk-ERJVV5JR.mjs";
5
5
  import {
6
6
  createAudioPipeline
7
- } from "./chunk-EXH2PNUE.mjs";
7
+ } from "./chunk-AHBRT4RD.mjs";
8
8
  import {
9
9
  VADStateMachine
10
- } from "./chunk-JJASCVEW.mjs";
10
+ } from "./chunk-N553RHTI.mjs";
11
11
  import {
12
12
  closeAudioContext,
13
13
  getAudioContext,
@@ -21,13 +21,13 @@ import {
21
21
  getVADPlugin,
22
22
  registerNoiseSuppressionPlugin,
23
23
  registerVADPlugin
24
- } from "./chunk-6P2RDBW5.mjs";
24
+ } from "./chunk-YOSTLLCS.mjs";
25
25
  import {
26
26
  RNNoisePlugin
27
27
  } from "./chunk-XO6B3D4A.mjs";
28
28
  import {
29
29
  EnergyVADPlugin
30
- } from "./chunk-R5JVHKWA.mjs";
30
+ } from "./chunk-NMHKX64G.mjs";
31
31
  export {
32
32
  EnergyVADPlugin,
33
33
  RNNoisePlugin,