@tensamin/audio 0.1.3 → 0.1.4

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -121,13 +121,25 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
121
121
  };
122
122
 
123
123
  // src/vad/vad-node.ts
124
- var energyVadWorkletCode = `
124
+ var createEnergyVadWorkletCode = (vadConfig) => {
125
+ const energyParams = vadConfig?.energyVad || {};
126
+ const smoothing = energyParams.smoothing ?? 0.95;
127
+ const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
128
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
129
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
130
+ const minSNR = energyParams.minSNR ?? 2;
131
+ const snrRange = energyParams.snrRange ?? 8;
132
+ return `
125
133
  class EnergyVadProcessor extends AudioWorkletProcessor {
126
134
  constructor() {
127
135
  super();
128
- this.smoothing = 0.95;
136
+ this.smoothing = ${smoothing};
129
137
  this.energy = 0;
130
- this.noiseFloor = 0.001;
138
+ this.noiseFloor = ${initialNoiseFloor};
139
+ this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
140
+ this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
141
+ this.minSNR = ${minSNR};
142
+ this.snrRange = ${snrRange};
131
143
  }
132
144
 
133
145
  process(inputs, outputs, parameters) {
@@ -135,32 +147,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
135
147
  if (!input || !input.length) return true;
136
148
  const channel = input[0];
137
149
 
138
- // Calculate RMS
150
+ // Calculate RMS (Root Mean Square) energy
139
151
  let sum = 0;
140
152
  for (let i = 0; i < channel.length; i++) {
141
153
  sum += channel[i] * channel[i];
142
154
  }
143
155
  const rms = Math.sqrt(sum / channel.length);
144
156
 
145
- // Simple adaptive noise floor (very basic)
157
+ // Adaptive noise floor estimation
158
+ // When signal is quiet, adapt quickly to find new noise floor
159
+ // When signal is loud (speech), adapt slowly to avoid raising noise floor
146
160
  if (rms < this.noiseFloor) {
147
- this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
161
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
148
162
  } else {
149
- this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
163
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
150
164
  }
151
165
 
152
- // Calculate "probability" based on SNR
153
- // This is a heuristic mapping from energy to 0-1
166
+ // Calculate Signal-to-Noise Ratio (SNR)
154
167
  const snr = rms / (this.noiseFloor + 1e-6);
155
- const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
168
+
169
+ // Map SNR to probability (0-1)
170
+ // Probability is 0 when SNR <= minSNR
171
+ // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
172
+ // Probability is 1 when SNR >= (minSNR + snrRange)
173
+ const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
156
174
 
157
- this.port.postMessage({ probability });
175
+ this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
158
176
 
159
177
  return true;
160
178
  }
161
179
  }
162
180
  registerProcessor('energy-vad-processor', EnergyVadProcessor);
163
181
  `;
182
+ };
164
183
  var EnergyVADPlugin = class {
165
184
  name = "energy-vad";
166
185
  async createNode(context, config, onDecision) {
@@ -169,7 +188,8 @@ var EnergyVADPlugin = class {
169
188
  const pass = context.createGain();
170
189
  return pass;
171
190
  }
172
- const blob = new Blob([energyVadWorkletCode], {
191
+ const workletCode = createEnergyVadWorkletCode(config);
192
+ const blob = new Blob([workletCode], {
173
193
  type: "application/javascript"
174
194
  });
175
195
  const url = URL.createObjectURL(blob);
@@ -249,31 +269,60 @@ var VADStateMachine = class {
249
269
  currentState = "silent";
250
270
  lastSpeechTime = 0;
251
271
  speechStartTime = 0;
272
+ lastSilenceTime = 0;
252
273
  frameDurationMs = 20;
253
274
  // Assumed frame duration, updated by calls
254
275
  constructor(config) {
255
276
  this.config = {
256
277
  enabled: config?.enabled ?? true,
257
278
  pluginName: config?.pluginName ?? "energy-vad",
258
- startThreshold: config?.startThreshold ?? 0.5,
259
- stopThreshold: config?.stopThreshold ?? 0.4,
260
- hangoverMs: config?.hangoverMs ?? 300,
261
- preRollMs: config?.preRollMs ?? 200
279
+ // Voice-optimized defaults
280
+ startThreshold: config?.startThreshold ?? 0.6,
281
+ // Higher threshold to avoid noise
282
+ stopThreshold: config?.stopThreshold ?? 0.45,
283
+ // Balanced for voice
284
+ hangoverMs: config?.hangoverMs ?? 400,
285
+ // Smooth for natural speech
286
+ preRollMs: config?.preRollMs ?? 250,
287
+ // Generous pre-roll
288
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
289
+ minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
290
+ energyVad: {
291
+ smoothing: config?.energyVad?.smoothing ?? 0.95,
292
+ initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
293
+ noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
294
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
295
+ minSNR: config?.energyVad?.minSNR ?? 2,
296
+ snrRange: config?.energyVad?.snrRange ?? 8
297
+ }
262
298
  };
299
+ this.lastSilenceTime = Date.now();
263
300
  }
264
301
  updateConfig(config) {
265
302
  this.config = { ...this.config, ...config };
266
303
  }
267
304
  processFrame(probability, timestamp) {
268
- const { startThreshold, stopThreshold, hangoverMs } = this.config;
305
+ const {
306
+ startThreshold,
307
+ stopThreshold,
308
+ hangoverMs,
309
+ minSpeechDurationMs,
310
+ minSilenceDurationMs
311
+ } = this.config;
269
312
  let newState = this.currentState;
270
313
  if (this.currentState === "silent" || this.currentState === "speech_ending") {
271
314
  if (probability >= startThreshold) {
272
- newState = "speech_starting";
273
- this.speechStartTime = timestamp;
274
- this.lastSpeechTime = timestamp;
315
+ const silenceDuration = timestamp - this.lastSilenceTime;
316
+ if (silenceDuration >= minSilenceDurationMs) {
317
+ newState = "speech_starting";
318
+ this.speechStartTime = timestamp;
319
+ this.lastSpeechTime = timestamp;
320
+ } else {
321
+ newState = "silent";
322
+ }
275
323
  } else {
276
324
  newState = "silent";
325
+ this.lastSilenceTime = timestamp;
277
326
  }
278
327
  } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
279
328
  if (probability >= stopThreshold) {
@@ -281,10 +330,15 @@ var VADStateMachine = class {
281
330
  this.lastSpeechTime = timestamp;
282
331
  } else {
283
332
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
333
+ const speechDuration = timestamp - this.speechStartTime;
284
334
  if (timeSinceSpeech < hangoverMs) {
285
335
  newState = "speaking";
336
+ } else if (speechDuration < minSpeechDurationMs) {
337
+ newState = "silent";
338
+ this.lastSilenceTime = timestamp;
286
339
  } else {
287
340
  newState = "speech_ending";
341
+ this.lastSilenceTime = timestamp;
288
342
  }
289
343
  }
290
344
  }
@@ -303,7 +357,9 @@ var VADStateMachine = class {
303
357
  async function createAudioPipeline(sourceTrack, config = {}) {
304
358
  const context = getAudioContext();
305
359
  registerPipeline();
306
- const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl);
360
+ const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
361
+ config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
362
+ );
307
363
  const vadEnabled = config.vad?.enabled !== false;
308
364
  const fullConfig = {
309
365
  noiseSuppression: {
@@ -312,13 +368,38 @@ async function createAudioPipeline(sourceTrack, config = {}) {
312
368
  },
313
369
  vad: {
314
370
  enabled: vadEnabled,
371
+ // Voice-optimized defaults (will be overridden by config)
372
+ startThreshold: 0.6,
373
+ stopThreshold: 0.45,
374
+ hangoverMs: 400,
375
+ preRollMs: 250,
376
+ minSpeechDurationMs: 100,
377
+ minSilenceDurationMs: 150,
378
+ energyVad: {
379
+ smoothing: 0.95,
380
+ initialNoiseFloor: 1e-3,
381
+ noiseFloorAdaptRateQuiet: 0.01,
382
+ noiseFloorAdaptRateLoud: 1e-3,
383
+ minSNR: 2,
384
+ snrRange: 8
385
+ },
315
386
  ...config.vad
316
387
  },
317
388
  output: {
318
389
  speechGain: 1,
319
- silenceGain: vadEnabled ? 0 : 1,
320
- // If no VAD, always output audio
321
- gainRampTime: 0.02,
390
+ silenceGain: 0,
391
+ // Full mute for voice-only
392
+ gainRampTime: 0.015,
393
+ // Fast but smooth transitions
394
+ smoothTransitions: true,
395
+ maxGainDb: 6,
396
+ enableCompression: false,
397
+ compression: {
398
+ threshold: -24,
399
+ ratio: 3,
400
+ attack: 3e-3,
401
+ release: 0.05
402
+ },
322
403
  ...config.output
323
404
  },
324
405
  livekit: { manageTrackMute: false, ...config.livekit }
@@ -329,7 +410,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
329
410
  output: fullConfig.output
330
411
  });
331
412
  if (!sourceTrack || sourceTrack.kind !== "audio") {
332
- throw new Error("createAudioPipeline requires a valid audio MediaStreamTrack");
413
+ throw new Error(
414
+ "createAudioPipeline requires a valid audio MediaStreamTrack"
415
+ );
333
416
  }
334
417
  if (sourceTrack.readyState === "ended") {
335
418
  throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
@@ -343,10 +426,7 @@ async function createAudioPipeline(sourceTrack, config = {}) {
343
426
  const nsPlugin = getNoiseSuppressionPlugin(
344
427
  fullConfig.noiseSuppression?.pluginName
345
428
  );
346
- nsNode = await nsPlugin.createNode(
347
- context,
348
- fullConfig.noiseSuppression
349
- );
429
+ nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
350
430
  } catch (error) {
351
431
  const err = error instanceof Error ? error : new Error(String(error));
352
432
  console.error("Failed to create noise suppression node:", err);
@@ -356,25 +436,21 @@ async function createAudioPipeline(sourceTrack, config = {}) {
356
436
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
357
437
  try {
358
438
  const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
359
- vadNode = await vadPlugin.createNode(
360
- context,
361
- fullConfig.vad,
362
- (prob) => {
363
- try {
364
- const timestamp = context.currentTime * 1e3;
365
- const newState = vadStateMachine.processFrame(prob, timestamp);
366
- if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
367
- emitter.emit("vadChange", newState);
368
- lastVadState = newState;
369
- updateGain(newState);
370
- }
371
- } catch (vadError) {
372
- const err = vadError instanceof Error ? vadError : new Error(String(vadError));
373
- console.error("Error in VAD callback:", err);
374
- emitter.emit("error", err);
439
+ vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
440
+ try {
441
+ const timestamp = context.currentTime * 1e3;
442
+ const newState = vadStateMachine.processFrame(prob, timestamp);
443
+ if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
444
+ emitter.emit("vadChange", newState);
445
+ lastVadState = newState;
446
+ updateGain(newState);
375
447
  }
448
+ } catch (vadError) {
449
+ const err = vadError instanceof Error ? vadError : new Error(String(vadError));
450
+ console.error("Error in VAD callback:", err);
451
+ emitter.emit("error", err);
376
452
  }
377
- );
453
+ });
378
454
  } catch (error) {
379
455
  const err = error instanceof Error ? error : new Error(String(error));
380
456
  console.error("Failed to create VAD node:", err);
@@ -391,15 +467,31 @@ async function createAudioPipeline(sourceTrack, config = {}) {
391
467
  nsNode.connect(splitter);
392
468
  splitter.connect(vadNode);
393
469
  const delayNode = context.createDelay(1);
394
- const preRollSeconds = (fullConfig.vad?.preRollMs ?? 200) / 1e3;
470
+ const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
395
471
  delayNode.delayTime.value = preRollSeconds;
396
472
  const gainNode = context.createGain();
397
473
  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
474
+ let compressor = null;
475
+ if (fullConfig.output?.enableCompression) {
476
+ compressor = context.createDynamicsCompressor();
477
+ const comp = fullConfig.output.compression;
478
+ compressor.threshold.value = comp.threshold ?? -24;
479
+ compressor.ratio.value = comp.ratio ?? 3;
480
+ compressor.attack.value = comp.attack ?? 3e-3;
481
+ compressor.release.value = comp.release ?? 0.05;
482
+ compressor.knee.value = 10;
483
+ }
398
484
  const destination = context.createMediaStreamDestination();
399
485
  try {
400
486
  splitter.connect(delayNode);
401
487
  delayNode.connect(gainNode);
402
- gainNode.connect(destination);
488
+ if (compressor) {
489
+ gainNode.connect(compressor);
490
+ compressor.connect(destination);
491
+ console.log("Compression enabled:", fullConfig.output?.compression);
492
+ } else {
493
+ gainNode.connect(destination);
494
+ }
403
495
  } catch (error) {
404
496
  const err = error instanceof Error ? error : new Error(String(error));
405
497
  console.error("Failed to wire audio pipeline:", err);
@@ -408,10 +500,24 @@ async function createAudioPipeline(sourceTrack, config = {}) {
408
500
  }
409
501
  function updateGain(state) {
410
502
  try {
411
- const { speechGain, silenceGain, gainRampTime } = fullConfig.output;
412
- const targetGain = state.isSpeaking ? speechGain ?? 1 : silenceGain ?? 0;
503
+ const {
504
+ speechGain = 1,
505
+ silenceGain = 0,
506
+ gainRampTime = 0.015,
507
+ smoothTransitions = true,
508
+ maxGainDb = 6
509
+ } = fullConfig.output;
510
+ const maxGainLinear = Math.pow(10, maxGainDb / 20);
511
+ const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
512
+ const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
413
513
  const now = context.currentTime;
414
- gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime ?? 0.02);
514
+ if (smoothTransitions) {
515
+ gainNode.gain.cancelScheduledValues(now);
516
+ gainNode.gain.setValueAtTime(gainNode.gain.value, now);
517
+ gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
518
+ } else {
519
+ gainNode.gain.setValueAtTime(targetGain, now);
520
+ }
415
521
  } catch (error) {
416
522
  const err = error instanceof Error ? error : new Error(String(error));
417
523
  console.error("Failed to update gain:", err);
@@ -467,6 +573,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
467
573
  vadNode.disconnect();
468
574
  delayNode.disconnect();
469
575
  gainNode.disconnect();
576
+ if (compressor) {
577
+ compressor.disconnect();
578
+ }
470
579
  destination.stream.getTracks().forEach((t) => t.stop());
471
580
  unregisterPipeline();
472
581
  } catch (error) {
@@ -483,7 +592,47 @@ async function createAudioPipeline(sourceTrack, config = {}) {
483
592
  try {
484
593
  if (newConfig.vad) {
485
594
  vadStateMachine.updateConfig(newConfig.vad);
595
+ Object.assign(fullConfig.vad, newConfig.vad);
596
+ if (newConfig.vad.preRollMs !== void 0) {
597
+ const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
598
+ delayNode.delayTime.setValueAtTime(
599
+ preRollSeconds2,
600
+ context.currentTime
601
+ );
602
+ }
603
+ }
604
+ if (newConfig.output) {
605
+ Object.assign(fullConfig.output, newConfig.output);
606
+ updateGain(lastVadState);
607
+ if (compressor && newConfig.output.compression) {
608
+ const comp = newConfig.output.compression;
609
+ if (comp.threshold !== void 0) {
610
+ compressor.threshold.setValueAtTime(
611
+ comp.threshold,
612
+ context.currentTime
613
+ );
614
+ }
615
+ if (comp.ratio !== void 0) {
616
+ compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
617
+ }
618
+ if (comp.attack !== void 0) {
619
+ compressor.attack.setValueAtTime(
620
+ comp.attack,
621
+ context.currentTime
622
+ );
623
+ }
624
+ if (comp.release !== void 0) {
625
+ compressor.release.setValueAtTime(
626
+ comp.release,
627
+ context.currentTime
628
+ );
629
+ }
630
+ }
631
+ }
632
+ if (newConfig.livekit) {
633
+ Object.assign(fullConfig.livekit, newConfig.livekit);
486
634
  }
635
+ console.log("Pipeline config updated:", newConfig);
487
636
  } catch (error) {
488
637
  const err = error instanceof Error ? error : new Error(String(error));
489
638
  console.error("Failed to update config:", err);
@@ -1,11 +1,11 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "../chunk-EXH2PNUE.mjs";
4
- import "../chunk-JJASCVEW.mjs";
3
+ } from "../chunk-AHBRT4RD.mjs";
4
+ import "../chunk-N553RHTI.mjs";
5
5
  import "../chunk-OZ7KMC4S.mjs";
6
- import "../chunk-6P2RDBW5.mjs";
6
+ import "../chunk-YOSTLLCS.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-R5JVHKWA.mjs";
8
+ import "../chunk-NMHKX64G.mjs";
9
9
  export {
10
10
  createAudioPipeline
11
11
  };
package/dist/types.d.mts CHANGED
@@ -35,46 +35,154 @@ interface AudioProcessingConfig {
35
35
  vad?: {
36
36
  enabled: boolean;
37
37
  /**
38
- * Plugin name to use. Defaults to 'rnnoise-vad' or 'energy-vad'.
38
+ * Plugin name to use. Defaults to 'energy-vad'.
39
39
  */
40
40
  pluginName?: string;
41
41
  /**
42
42
  * Probability threshold for speech onset (0-1).
43
- * Default: 0.5
43
+ * When VAD probability rises above this, audio is unmuted.
44
+ * Lower = more sensitive (catches quiet speech, may include noise)
45
+ * Higher = less sensitive (only confident speech, may clip quiet parts)
46
+ * Default: 0.6 (optimized for voice-only)
44
47
  */
45
48
  startThreshold?: number;
46
49
  /**
47
50
  * Probability threshold for speech offset (0-1).
48
- * Default: 0.4
51
+ * When VAD probability drops below this (after hangover), audio is muted.
52
+ * Lower = keeps audio on longer (less aggressive gating)
53
+ * Higher = mutes faster (more aggressive noise suppression)
54
+ * Default: 0.45 (balanced voice detection)
49
55
  */
50
56
  stopThreshold?: number;
51
57
  /**
52
- * Time in ms to wait after speech stops before considering it silent.
53
- * Default: 300ms
58
+ * Time in ms to wait after speech stops before muting.
59
+ * Prevents rapid on/off toggling during pauses.
60
+ * Lower = more aggressive gating, may clip between words
61
+ * Higher = smoother but may let trailing noise through
62
+ * Default: 400ms (optimized for natural speech)
54
63
  */
55
64
  hangoverMs?: number;
56
65
  /**
57
- * Time in ms of audio to buffer before speech onset to avoid cutting the start.
58
- * Default: 200ms
66
+ * Time in ms of audio to buffer before speech onset.
67
+ * Prevents cutting off the beginning of speech.
68
+ * Default: 250ms (generous pre-roll for voice)
59
69
  */
60
70
  preRollMs?: number;
71
+ /**
72
+ * Minimum speech duration in ms to consider it valid speech.
73
+ * Filters out very brief noise spikes.
74
+ * Default: 100ms
75
+ */
76
+ minSpeechDurationMs?: number;
77
+ /**
78
+ * Minimum silence duration in ms before allowing another speech segment.
79
+ * Prevents false positives from quick noise bursts.
80
+ * Default: 150ms
81
+ */
82
+ minSilenceDurationMs?: number;
83
+ /**
84
+ * Advanced: Energy VAD specific parameters
85
+ */
86
+ energyVad?: {
87
+ /**
88
+ * Smoothing factor for energy calculation (0-1).
89
+ * Higher = more smoothing, slower to react
90
+ * Default: 0.95
91
+ */
92
+ smoothing?: number;
93
+ /**
94
+ * Initial noise floor estimate.
95
+ * Default: 0.001
96
+ */
97
+ initialNoiseFloor?: number;
98
+ /**
99
+ * Rate at which noise floor adapts to quiet signals (0-1).
100
+ * Default: 0.01
101
+ */
102
+ noiseFloorAdaptRateQuiet?: number;
103
+ /**
104
+ * Rate at which noise floor adapts to loud signals (0-1).
105
+ * Default: 0.001 (slower adaptation for speech)
106
+ */
107
+ noiseFloorAdaptRateLoud?: number;
108
+ /**
109
+ * Minimum SNR (Signal-to-Noise Ratio) for speech detection.
110
+ * Default: 2.0 (signal must exceed 2x the noise floor before probability rises above 0)
111
+ */
112
+ minSNR?: number;
113
+ /**
114
+ * SNR range for probability scaling.
115
+ * Default: 8.0 (probability scales from minSNR to minSNR+snrRange)
116
+ */
117
+ snrRange?: number;
118
+ };
61
119
  };
62
120
  /**
63
121
  * Output gain and muting configuration.
64
122
  */
65
123
  output?: {
66
124
  /**
67
- * Gain to apply when speaking (0-1+). Default: 1.0
125
+ * Gain to apply when speaking (0-infinity).
126
+ * Values > 1.0 will amplify the voice.
127
+ * Default: 1.0 (unity gain)
68
128
  */
69
129
  speechGain?: number;
70
130
  /**
71
- * Gain to apply when silent (0-1). Default: 0.0 (mute)
131
+ * Gain to apply when silent (0-1).
132
+ * 0.0 = complete mute (recommended for voice-only)
133
+ * 0.1-0.3 = allow some background ambience
134
+ * Default: 0.0 (full mute for voice-only)
72
135
  */
73
136
  silenceGain?: number;
74
137
  /**
75
- * Time in seconds to ramp gain changes. Default: 0.02
138
+ * Time in seconds to ramp gain changes.
139
+ * Lower = faster transitions (may cause clicks)
140
+ * Higher = smoother transitions (may sound sluggish)
141
+ * Default: 0.015 (fast but smooth for voice)
76
142
  */
77
143
  gainRampTime?: number;
144
+ /**
145
+ * Ramp gain changes toward the target level instead of switching instantly.
146
+ * Helps create cleaner cutoffs without abrupt clicks.
147
+ * Default: true
148
+ */
149
+ smoothTransitions?: boolean;
150
+ /**
151
+ * Maximum gain in dB to apply (prevents clipping).
152
+ * Default: 6.0 dB (roughly 2x amplitude)
153
+ */
154
+ maxGainDb?: number;
155
+ /**
156
+ * Apply dynamic range compression when speaking.
157
+ * Makes quiet parts louder and loud parts quieter.
158
+ * Default: false (transparent audio)
159
+ */
160
+ enableCompression?: boolean;
161
+ /**
162
+ * Compression settings (when enabled)
163
+ */
164
+ compression?: {
165
+ /**
166
+ * Threshold in dB above which compression starts.
167
+ * Default: -24.0 dB
168
+ */
169
+ threshold?: number;
170
+ /**
171
+ * Compression ratio (N:1).
172
+ * Default: 3.0 (3:1 ratio)
173
+ */
174
+ ratio?: number;
175
+ /**
176
+ * Attack time in seconds.
177
+ * Default: 0.003 (3ms)
178
+ */
179
+ attack?: number;
180
+ /**
181
+ * Release time in seconds.
182
+ * Default: 0.05 (50ms)
183
+ */
184
+ release?: number;
185
+ };
78
186
  };
79
187
  /**
80
188
  * LiveKit integration configuration.