npm - @tensamin/audio - Versions diffs - 0.1.6 → 0.1.8 - Mend

@tensamin/audio 0.1.6 → 0.1.8

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (21)

package/README.md +11 -9
package/dist/{chunk-DF4AYGHJ.mjs → chunk-2UPI6VWY.mjs} +2 -2
package/dist/{chunk-TLPO52HV.mjs → chunk-3A2CTC4K.mjs} +49 -31
package/dist/{chunk-ZCC7ID7L.mjs → chunk-FOGC2MFA.mjs} +1 -1
package/dist/{chunk-N553RHTI.mjs → chunk-XHMNP7NC.mjs} +8 -7
package/dist/{chunk-TWQJGBBU.mjs → chunk-Y6IG7XGC.mjs} +1 -1
package/dist/extensibility/plugins.js +49 -31
package/dist/extensibility/plugins.mjs +2 -2
package/dist/index.js +57 -38
package/dist/index.mjs +5 -5
package/dist/livekit/integration.js +57 -38
package/dist/livekit/integration.mjs +5 -5
package/dist/pipeline/audio-pipeline.js +57 -38
package/dist/pipeline/audio-pipeline.mjs +4 -4
package/dist/types.d.mts +15 -9
package/dist/types.d.ts +15 -9
package/dist/vad/vad-node.js +49 -31
package/dist/vad/vad-node.mjs +1 -1
package/dist/vad/vad-state.js +8 -7
package/dist/vad/vad-state.mjs +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -101,19 +101,20 @@ vad: {
   energyVad?: {
     smoothing: number;                 // Default: 0.95
     initialNoiseFloor: number;         // Default: 0.001
-    noiseFloorAdaptRateQuiet: number;  // Default: 0.01
-    noiseFloorAdaptRateLoud: number;   // Default: 0.001
-    minSNR: number;                    // Default: 2.0
-    snrRange: number;                  // Default: 8.0
+    noiseFloorAdaptRateQuiet: number;  // Default: 0.05
+    noiseFloorAdaptRateLoud: number;   // Default: 0.01
+    minSNR: number;                    // Default: 10.0 (dB)
+    snrRange: number;                  // Default: 10.0 (dB)
+    minEnergy: number;                 // Default: 0.0005
   };
 }
 ```
 **Threshold Parameters:**
-- `startThreshold`: Probability threshold to unmute audio
-- `stopThreshold`: Probability threshold to mute audio (after hangover)
-- `hangoverMs`: Delay before muting after speech stops
+- `startThreshold`: Probability threshold to unmute audio (Default: 0.8, ~18dB SNR)
+- `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
+- `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
 - `preRollMs`: Audio buffer duration before speech onset
 - `minSpeechDurationMs`: Minimum duration to consider as valid speech
 - `minSilenceDurationMs`: Minimum silence duration between speech segments
@@ -121,8 +122,9 @@ vad: {
 **Energy VAD Parameters:**
 - `smoothing`: Energy calculation smoothing factor (0-1)
-- `minSNR`: Minimum signal-to-noise ratio for speech detection
-- `snrRange`: Range for probability scaling from minSNR
+- `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
+- `snrRange`: Range in dB for probability scaling from minSNR
+- `minEnergy`: Minimum absolute RMS energy to consider as speech
 ### Output Control

package/dist/{chunk-DF4AYGHJ.mjs → chunk-2UPI6VWY.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "./chunk-N553RHTI.mjs";
+} from "./chunk-XHMNP7NC.mjs";
 import {
   getAudioContext,
   registerPipeline,
@@ -9,7 +9,7 @@ import {
 import {
   getNoiseSuppressionPlugin,
   getVADPlugin
-} from "./chunk-ZCC7ID7L.mjs";
+} from "./chunk-FOGC2MFA.mjs";
 // src/pipeline/audio-pipeline.ts
 import mitt from "mitt";

package/dist/{chunk-TLPO52HV.mjs → chunk-3A2CTC4K.mjs} RENAMED Viewed

@@ -3,10 +3,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
-  const minSNR = energyParams.minSNR ?? 2;
-  const snrRange = energyParams.snrRange ?? 8;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
+  const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -18,6 +19,7 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
+    this.minEnergy = ${minEnergy};
     this.isSpeaking = false;
     this.port.onmessage = (event) => {
@@ -32,44 +34,60 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];
-    // Calculate RMS (Root Mean Square) energy
+    // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
-    const rms = Math.sqrt(sum / channel.length);
+    const instantRms = Math.sqrt(sum / channel.length);
-    // Adaptive noise floor estimation - ONLY during silence
-    // This prevents the noise floor from rising during speech
-    if (!this.isSpeaking) {
-      if (rms < this.noiseFloor) {
-        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
-      } else {
-        // Even during silence, if we detect a loud signal, adapt very slowly
-        // This could be brief noise we haven't classified as speech yet
-        // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
-        // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
-        // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
-        const instantSnr = rms / (this.noiseFloor + 1e-6);
-        if (instantSnr < 3.0) {
-          this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
-        }
+    // Smooth the RMS energy to reduce jitter
+    // this.energy acts as the smoothed RMS value
+    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
+    // Adaptive noise floor estimation
+    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    if (instantRms < this.noiseFloor) {
+      // If signal is quieter than noise floor, adapt downwards quickly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else {
+      // If signal is louder, adapt upwards
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
       }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
+      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
-    // During speech, freeze the noise floor to maintain consistent detection
-    // Calculate Signal-to-Noise Ratio (SNR)
-    const snr = rms / (this.noiseFloor + 1e-6);
+    // Ensure noise floor doesn't drop to absolute zero
+    this.noiseFloor = Math.max(this.noiseFloor, 1e-5);
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    const snr = this.energy / (this.noiseFloor + 1e-6);
+    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-    // Map SNR to probability (0-1)
-    // Probability is 0 when SNR <= minSNR
+    // Map SNR dB to probability (0-1)
+    // Probability is 0 when snrDb <= minSNR
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
-    // Probability is 1 when SNR >= (minSNR + snrRange)
-    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
+    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
+    // Apply absolute energy threshold
+    if (this.energy < this.minEnergy) {
+      probability = 0;
+    }
-    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
+    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
     return true;
   }

package/dist/{chunk-ZCC7ID7L.mjs → chunk-FOGC2MFA.mjs} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-TLPO52HV.mjs";
+} from "./chunk-3A2CTC4K.mjs";
 // src/extensibility/plugins.ts
 var nsPlugins = /* @__PURE__ */ new Map();

package/dist/{chunk-N553RHTI.mjs → chunk-XHMNP7NC.mjs} RENAMED Viewed

@@ -12,11 +12,11 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
@@ -25,10 +25,11 @@ var VADStateMachine = class {
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
-        minSNR: config?.energyVad?.minSNR ?? 2,
-        snrRange: config?.energyVad?.snrRange ?? 8
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
+        minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/{chunk-TWQJGBBU.mjs → chunk-Y6IG7XGC.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   createAudioPipeline
-} from "./chunk-DF4AYGHJ.mjs";
+} from "./chunk-2UPI6VWY.mjs";
 // src/livekit/integration.ts
 async function attachProcessingToTrack(track, config = {}) {

package/dist/extensibility/plugins.js CHANGED Viewed

@@ -106,10 +106,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
-  const minSNR = energyParams.minSNR ?? 2;
-  const snrRange = energyParams.snrRange ?? 8;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
+  const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -121,6 +122,7 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
+    this.minEnergy = ${minEnergy};
     this.isSpeaking = false;
     this.port.onmessage = (event) => {
@@ -135,44 +137,60 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];
-    // Calculate RMS (Root Mean Square) energy
+    // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
-    const rms = Math.sqrt(sum / channel.length);
+    const instantRms = Math.sqrt(sum / channel.length);
-    // Adaptive noise floor estimation - ONLY during silence
-    // This prevents the noise floor from rising during speech
-    if (!this.isSpeaking) {
-      if (rms < this.noiseFloor) {
-        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
-      } else {
-        // Even during silence, if we detect a loud signal, adapt very slowly
-        // This could be brief noise we haven't classified as speech yet
-        // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
-        // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
-        // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
-        const instantSnr = rms / (this.noiseFloor + 1e-6);
-        if (instantSnr < 3.0) {
-          this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
-        }
+    // Smooth the RMS energy to reduce jitter
+    // this.energy acts as the smoothed RMS value
+    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
+    // Adaptive noise floor estimation
+    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    if (instantRms < this.noiseFloor) {
+      // If signal is quieter than noise floor, adapt downwards quickly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else {
+      // If signal is louder, adapt upwards
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
       }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
+      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
-    // During speech, freeze the noise floor to maintain consistent detection
-    // Calculate Signal-to-Noise Ratio (SNR)
-    const snr = rms / (this.noiseFloor + 1e-6);
+    // Ensure noise floor doesn't drop to absolute zero
+    this.noiseFloor = Math.max(this.noiseFloor, 1e-5);
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    const snr = this.energy / (this.noiseFloor + 1e-6);
+    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-    // Map SNR to probability (0-1)
-    // Probability is 0 when SNR <= minSNR
+    // Map SNR dB to probability (0-1)
+    // Probability is 0 when snrDb <= minSNR
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
-    // Probability is 1 when SNR >= (minSNR + snrRange)
-    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
+    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
+    // Apply absolute energy threshold
+    if (this.energy < this.minEnergy) {
+      probability = 0;
+    }
-    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
+    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
     return true;
   }

package/dist/extensibility/plugins.mjs CHANGED Viewed

@@ -3,9 +3,9 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "../chunk-ZCC7ID7L.mjs";
+} from "../chunk-FOGC2MFA.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-TLPO52HV.mjs";
+import "../chunk-3A2CTC4K.mjs";
 export {
   getNoiseSuppressionPlugin,
   getVADPlugin,

package/dist/index.js CHANGED Viewed

@@ -158,10 +158,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
-  const minSNR = energyParams.minSNR ?? 2;
-  const snrRange = energyParams.snrRange ?? 8;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
+  const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -173,6 +174,7 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
+    this.minEnergy = ${minEnergy};
     this.isSpeaking = false;
     this.port.onmessage = (event) => {
@@ -187,44 +189,60 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];
-    // Calculate RMS (Root Mean Square) energy
+    // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
-    const rms = Math.sqrt(sum / channel.length);
+    const instantRms = Math.sqrt(sum / channel.length);
-    // Adaptive noise floor estimation - ONLY during silence
-    // This prevents the noise floor from rising during speech
-    if (!this.isSpeaking) {
-      if (rms < this.noiseFloor) {
-        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
-      } else {
-        // Even during silence, if we detect a loud signal, adapt very slowly
-        // This could be brief noise we haven't classified as speech yet
-        // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
-        // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
-        // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
-        const instantSnr = rms / (this.noiseFloor + 1e-6);
-        if (instantSnr < 3.0) {
-          this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
-        }
+    // Smooth the RMS energy to reduce jitter
+    // this.energy acts as the smoothed RMS value
+    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
+    // Adaptive noise floor estimation
+    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    if (instantRms < this.noiseFloor) {
+      // If signal is quieter than noise floor, adapt downwards quickly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else {
+      // If signal is louder, adapt upwards
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
       }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
+      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
-    // During speech, freeze the noise floor to maintain consistent detection
-    // Calculate Signal-to-Noise Ratio (SNR)
-    const snr = rms / (this.noiseFloor + 1e-6);
+    // Ensure noise floor doesn't drop to absolute zero
+    this.noiseFloor = Math.max(this.noiseFloor, 1e-5);
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    const snr = this.energy / (this.noiseFloor + 1e-6);
+    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-    // Map SNR to probability (0-1)
-    // Probability is 0 when SNR <= minSNR
+    // Map SNR dB to probability (0-1)
+    // Probability is 0 when snrDb <= minSNR
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
-    // Probability is 1 when SNR >= (minSNR + snrRange)
-    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
+    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
+    // Apply absolute energy threshold
+    if (this.energy < this.minEnergy) {
+      probability = 0;
+    }
-    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
+    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
     return true;
   }
@@ -342,11 +360,11 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
@@ -355,10 +373,11 @@ var VADStateMachine = class {
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
-        minSNR: config?.energyVad?.minSNR ?? 2,
-        snrRange: config?.energyVad?.snrRange ?? 8
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
+        minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/index.mjs CHANGED Viewed

@@ -1,13 +1,13 @@
 import "./chunk-WBQAMGXK.mjs";
 import {
   attachProcessingToTrack
-} from "./chunk-TWQJGBBU.mjs";
+} from "./chunk-Y6IG7XGC.mjs";
 import {
   createAudioPipeline
-} from "./chunk-DF4AYGHJ.mjs";
+} from "./chunk-2UPI6VWY.mjs";
 import {
   VADStateMachine
-} from "./chunk-N553RHTI.mjs";
+} from "./chunk-XHMNP7NC.mjs";
 import {
   closeAudioContext,
   getAudioContext,
@@ -21,13 +21,13 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "./chunk-ZCC7ID7L.mjs";
+} from "./chunk-FOGC2MFA.mjs";
 import {
   RNNoisePlugin
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-TLPO52HV.mjs";
+} from "./chunk-3A2CTC4K.mjs";
 export {
   EnergyVADPlugin,
   RNNoisePlugin,

package/dist/livekit/integration.js CHANGED Viewed

@@ -127,10 +127,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
-  const minSNR = energyParams.minSNR ?? 2;
-  const snrRange = energyParams.snrRange ?? 8;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
+  const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -142,6 +143,7 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
+    this.minEnergy = ${minEnergy};
     this.isSpeaking = false;
     this.port.onmessage = (event) => {
@@ -156,44 +158,60 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];
-    // Calculate RMS (Root Mean Square) energy
+    // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
-    const rms = Math.sqrt(sum / channel.length);
+    const instantRms = Math.sqrt(sum / channel.length);
-    // Adaptive noise floor estimation - ONLY during silence
-    // This prevents the noise floor from rising during speech
-    if (!this.isSpeaking) {
-      if (rms < this.noiseFloor) {
-        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
-      } else {
-        // Even during silence, if we detect a loud signal, adapt very slowly
-        // This could be brief noise we haven't classified as speech yet
-        // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
-        // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
-        // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
-        const instantSnr = rms / (this.noiseFloor + 1e-6);
-        if (instantSnr < 3.0) {
-          this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
-        }
+    // Smooth the RMS energy to reduce jitter
+    // this.energy acts as the smoothed RMS value
+    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
+    // Adaptive noise floor estimation
+    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    if (instantRms < this.noiseFloor) {
+      // If signal is quieter than noise floor, adapt downwards quickly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else {
+      // If signal is louder, adapt upwards
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
       }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
+      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
-    // During speech, freeze the noise floor to maintain consistent detection
-    // Calculate Signal-to-Noise Ratio (SNR)
-    const snr = rms / (this.noiseFloor + 1e-6);
+    // Ensure noise floor doesn't drop to absolute zero
+    this.noiseFloor = Math.max(this.noiseFloor, 1e-5);
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    const snr = this.energy / (this.noiseFloor + 1e-6);
+    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-    // Map SNR to probability (0-1)
-    // Probability is 0 when SNR <= minSNR
+    // Map SNR dB to probability (0-1)
+    // Probability is 0 when snrDb <= minSNR
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
-    // Probability is 1 when SNR >= (minSNR + snrRange)
-    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
+    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
+    // Apply absolute energy threshold
+    if (this.energy < this.minEnergy) {
+      probability = 0;
+    }
-    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
+    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
     return true;
   }
@@ -305,11 +323,11 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
@@ -318,10 +336,11 @@ var VADStateMachine = class {
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
-        minSNR: config?.energyVad?.minSNR ?? 2,
-        snrRange: config?.energyVad?.snrRange ?? 8
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
+        minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/livekit/integration.mjs CHANGED Viewed

@@ -1,12 +1,12 @@
 import {
   attachProcessingToTrack
-} from "../chunk-TWQJGBBU.mjs";
-import "../chunk-DF4AYGHJ.mjs";
-import "../chunk-N553RHTI.mjs";
+} from "../chunk-Y6IG7XGC.mjs";
+import "../chunk-2UPI6VWY.mjs";
+import "../chunk-XHMNP7NC.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-ZCC7ID7L.mjs";
+import "../chunk-FOGC2MFA.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-TLPO52HV.mjs";
+import "../chunk-3A2CTC4K.mjs";
 export {
   attachProcessingToTrack
 };

package/dist/pipeline/audio-pipeline.js CHANGED Viewed

@@ -125,10 +125,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
-  const minSNR = energyParams.minSNR ?? 2;
-  const snrRange = energyParams.snrRange ?? 8;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
+  const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -140,6 +141,7 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
+    this.minEnergy = ${minEnergy};
     this.isSpeaking = false;
     this.port.onmessage = (event) => {
@@ -154,44 +156,60 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];
-    // Calculate RMS (Root Mean Square) energy
+    // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
-    const rms = Math.sqrt(sum / channel.length);
+    const instantRms = Math.sqrt(sum / channel.length);
-    // Adaptive noise floor estimation - ONLY during silence
-    // This prevents the noise floor from rising during speech
-    if (!this.isSpeaking) {
-      if (rms < this.noiseFloor) {
-        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
-      } else {
-        // Even during silence, if we detect a loud signal, adapt very slowly
-        // This could be brief noise we haven't classified as speech yet
-        // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
-        // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
-        // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
-        const instantSnr = rms / (this.noiseFloor + 1e-6);
-        if (instantSnr < 3.0) {
-          this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
-        }
+    // Smooth the RMS energy to reduce jitter
+    // this.energy acts as the smoothed RMS value
+    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
+    // Adaptive noise floor estimation
+    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    if (instantRms < this.noiseFloor) {
+      // If signal is quieter than noise floor, adapt downwards quickly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else {
+      // If signal is louder, adapt upwards
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
       }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
+      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
-    // During speech, freeze the noise floor to maintain consistent detection
-    // Calculate Signal-to-Noise Ratio (SNR)
-    const snr = rms / (this.noiseFloor + 1e-6);
+    // Ensure noise floor doesn't drop to absolute zero
+    this.noiseFloor = Math.max(this.noiseFloor, 1e-5);
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    const snr = this.energy / (this.noiseFloor + 1e-6);
+    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-    // Map SNR to probability (0-1)
-    // Probability is 0 when SNR <= minSNR
+    // Map SNR dB to probability (0-1)
+    // Probability is 0 when snrDb <= minSNR
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
-    // Probability is 1 when SNR >= (minSNR + snrRange)
-    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
+    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
+    // Apply absolute energy threshold
+    if (this.energy < this.minEnergy) {
+      probability = 0;
+    }
-    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
+    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
     return true;
   }
@@ -303,11 +321,11 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
@@ -316,10 +334,11 @@ var VADStateMachine = class {
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
-        minSNR: config?.energyVad?.minSNR ?? 2,
-        snrRange: config?.energyVad?.snrRange ?? 8
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
+        minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/pipeline/audio-pipeline.mjs CHANGED Viewed

@@ -1,11 +1,11 @@
 import {
   createAudioPipeline
-} from "../chunk-DF4AYGHJ.mjs";
-import "../chunk-N553RHTI.mjs";
+} from "../chunk-2UPI6VWY.mjs";
+import "../chunk-XHMNP7NC.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-ZCC7ID7L.mjs";
+import "../chunk-FOGC2MFA.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-TLPO52HV.mjs";
+import "../chunk-3A2CTC4K.mjs";
 export {
   createAudioPipeline
 };

package/dist/types.d.mts CHANGED Viewed

@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
          * When VAD probability rises above this, audio is unmuted.
          * Lower = more sensitive (catches quiet speech, may include noise)
          * Higher = less sensitive (only confident speech, may clip quiet parts)
-         * Default: 0.6 (optimized for voice-only)
+         * Default: 0.8 (aggressive noise rejection)
          */
         startThreshold?: number;
         /**
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
          * When VAD probability drops below this (after hangover), audio is muted.
          * Lower = keeps audio on longer (less aggressive gating)
          * Higher = mutes faster (more aggressive noise suppression)
-         * Default: 0.45 (balanced voice detection)
+         * Default: 0.3 (wide hysteresis for stability)
          */
         stopThreshold?: number;
         /**
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
          * Prevents rapid on/off toggling during pauses.
          * Lower = more aggressive gating, may clip between words
          * Higher = smoother but may let trailing noise through
-         * Default: 400ms (optimized for natural speech)
+         * Default: 300ms
          */
         hangoverMs?: number;
         /**
@@ -97,24 +97,30 @@ interface AudioProcessingConfig {
             initialNoiseFloor?: number;
             /**
              * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.01
+             * Default: 0.05
              */
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.001 (slower adaptation for speech)
+             * Default: 0.01 (faster tracking of rising noise)
              */
             noiseFloorAdaptRateLoud?: number;
             /**
-             * Minimum SNR (Signal-to-Noise Ratio) for speech detection.
-             * Default: 2.0 (voice is 2x louder than noise floor)
+             * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
+             * Default: 10.0 (more aggressive noise rejection)
              */
             minSNR?: number;
             /**
-             * SNR range for probability scaling.
-             * Default: 8.0 (probability scales from minSNR to minSNR+snrRange)
+             * SNR range in dB for probability scaling.
+             * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
              */
             snrRange?: number;
+            /**
+             * Minimum absolute RMS energy to consider as speech.
+             * Prevents triggering on very quiet background noise in silent rooms.
+             * Default: 0.0005
+             */
+            minEnergy?: number;
         };
     };
     /**

package/dist/types.d.ts CHANGED Viewed

@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
          * When VAD probability rises above this, audio is unmuted.
          * Lower = more sensitive (catches quiet speech, may include noise)
          * Higher = less sensitive (only confident speech, may clip quiet parts)
-         * Default: 0.6 (optimized for voice-only)
+         * Default: 0.8 (aggressive noise rejection)
          */
         startThreshold?: number;
         /**
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
          * When VAD probability drops below this (after hangover), audio is muted.
          * Lower = keeps audio on longer (less aggressive gating)
          * Higher = mutes faster (more aggressive noise suppression)
-         * Default: 0.45 (balanced voice detection)
+         * Default: 0.3 (wide hysteresis for stability)
          */
         stopThreshold?: number;
         /**
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
          * Prevents rapid on/off toggling during pauses.
          * Lower = more aggressive gating, may clip between words
          * Higher = smoother but may let trailing noise through
-         * Default: 400ms (optimized for natural speech)
+         * Default: 300ms
          */
         hangoverMs?: number;
         /**
@@ -97,24 +97,30 @@ interface AudioProcessingConfig {
             initialNoiseFloor?: number;
             /**
              * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.01
+             * Default: 0.05
              */
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.001 (slower adaptation for speech)
+             * Default: 0.01 (faster tracking of rising noise)
              */
             noiseFloorAdaptRateLoud?: number;
             /**
-             * Minimum SNR (Signal-to-Noise Ratio) for speech detection.
-             * Default: 2.0 (voice is 2x louder than noise floor)
+             * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
+             * Default: 10.0 (more aggressive noise rejection)
              */
             minSNR?: number;
             /**
-             * SNR range for probability scaling.
-             * Default: 8.0 (probability scales from minSNR to minSNR+snrRange)
+             * SNR range in dB for probability scaling.
+             * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
              */
             snrRange?: number;
+            /**
+             * Minimum absolute RMS energy to consider as speech.
+             * Prevents triggering on very quiet background noise in silent rooms.
+             * Default: 0.0005
+             */
+            minEnergy?: number;
         };
     };
     /**

package/dist/vad/vad-node.js CHANGED Viewed

@@ -27,10 +27,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
-  const minSNR = energyParams.minSNR ?? 2;
-  const snrRange = energyParams.snrRange ?? 8;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
+  const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -42,6 +43,7 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
+    this.minEnergy = ${minEnergy};
     this.isSpeaking = false;
     this.port.onmessage = (event) => {
@@ -56,44 +58,60 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];
-    // Calculate RMS (Root Mean Square) energy
+    // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
-    const rms = Math.sqrt(sum / channel.length);
+    const instantRms = Math.sqrt(sum / channel.length);
-    // Adaptive noise floor estimation - ONLY during silence
-    // This prevents the noise floor from rising during speech
-    if (!this.isSpeaking) {
-      if (rms < this.noiseFloor) {
-        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
-      } else {
-        // Even during silence, if we detect a loud signal, adapt very slowly
-        // This could be brief noise we haven't classified as speech yet
-        // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
-        // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
-        // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
-        const instantSnr = rms / (this.noiseFloor + 1e-6);
-        if (instantSnr < 3.0) {
-          this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
-        }
+    // Smooth the RMS energy to reduce jitter
+    // this.energy acts as the smoothed RMS value
+    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
+    // Adaptive noise floor estimation
+    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    if (instantRms < this.noiseFloor) {
+      // If signal is quieter than noise floor, adapt downwards quickly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else {
+      // If signal is louder, adapt upwards
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
       }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
+      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
-    // During speech, freeze the noise floor to maintain consistent detection
-    // Calculate Signal-to-Noise Ratio (SNR)
-    const snr = rms / (this.noiseFloor + 1e-6);
+    // Ensure noise floor doesn't drop to absolute zero
+    this.noiseFloor = Math.max(this.noiseFloor, 1e-5);
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    const snr = this.energy / (this.noiseFloor + 1e-6);
+    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-    // Map SNR to probability (0-1)
-    // Probability is 0 when SNR <= minSNR
+    // Map SNR dB to probability (0-1)
+    // Probability is 0 when snrDb <= minSNR
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
-    // Probability is 1 when SNR >= (minSNR + snrRange)
-    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
+    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
+    // Apply absolute energy threshold
+    if (this.energy < this.minEnergy) {
+      probability = 0;
+    }
-    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
+    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
     return true;
   }

package/dist/vad/vad-node.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   EnergyVADPlugin
-} from "../chunk-TLPO52HV.mjs";
+} from "../chunk-3A2CTC4K.mjs";
 export {
   EnergyVADPlugin
 };

package/dist/vad/vad-state.js CHANGED Viewed

@@ -36,11 +36,11 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
@@ -49,10 +49,11 @@ var VADStateMachine = class {
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
-        minSNR: config?.energyVad?.minSNR ?? 2,
-        snrRange: config?.energyVad?.snrRange ?? 8
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
+        minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/vad/vad-state.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "../chunk-N553RHTI.mjs";
+} from "../chunk-XHMNP7NC.mjs";
 export {
   VADStateMachine
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tensamin/audio",
-  "version": "0.1.6",
+  "version": "0.1.8",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
   "types": "dist/index.d.ts",