npm - @tensamin/audio - Versions diffs - 0.1.11 → 0.1.13 - Mend

@tensamin/audio 0.1.11 → 0.1.13

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (21)

package/README.md +2 -2
package/dist/{chunk-GFLVGUTU.mjs → chunk-DLLK6K76.mjs} +15 -7
package/dist/{chunk-B36JBXOK.mjs → chunk-FKR6NWZF.mjs} +18 -22
package/dist/{chunk-RLZVZ6D6.mjs → chunk-K6X52R7N.mjs} +1 -1
package/dist/{chunk-3I4OQD2L.mjs → chunk-OXV7BHX5.mjs} +1 -1
package/dist/{chunk-I5AR7XQD.mjs → chunk-RD4GDIPO.mjs} +2 -2
package/dist/extensibility/plugins.js +18 -22
package/dist/extensibility/plugins.mjs +2 -2
package/dist/index.js +33 -29
package/dist/index.mjs +5 -5
package/dist/livekit/integration.js +33 -29
package/dist/livekit/integration.mjs +5 -5
package/dist/pipeline/audio-pipeline.js +33 -29
package/dist/pipeline/audio-pipeline.mjs +4 -4
package/dist/types.d.mts +3 -2
package/dist/types.d.ts +3 -2
package/dist/vad/vad-node.js +18 -22
package/dist/vad/vad-node.mjs +1 -1
package/dist/vad/vad-state.js +15 -7
package/dist/vad/vad-state.mjs +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -101,8 +101,8 @@ vad: {
   energyVad?: {
     smoothing: number;                 // Default: 0.95
     initialNoiseFloor: number;         // Default: 0.001
-    noiseFloorAdaptRateQuiet: number;  // Default: 0.005
-    noiseFloorAdaptRateLoud: number;   // Default: 0.01
+    noiseFloorAdaptRateQuiet: number;  // Default: 0.01
+    noiseFloorAdaptRateLoud: number;   // Default: 0.1
     minSNR: number;                    // Default: 10.0 (dB)
     snrRange: number;                  // Default: 10.0 (dB)
     minEnergy: number;                 // Default: 0.001

package/dist/{chunk-GFLVGUTU.mjs → chunk-DLLK6K76.mjs} RENAMED Viewed

@@ -27,7 +27,7 @@ var VADStateMachine = class {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
         minSNR: config?.energyVad?.minSNR ?? 10,
         snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 1e-3
@@ -61,25 +61,33 @@ var VADStateMachine = class {
         newState = "silent";
         this.lastSilenceTime = timestamp;
       }
-    } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
+    } else if (this.currentState === "speech_starting") {
+      if (probability >= stopThreshold) {
+        const speechDuration = timestamp - this.speechStartTime;
+        if (speechDuration >= minSpeechDurationMs) {
+          newState = "speaking";
+        } else {
+          newState = "speech_starting";
+        }
+        this.lastSpeechTime = timestamp;
+      } else {
+        newState = "silent";
+        this.lastSilenceTime = timestamp;
+      }
+    } else if (this.currentState === "speaking") {
       if (probability >= stopThreshold) {
         newState = "speaking";
         this.lastSpeechTime = timestamp;
       } else {
         const timeSinceSpeech = timestamp - this.lastSpeechTime;
-        const speechDuration = timestamp - this.speechStartTime;
         if (timeSinceSpeech < hangoverMs) {
           newState = "speaking";
-        } else if (speechDuration < minSpeechDurationMs) {
-          newState = "silent";
-          this.lastSilenceTime = timestamp;
         } else {
           newState = "speech_ending";
           this.lastSilenceTime = timestamp;
         }
       }
     }
-    if (newState === "speech_starting") newState = "speaking";
     if (newState === "speech_ending") newState = "silent";
     this.currentState = newState;
     return {

package/dist/{chunk-B36JBXOK.mjs → chunk-FKR6NWZF.mjs} RENAMED Viewed

@@ -3,8 +3,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
   const minSNR = energyParams.minSNR ?? 10;
   const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 1e-3;
@@ -46,35 +46,31 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
     // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    // We use a TWO-PASS approach to avoid circular dependencies:
+    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
+    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
+    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
+    // Adapt the noise floor based on instantaneous SNR
     if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
+      // Signal is quieter than noise floor, adapt downwards quickly
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else if (instantSnrDb < 12) {
+      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
+      // Adapt upwards at normal rate to track rising noise
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.01;
-      } else if (snrDb > 20) {
-        multiplier = 0.1;
-      }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
+      // Signal has high SNR (>= 12dB) - likely speech or transient
+      // Adapt VERY slowly to avoid "chasing" speech
+      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
+      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
     // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
     this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));

package/dist/{chunk-RLZVZ6D6.mjs → chunk-K6X52R7N.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   createAudioPipeline
-} from "./chunk-I5AR7XQD.mjs";
+} from "./chunk-RD4GDIPO.mjs";
 // src/livekit/integration.ts
 async function attachProcessingToTrack(track, config = {}) {

package/dist/{chunk-3I4OQD2L.mjs → chunk-OXV7BHX5.mjs} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-B36JBXOK.mjs";
+} from "./chunk-FKR6NWZF.mjs";
 // src/extensibility/plugins.ts
 var nsPlugins = /* @__PURE__ */ new Map();

package/dist/{chunk-I5AR7XQD.mjs → chunk-RD4GDIPO.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "./chunk-GFLVGUTU.mjs";
+} from "./chunk-DLLK6K76.mjs";
 import {
   getAudioContext,
   registerPipeline,
@@ -9,7 +9,7 @@ import {
 import {
   getNoiseSuppressionPlugin,
   getVADPlugin
-} from "./chunk-3I4OQD2L.mjs";
+} from "./chunk-OXV7BHX5.mjs";
 // src/pipeline/audio-pipeline.ts
 import mitt from "mitt";

package/dist/extensibility/plugins.js CHANGED Viewed

@@ -106,8 +106,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
   const minSNR = energyParams.minSNR ?? 10;
   const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 1e-3;
@@ -149,35 +149,31 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
     // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    // We use a TWO-PASS approach to avoid circular dependencies:
+    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
+    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
+    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
+    // Adapt the noise floor based on instantaneous SNR
     if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
+      // Signal is quieter than noise floor, adapt downwards quickly
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else if (instantSnrDb < 12) {
+      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
+      // Adapt upwards at normal rate to track rising noise
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.01;
-      } else if (snrDb > 20) {
-        multiplier = 0.1;
-      }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
+      // Signal has high SNR (>= 12dB) - likely speech or transient
+      // Adapt VERY slowly to avoid "chasing" speech
+      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
+      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
     // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
     this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));

package/dist/extensibility/plugins.mjs CHANGED Viewed

@@ -3,9 +3,9 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "../chunk-3I4OQD2L.mjs";
+} from "../chunk-OXV7BHX5.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-B36JBXOK.mjs";
+import "../chunk-FKR6NWZF.mjs";
 export {
   getNoiseSuppressionPlugin,
   getVADPlugin,

package/dist/index.js CHANGED Viewed

@@ -158,8 +158,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
   const minSNR = energyParams.minSNR ?? 10;
   const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 1e-3;
@@ -201,35 +201,31 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
     // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    // We use a TWO-PASS approach to avoid circular dependencies:
+    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
+    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
+    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
+    // Adapt the noise floor based on instantaneous SNR
     if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
+      // Signal is quieter than noise floor, adapt downwards quickly
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else if (instantSnrDb < 12) {
+      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
+      // Adapt upwards at normal rate to track rising noise
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.01;
-      } else if (snrDb > 20) {
-        multiplier = 0.1;
-      }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
+      // Signal has high SNR (>= 12dB) - likely speech or transient
+      // Adapt VERY slowly to avoid "chasing" speech
+      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
+      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
     // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
     this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -378,7 +374,7 @@ var VADStateMachine = class {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
         minSNR: config?.energyVad?.minSNR ?? 10,
         snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 1e-3
@@ -412,25 +408,33 @@ var VADStateMachine = class {
         newState = "silent";
         this.lastSilenceTime = timestamp;
       }
-    } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
+    } else if (this.currentState === "speech_starting") {
+      if (probability >= stopThreshold) {
+        const speechDuration = timestamp - this.speechStartTime;
+        if (speechDuration >= minSpeechDurationMs) {
+          newState = "speaking";
+        } else {
+          newState = "speech_starting";
+        }
+        this.lastSpeechTime = timestamp;
+      } else {
+        newState = "silent";
+        this.lastSilenceTime = timestamp;
+      }
+    } else if (this.currentState === "speaking") {
       if (probability >= stopThreshold) {
         newState = "speaking";
         this.lastSpeechTime = timestamp;
       } else {
         const timeSinceSpeech = timestamp - this.lastSpeechTime;
-        const speechDuration = timestamp - this.speechStartTime;
         if (timeSinceSpeech < hangoverMs) {
           newState = "speaking";
-        } else if (speechDuration < minSpeechDurationMs) {
-          newState = "silent";
-          this.lastSilenceTime = timestamp;
         } else {
           newState = "speech_ending";
           this.lastSilenceTime = timestamp;
         }
       }
     }
-    if (newState === "speech_starting") newState = "speaking";
     if (newState === "speech_ending") newState = "silent";
     this.currentState = newState;
     return {

package/dist/index.mjs CHANGED Viewed

@@ -1,13 +1,13 @@
 import "./chunk-WBQAMGXK.mjs";
 import {
   attachProcessingToTrack
-} from "./chunk-RLZVZ6D6.mjs";
+} from "./chunk-K6X52R7N.mjs";
 import {
   createAudioPipeline
-} from "./chunk-I5AR7XQD.mjs";
+} from "./chunk-RD4GDIPO.mjs";
 import {
   VADStateMachine
-} from "./chunk-GFLVGUTU.mjs";
+} from "./chunk-DLLK6K76.mjs";
 import {
   closeAudioContext,
   getAudioContext,
@@ -21,13 +21,13 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "./chunk-3I4OQD2L.mjs";
+} from "./chunk-OXV7BHX5.mjs";
 import {
   RNNoisePlugin
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-B36JBXOK.mjs";
+} from "./chunk-FKR6NWZF.mjs";
 export {
   EnergyVADPlugin,
   RNNoisePlugin,

package/dist/livekit/integration.js CHANGED Viewed

@@ -127,8 +127,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
   const minSNR = energyParams.minSNR ?? 10;
   const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 1e-3;
@@ -170,35 +170,31 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
     // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    // We use a TWO-PASS approach to avoid circular dependencies:
+    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
+    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
+    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
+    // Adapt the noise floor based on instantaneous SNR
     if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
+      // Signal is quieter than noise floor, adapt downwards quickly
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else if (instantSnrDb < 12) {
+      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
+      // Adapt upwards at normal rate to track rising noise
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.01;
-      } else if (snrDb > 20) {
-        multiplier = 0.1;
-      }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
+      // Signal has high SNR (>= 12dB) - likely speech or transient
+      // Adapt VERY slowly to avoid "chasing" speech
+      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
+      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
     // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
     this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -341,7 +337,7 @@ var VADStateMachine = class {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
         minSNR: config?.energyVad?.minSNR ?? 10,
         snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 1e-3
@@ -375,25 +371,33 @@ var VADStateMachine = class {
         newState = "silent";
         this.lastSilenceTime = timestamp;
       }
-    } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
+    } else if (this.currentState === "speech_starting") {
+      if (probability >= stopThreshold) {
+        const speechDuration = timestamp - this.speechStartTime;
+        if (speechDuration >= minSpeechDurationMs) {
+          newState = "speaking";
+        } else {
+          newState = "speech_starting";
+        }
+        this.lastSpeechTime = timestamp;
+      } else {
+        newState = "silent";
+        this.lastSilenceTime = timestamp;
+      }
+    } else if (this.currentState === "speaking") {
       if (probability >= stopThreshold) {
         newState = "speaking";
         this.lastSpeechTime = timestamp;
       } else {
         const timeSinceSpeech = timestamp - this.lastSpeechTime;
-        const speechDuration = timestamp - this.speechStartTime;
         if (timeSinceSpeech < hangoverMs) {
           newState = "speaking";
-        } else if (speechDuration < minSpeechDurationMs) {
-          newState = "silent";
-          this.lastSilenceTime = timestamp;
         } else {
           newState = "speech_ending";
           this.lastSilenceTime = timestamp;
         }
       }
     }
-    if (newState === "speech_starting") newState = "speaking";
     if (newState === "speech_ending") newState = "silent";
     this.currentState = newState;
     return {

package/dist/livekit/integration.mjs CHANGED Viewed

@@ -1,12 +1,12 @@
 import {
   attachProcessingToTrack
-} from "../chunk-RLZVZ6D6.mjs";
-import "../chunk-I5AR7XQD.mjs";
-import "../chunk-GFLVGUTU.mjs";
+} from "../chunk-K6X52R7N.mjs";
+import "../chunk-RD4GDIPO.mjs";
+import "../chunk-DLLK6K76.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-3I4OQD2L.mjs";
+import "../chunk-OXV7BHX5.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-B36JBXOK.mjs";
+import "../chunk-FKR6NWZF.mjs";
 export {
   attachProcessingToTrack
 };

package/dist/pipeline/audio-pipeline.js CHANGED Viewed

@@ -125,8 +125,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
   const minSNR = energyParams.minSNR ?? 10;
   const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 1e-3;
@@ -168,35 +168,31 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
     // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    // We use a TWO-PASS approach to avoid circular dependencies:
+    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
+    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
+    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
+    // Adapt the noise floor based on instantaneous SNR
     if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
+      // Signal is quieter than noise floor, adapt downwards quickly
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else if (instantSnrDb < 12) {
+      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
+      // Adapt upwards at normal rate to track rising noise
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.01;
-      } else if (snrDb > 20) {
-        multiplier = 0.1;
-      }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
+      // Signal has high SNR (>= 12dB) - likely speech or transient
+      // Adapt VERY slowly to avoid "chasing" speech
+      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
+      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
     // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
     this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -339,7 +335,7 @@ var VADStateMachine = class {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
         minSNR: config?.energyVad?.minSNR ?? 10,
         snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 1e-3
@@ -373,25 +369,33 @@ var VADStateMachine = class {
         newState = "silent";
         this.lastSilenceTime = timestamp;
       }
-    } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
+    } else if (this.currentState === "speech_starting") {
+      if (probability >= stopThreshold) {
+        const speechDuration = timestamp - this.speechStartTime;
+        if (speechDuration >= minSpeechDurationMs) {
+          newState = "speaking";
+        } else {
+          newState = "speech_starting";
+        }
+        this.lastSpeechTime = timestamp;
+      } else {
+        newState = "silent";
+        this.lastSilenceTime = timestamp;
+      }
+    } else if (this.currentState === "speaking") {
       if (probability >= stopThreshold) {
         newState = "speaking";
         this.lastSpeechTime = timestamp;
       } else {
         const timeSinceSpeech = timestamp - this.lastSpeechTime;
-        const speechDuration = timestamp - this.speechStartTime;
         if (timeSinceSpeech < hangoverMs) {
           newState = "speaking";
-        } else if (speechDuration < minSpeechDurationMs) {
-          newState = "silent";
-          this.lastSilenceTime = timestamp;
         } else {
           newState = "speech_ending";
           this.lastSilenceTime = timestamp;
         }
       }
     }
-    if (newState === "speech_starting") newState = "speaking";
     if (newState === "speech_ending") newState = "silent";
     this.currentState = newState;
     return {

package/dist/pipeline/audio-pipeline.mjs CHANGED Viewed

@@ -1,11 +1,11 @@
 import {
   createAudioPipeline
-} from "../chunk-I5AR7XQD.mjs";
-import "../chunk-GFLVGUTU.mjs";
+} from "../chunk-RD4GDIPO.mjs";
+import "../chunk-DLLK6K76.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-3I4OQD2L.mjs";
+import "../chunk-OXV7BHX5.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-B36JBXOK.mjs";
+import "../chunk-FKR6NWZF.mjs";
 export {
   createAudioPipeline
 };

package/dist/types.d.mts CHANGED Viewed

@@ -97,12 +97,13 @@ interface AudioProcessingConfig {
             initialNoiseFloor?: number;
             /**
              * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.005 (slower downward drift)
+             * Default: 0.01
              */
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.01
+             * Applied when instantaneous SNR < 12dB (background noise).
+             * Default: 0.1 (fast tracking of rising noise)
              */
             noiseFloorAdaptRateLoud?: number;
             /**

package/dist/types.d.ts CHANGED Viewed

@@ -97,12 +97,13 @@ interface AudioProcessingConfig {
             initialNoiseFloor?: number;
             /**
              * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.005 (slower downward drift)
+             * Default: 0.01
              */
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.01
+             * Applied when instantaneous SNR < 12dB (background noise).
+             * Default: 0.1 (fast tracking of rising noise)
              */
             noiseFloorAdaptRateLoud?: number;
             /**

package/dist/vad/vad-node.js CHANGED Viewed

@@ -27,8 +27,8 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
   const minSNR = energyParams.minSNR ?? 10;
   const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 1e-3;
@@ -70,35 +70,31 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
     // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
+    // We use a TWO-PASS approach to avoid circular dependencies:
+    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
+    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
+    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
+    // Adapt the noise floor based on instantaneous SNR
     if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
+      // Signal is quieter than noise floor, adapt downwards quickly
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    } else if (instantSnrDb < 12) {
+      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
+      // Adapt upwards at normal rate to track rising noise
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.01;
-      } else if (snrDb > 20) {
-        multiplier = 0.1;
-      }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
+      // Signal has high SNR (>= 12dB) - likely speech or transient
+      // Adapt VERY slowly to avoid "chasing" speech
+      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
+      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
     // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
     this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));

package/dist/vad/vad-node.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   EnergyVADPlugin
-} from "../chunk-B36JBXOK.mjs";
+} from "../chunk-FKR6NWZF.mjs";
 export {
   EnergyVADPlugin
 };

package/dist/vad/vad-state.js CHANGED Viewed

@@ -51,7 +51,7 @@ var VADStateMachine = class {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
         minSNR: config?.energyVad?.minSNR ?? 10,
         snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 1e-3
@@ -85,25 +85,33 @@ var VADStateMachine = class {
         newState = "silent";
         this.lastSilenceTime = timestamp;
       }
-    } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
+    } else if (this.currentState === "speech_starting") {
+      if (probability >= stopThreshold) {
+        const speechDuration = timestamp - this.speechStartTime;
+        if (speechDuration >= minSpeechDurationMs) {
+          newState = "speaking";
+        } else {
+          newState = "speech_starting";
+        }
+        this.lastSpeechTime = timestamp;
+      } else {
+        newState = "silent";
+        this.lastSilenceTime = timestamp;
+      }
+    } else if (this.currentState === "speaking") {
       if (probability >= stopThreshold) {
         newState = "speaking";
         this.lastSpeechTime = timestamp;
       } else {
         const timeSinceSpeech = timestamp - this.lastSpeechTime;
-        const speechDuration = timestamp - this.speechStartTime;
         if (timeSinceSpeech < hangoverMs) {
           newState = "speaking";
-        } else if (speechDuration < minSpeechDurationMs) {
-          newState = "silent";
-          this.lastSilenceTime = timestamp;
         } else {
           newState = "speech_ending";
           this.lastSilenceTime = timestamp;
         }
       }
     }
-    if (newState === "speech_starting") newState = "speaking";
     if (newState === "speech_ending") newState = "silent";
     this.currentState = newState;
     return {

package/dist/vad/vad-state.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "../chunk-GFLVGUTU.mjs";
+} from "../chunk-DLLK6K76.mjs";
 export {
   VADStateMachine
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tensamin/audio",
-  "version": "0.1.11",
+  "version": "0.1.13",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
   "types": "dist/index.d.ts",