npm - @tensamin/audio - Versions diffs - 0.1.7 → 0.1.9 - Mend

@tensamin/audio 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/README.md +6 -6
package/dist/{chunk-2TKYGFMC.mjs → chunk-3A2CTC4K.mjs} +18 -6
package/dist/{chunk-JP6DA62Y.mjs → chunk-E7NH2QKZ.mjs} +2 -2
package/dist/{chunk-BMVZ3KKG.mjs → chunk-FOGC2MFA.mjs} +1 -1
package/dist/{chunk-2EX3FXSF.mjs → chunk-KGCEV2VT.mjs} +8 -7
package/dist/{chunk-UQG6Z5W3.mjs → chunk-ZISGHJDU.mjs} +1 -1
package/dist/extensibility/plugins.js +18 -6
package/dist/extensibility/plugins.mjs +2 -2
package/dist/index.js +26 -13
package/dist/index.mjs +5 -5
package/dist/livekit/integration.js +26 -13
package/dist/livekit/integration.mjs +5 -5
package/dist/pipeline/audio-pipeline.js +26 -13
package/dist/pipeline/audio-pipeline.mjs +4 -4
package/dist/types.d.mts +8 -8
package/dist/types.d.ts +8 -8
package/dist/vad/vad-node.js +18 -6
package/dist/vad/vad-node.mjs +1 -1
package/dist/vad/vad-state.js +8 -7
package/dist/vad/vad-state.mjs +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -102,9 +102,9 @@ vad: {
     smoothing: number;                 // Default: 0.95
     initialNoiseFloor: number;         // Default: 0.001
     noiseFloorAdaptRateQuiet: number;  // Default: 0.05
-    noiseFloorAdaptRateLoud: number;   // Default: 0.005
-    minSNR: number;                    // Default: 6.0 (dB)
-    snrRange: number;                  // Default: 12.0 (dB)
+    noiseFloorAdaptRateLoud: number;   // Default: 0.01
+    minSNR: number;                    // Default: 10.0 (dB)
+    snrRange: number;                  // Default: 10.0 (dB)
     minEnergy: number;                 // Default: 0.0005
   };
 }
@@ -112,9 +112,9 @@ vad: {
 **Threshold Parameters:**
-- `startThreshold`: Probability threshold to unmute audio (Default: 0.6, ~13.2dB SNR)
-- `stopThreshold`: Probability threshold to mute audio (Default: 0.45, ~11.4dB SNR)
-- `hangoverMs`: Delay before muting after speech stops
+- `startThreshold`: Probability threshold to unmute audio (Default: 0.8, ~18dB SNR)
+- `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
+- `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
 - `preRollMs`: Audio buffer duration before speech onset
 - `minSpeechDurationMs`: Minimum duration to consider as valid speech
 - `minSilenceDurationMs`: Minimum silence duration between speech segments

package/dist/{chunk-2TKYGFMC.mjs → chunk-3A2CTC4K.mjs} RENAMED Viewed

@@ -4,9 +4,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
   const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
-  const minSNR = energyParams.minSNR ?? 6;
-  const snrRange = energyParams.snrRange ?? 12;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -52,9 +52,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
     } else {
       // If signal is louder, adapt upwards
-      // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
-      // If we are silent, adapt at the normal loud rate
-      const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
+      }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
       this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }

package/dist/{chunk-JP6DA62Y.mjs → chunk-E7NH2QKZ.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "./chunk-2EX3FXSF.mjs";
+} from "./chunk-KGCEV2VT.mjs";
 import {
   getAudioContext,
   registerPipeline,
@@ -9,7 +9,7 @@ import {
 import {
   getNoiseSuppressionPlugin,
   getVADPlugin
-} from "./chunk-BMVZ3KKG.mjs";
+} from "./chunk-FOGC2MFA.mjs";
 // src/pipeline/audio-pipeline.ts
 import mitt from "mitt";

package/dist/{chunk-BMVZ3KKG.mjs → chunk-FOGC2MFA.mjs} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-2TKYGFMC.mjs";
+} from "./chunk-3A2CTC4K.mjs";
 // src/extensibility/plugins.ts
 var nsPlugins = /* @__PURE__ */ new Map();

package/dist/{chunk-2EX3FXSF.mjs → chunk-KGCEV2VT.mjs} RENAMED Viewed

@@ -12,23 +12,24 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
+      // Increased to filter keyboard clicks
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
-        minSNR: config?.energyVad?.minSNR ?? 6,
-        snrRange: config?.energyVad?.snrRange ?? 12,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };

package/dist/{chunk-UQG6Z5W3.mjs → chunk-ZISGHJDU.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   createAudioPipeline
-} from "./chunk-JP6DA62Y.mjs";
+} from "./chunk-E7NH2QKZ.mjs";
 // src/livekit/integration.ts
 async function attachProcessingToTrack(track, config = {}) {

package/dist/extensibility/plugins.js CHANGED Viewed

@@ -107,9 +107,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
   const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
-  const minSNR = energyParams.minSNR ?? 6;
-  const snrRange = energyParams.snrRange ?? 12;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -155,9 +155,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
     } else {
       // If signal is louder, adapt upwards
-      // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
-      // If we are silent, adapt at the normal loud rate
-      const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
+      }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
       this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }

package/dist/extensibility/plugins.mjs CHANGED Viewed

@@ -3,9 +3,9 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "../chunk-BMVZ3KKG.mjs";
+} from "../chunk-FOGC2MFA.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-2TKYGFMC.mjs";
+import "../chunk-3A2CTC4K.mjs";
 export {
   getNoiseSuppressionPlugin,
   getVADPlugin,

package/dist/index.js CHANGED Viewed

@@ -159,9 +159,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
   const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
-  const minSNR = energyParams.minSNR ?? 6;
-  const snrRange = energyParams.snrRange ?? 12;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -207,9 +207,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
     } else {
       // If signal is louder, adapt upwards
-      // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
-      // If we are silent, adapt at the normal loud rate
-      const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
+      }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
       this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
@@ -348,23 +360,24 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
+      // Increased to filter keyboard clicks
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
-        minSNR: config?.energyVad?.minSNR ?? 6,
-        snrRange: config?.energyVad?.snrRange ?? 12,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };

package/dist/index.mjs CHANGED Viewed

@@ -1,13 +1,13 @@
 import "./chunk-WBQAMGXK.mjs";
 import {
   attachProcessingToTrack
-} from "./chunk-UQG6Z5W3.mjs";
+} from "./chunk-ZISGHJDU.mjs";
 import {
   createAudioPipeline
-} from "./chunk-JP6DA62Y.mjs";
+} from "./chunk-E7NH2QKZ.mjs";
 import {
   VADStateMachine
-} from "./chunk-2EX3FXSF.mjs";
+} from "./chunk-KGCEV2VT.mjs";
 import {
   closeAudioContext,
   getAudioContext,
@@ -21,13 +21,13 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "./chunk-BMVZ3KKG.mjs";
+} from "./chunk-FOGC2MFA.mjs";
 import {
   RNNoisePlugin
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-2TKYGFMC.mjs";
+} from "./chunk-3A2CTC4K.mjs";
 export {
   EnergyVADPlugin,
   RNNoisePlugin,

package/dist/livekit/integration.js CHANGED Viewed

@@ -128,9 +128,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
   const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
-  const minSNR = energyParams.minSNR ?? 6;
-  const snrRange = energyParams.snrRange ?? 12;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -176,9 +176,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
     } else {
       // If signal is louder, adapt upwards
-      // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
-      // If we are silent, adapt at the normal loud rate
-      const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
+      }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
       this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
@@ -311,23 +323,24 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
+      // Increased to filter keyboard clicks
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
-        minSNR: config?.energyVad?.minSNR ?? 6,
-        snrRange: config?.energyVad?.snrRange ?? 12,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };

package/dist/livekit/integration.mjs CHANGED Viewed

@@ -1,12 +1,12 @@
 import {
   attachProcessingToTrack
-} from "../chunk-UQG6Z5W3.mjs";
-import "../chunk-JP6DA62Y.mjs";
-import "../chunk-2EX3FXSF.mjs";
+} from "../chunk-ZISGHJDU.mjs";
+import "../chunk-E7NH2QKZ.mjs";
+import "../chunk-KGCEV2VT.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-BMVZ3KKG.mjs";
+import "../chunk-FOGC2MFA.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-2TKYGFMC.mjs";
+import "../chunk-3A2CTC4K.mjs";
 export {
   attachProcessingToTrack
 };

package/dist/pipeline/audio-pipeline.js CHANGED Viewed

@@ -126,9 +126,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
   const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
-  const minSNR = energyParams.minSNR ?? 6;
-  const snrRange = energyParams.snrRange ?? 12;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -174,9 +174,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
     } else {
       // If signal is louder, adapt upwards
-      // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
-      // If we are silent, adapt at the normal loud rate
-      const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
+      }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
       this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
@@ -309,23 +321,24 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
+      // Increased to filter keyboard clicks
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
-        minSNR: config?.energyVad?.minSNR ?? 6,
-        snrRange: config?.energyVad?.snrRange ?? 12,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };

package/dist/pipeline/audio-pipeline.mjs CHANGED Viewed

@@ -1,11 +1,11 @@
 import {
   createAudioPipeline
-} from "../chunk-JP6DA62Y.mjs";
-import "../chunk-2EX3FXSF.mjs";
+} from "../chunk-E7NH2QKZ.mjs";
+import "../chunk-KGCEV2VT.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-BMVZ3KKG.mjs";
+import "../chunk-FOGC2MFA.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-2TKYGFMC.mjs";
+import "../chunk-3A2CTC4K.mjs";
 export {
   createAudioPipeline
 };

package/dist/types.d.mts CHANGED Viewed

@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
          * When VAD probability rises above this, audio is unmuted.
          * Lower = more sensitive (catches quiet speech, may include noise)
          * Higher = less sensitive (only confident speech, may clip quiet parts)
-         * Default: 0.6 (optimized for voice-only)
+         * Default: 0.8 (aggressive noise rejection)
          */
         startThreshold?: number;
         /**
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
          * When VAD probability drops below this (after hangover), audio is muted.
          * Lower = keeps audio on longer (less aggressive gating)
          * Higher = mutes faster (more aggressive noise suppression)
-         * Default: 0.45 (balanced voice detection)
+         * Default: 0.3 (wide hysteresis for stability)
          */
         stopThreshold?: number;
         /**
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
          * Prevents rapid on/off toggling during pauses.
          * Lower = more aggressive gating, may clip between words
          * Higher = smoother but may let trailing noise through
-         * Default: 400ms (optimized for natural speech)
+         * Default: 300ms
          */
         hangoverMs?: number;
         /**
@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
         preRollMs?: number;
         /**
          * Minimum speech duration in ms to consider it valid speech.
-         * Filters out very brief noise spikes.
-         * Default: 100ms
+         * Filters out very brief noise spikes like keyboard clicks.
+         * Default: 150ms
          */
         minSpeechDurationMs?: number;
         /**
@@ -102,17 +102,17 @@ interface AudioProcessingConfig {
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.005 (slower adaptation for speech)
+             * Default: 0.01 (faster tracking of rising noise)
              */
             noiseFloorAdaptRateLoud?: number;
             /**
              * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
-             * Default: 6.0 (voice is ~2x louder than noise floor)
+             * Default: 10.0 (more aggressive noise rejection)
              */
             minSNR?: number;
             /**
              * SNR range in dB for probability scaling.
-             * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
+             * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
              */
             snrRange?: number;
             /**

package/dist/types.d.ts CHANGED Viewed

@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
          * When VAD probability rises above this, audio is unmuted.
          * Lower = more sensitive (catches quiet speech, may include noise)
          * Higher = less sensitive (only confident speech, may clip quiet parts)
-         * Default: 0.6 (optimized for voice-only)
+         * Default: 0.8 (aggressive noise rejection)
          */
         startThreshold?: number;
         /**
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
          * When VAD probability drops below this (after hangover), audio is muted.
          * Lower = keeps audio on longer (less aggressive gating)
          * Higher = mutes faster (more aggressive noise suppression)
-         * Default: 0.45 (balanced voice detection)
+         * Default: 0.3 (wide hysteresis for stability)
          */
         stopThreshold?: number;
         /**
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
          * Prevents rapid on/off toggling during pauses.
          * Lower = more aggressive gating, may clip between words
          * Higher = smoother but may let trailing noise through
-         * Default: 400ms (optimized for natural speech)
+         * Default: 300ms
          */
         hangoverMs?: number;
         /**
@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
         preRollMs?: number;
         /**
          * Minimum speech duration in ms to consider it valid speech.
-         * Filters out very brief noise spikes.
-         * Default: 100ms
+         * Filters out very brief noise spikes like keyboard clicks.
+         * Default: 150ms
          */
         minSpeechDurationMs?: number;
         /**
@@ -102,17 +102,17 @@ interface AudioProcessingConfig {
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.005 (slower adaptation for speech)
+             * Default: 0.01 (faster tracking of rising noise)
              */
             noiseFloorAdaptRateLoud?: number;
             /**
              * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
-             * Default: 6.0 (voice is ~2x louder than noise floor)
+             * Default: 10.0 (more aggressive noise rejection)
              */
             minSNR?: number;
             /**
              * SNR range in dB for probability scaling.
-             * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
+             * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
              */
             snrRange?: number;
             /**

package/dist/vad/vad-node.js CHANGED Viewed

@@ -28,9 +28,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
   const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
-  const minSNR = energyParams.minSNR ?? 6;
-  const snrRange = energyParams.snrRange ?? 12;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
+  const minSNR = energyParams.minSNR ?? 10;
+  const snrRange = energyParams.snrRange ?? 10;
   const minEnergy = energyParams.minEnergy ?? 5e-4;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -76,9 +76,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
       this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
     } else {
       // If signal is louder, adapt upwards
-      // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
-      // If we are silent, adapt at the normal loud rate
-      const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
+      // We use a multi-stage adaptation rate:
+      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
+      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
+      // 3. Otherwise, adapt at the normal loud rate
+      const snr = instantRms / (this.noiseFloor + 1e-6);
+      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      let multiplier = 1.0;
+      if (this.isSpeaking) {
+        multiplier = 0.01;
+      } else if (snrDb > 20) {
+        multiplier = 0.1;
+      }
+      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
       this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }

package/dist/vad/vad-node.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   EnergyVADPlugin
-} from "../chunk-2TKYGFMC.mjs";
+} from "../chunk-3A2CTC4K.mjs";
 export {
   EnergyVADPlugin
 };

package/dist/vad/vad-state.js CHANGED Viewed

@@ -36,23 +36,24 @@ var VADStateMachine = class {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
       // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.6,
+      startThreshold: config?.startThreshold ?? 0.8,
       // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.45,
+      stopThreshold: config?.stopThreshold ?? 0.3,
       // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 400,
+      hangoverMs: config?.hangoverMs ?? 300,
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
+      // Increased to filter keyboard clicks
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
         noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
-        minSNR: config?.energyVad?.minSNR ?? 6,
-        snrRange: config?.energyVad?.snrRange ?? 12,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
+        minSNR: config?.energyVad?.minSNR ?? 10,
+        snrRange: config?.energyVad?.snrRange ?? 10,
         minEnergy: config?.energyVad?.minEnergy ?? 5e-4
       }
     };

package/dist/vad/vad-state.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "../chunk-2EX3FXSF.mjs";
+} from "../chunk-KGCEV2VT.mjs";
 export {
   VADStateMachine
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tensamin/audio",
-  "version": "0.1.7",
+  "version": "0.1.9",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
   "types": "dist/index.d.ts",