npm - @tensamin/audio - Versions diffs - 0.1.12 → 0.1.14 - Mend

@tensamin/audio 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/README.md +6 -6
package/dist/{chunk-KEWK2OKV.mjs → chunk-2G2JFHJY.mjs} +45 -30
package/dist/{chunk-Q2I22TJG.mjs → chunk-6F2HZUYO.mjs} +1 -1
package/dist/{chunk-DYY2MXMU.mjs → chunk-K4YLH73B.mjs} +6 -6
package/dist/{chunk-SMZJFNRU.mjs → chunk-R5M2DGAQ.mjs} +2 -2
package/dist/{chunk-XZSFQJW4.mjs → chunk-UFKIAMG3.mjs} +1 -1
package/dist/extensibility/plugins.js +45 -30
package/dist/extensibility/plugins.mjs +2 -2
package/dist/index.js +51 -36
package/dist/index.mjs +5 -5
package/dist/livekit/integration.js +51 -36
package/dist/livekit/integration.mjs +5 -5
package/dist/pipeline/audio-pipeline.js +51 -36
package/dist/pipeline/audio-pipeline.mjs +4 -4
package/dist/types.d.mts +8 -7
package/dist/types.d.ts +8 -7
package/dist/vad/vad-node.js +45 -30
package/dist/vad/vad-node.mjs +1 -1
package/dist/vad/vad-state.js +6 -6
package/dist/vad/vad-state.mjs +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -101,11 +101,11 @@ vad: {
   energyVad?: {
     smoothing: number;                 // Default: 0.95
     initialNoiseFloor: number;         // Default: 0.001
-    noiseFloorAdaptRateQuiet: number;  // Default: 0.01
-    noiseFloorAdaptRateLoud: number;   // Default: 0.05
-    minSNR: number;                    // Default: 10.0 (dB)
+    noiseFloorAdaptRateQuiet: number;  // Default: 0.002
+    noiseFloorAdaptRateLoud: number;   // Default: 0.02
+    minSNR: number;                    // Default: 12.0 (dB)
     snrRange: number;                  // Default: 10.0 (dB)
-    minEnergy: number;                 // Default: 0.001
+    minEnergy: number;                 // Default: 0.003
   };
 }
 ```
@@ -116,7 +116,7 @@ vad: {
 - `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
 - `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
 - `preRollMs`: Audio buffer duration before speech onset
-- `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 150ms)
+- `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 250ms)
 - `minSilenceDurationMs`: Minimum silence duration between speech segments
 **Energy VAD Parameters:**
@@ -124,7 +124,7 @@ vad: {
 - `smoothing`: Energy calculation smoothing factor (0-1)
 - `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
 - `snrRange`: Range in dB for probability scaling from minSNR
-- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.001, ~-60dB)
+- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.003, ~-50dB)
 ### Output Control

package/dist/{chunk-KEWK2OKV.mjs → chunk-2G2JFHJY.mjs} RENAMED Viewed

@@ -3,11 +3,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
-  const minSNR = energyParams.minSNR ?? 10;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
+  const minSNR = energyParams.minSNR ?? 12;
   const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minEnergy = energyParams.minEnergy ?? 3e-3;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -36,8 +36,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -45,36 +48,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
-    if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
+    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
+    // This prevents sharp transients from affecting the noise floor
+    if (this.energy < this.noiseFloor) {
+      // Signal is quieter than noise floor, adapt downwards slowly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      // Calculate SNR based on smoothed energy
+      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
+      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.05;
-      } else if (snrDb > 20) {
-        multiplier = 0.2;
+      // Only adapt upwards if:
+      // 1. SNR is low (< 10dB) - likely just background noise
+      // 2. AND crest factor is low (< 15dB) - not a sharp transient
+      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
+        // This is persistent background noise, adapt upwards
+        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
+      } else {
+        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
+        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
+        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
       }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -83,11 +89,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });

package/dist/{chunk-Q2I22TJG.mjs → chunk-6F2HZUYO.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   createAudioPipeline
-} from "./chunk-SMZJFNRU.mjs";
+} from "./chunk-R5M2DGAQ.mjs";
 // src/livekit/integration.ts
 async function attachProcessingToTrack(track, config = {}) {

package/dist/{chunk-DYY2MXMU.mjs → chunk-K4YLH73B.mjs} RENAMED Viewed

@@ -20,17 +20,17 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
-        minSNR: config?.energyVad?.minSNR ?? 10,
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
+        minSNR: config?.energyVad?.minSNR ?? 12,
         snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minEnergy: config?.energyVad?.minEnergy ?? 3e-3
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/{chunk-SMZJFNRU.mjs → chunk-R5M2DGAQ.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "./chunk-DYY2MXMU.mjs";
+} from "./chunk-K4YLH73B.mjs";
 import {
   getAudioContext,
   registerPipeline,
@@ -9,7 +9,7 @@ import {
 import {
   getNoiseSuppressionPlugin,
   getVADPlugin
-} from "./chunk-XZSFQJW4.mjs";
+} from "./chunk-UFKIAMG3.mjs";
 // src/pipeline/audio-pipeline.ts
 import mitt from "mitt";

package/dist/{chunk-XZSFQJW4.mjs → chunk-UFKIAMG3.mjs} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-KEWK2OKV.mjs";
+} from "./chunk-2G2JFHJY.mjs";
 // src/extensibility/plugins.ts
 var nsPlugins = /* @__PURE__ */ new Map();

package/dist/extensibility/plugins.js CHANGED Viewed

@@ -106,11 +106,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
-  const minSNR = energyParams.minSNR ?? 10;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
+  const minSNR = energyParams.minSNR ?? 12;
   const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minEnergy = energyParams.minEnergy ?? 3e-3;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -139,8 +139,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -148,36 +151,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
-    if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
+    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
+    // This prevents sharp transients from affecting the noise floor
+    if (this.energy < this.noiseFloor) {
+      // Signal is quieter than noise floor, adapt downwards slowly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      // Calculate SNR based on smoothed energy
+      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
+      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.05;
-      } else if (snrDb > 20) {
-        multiplier = 0.2;
+      // Only adapt upwards if:
+      // 1. SNR is low (< 10dB) - likely just background noise
+      // 2. AND crest factor is low (< 15dB) - not a sharp transient
+      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
+        // This is persistent background noise, adapt upwards
+        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
+      } else {
+        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
+        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
+        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
       }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -186,11 +192,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });

package/dist/extensibility/plugins.mjs CHANGED Viewed

@@ -3,9 +3,9 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "../chunk-XZSFQJW4.mjs";
+} from "../chunk-UFKIAMG3.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-KEWK2OKV.mjs";
+import "../chunk-2G2JFHJY.mjs";
 export {
   getNoiseSuppressionPlugin,
   getVADPlugin,

package/dist/index.js CHANGED Viewed

@@ -158,11 +158,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
-  const minSNR = energyParams.minSNR ?? 10;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
+  const minSNR = energyParams.minSNR ?? 12;
   const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minEnergy = energyParams.minEnergy ?? 3e-3;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -191,8 +191,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -200,36 +203,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
-    if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
+    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
+    // This prevents sharp transients from affecting the noise floor
+    if (this.energy < this.noiseFloor) {
+      // Signal is quieter than noise floor, adapt downwards slowly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      // Calculate SNR based on smoothed energy
+      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
+      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.05;
-      } else if (snrDb > 20) {
-        multiplier = 0.2;
+      // Only adapt upwards if:
+      // 1. SNR is low (< 10dB) - likely just background noise
+      // 2. AND crest factor is low (< 15dB) - not a sharp transient
+      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
+        // This is persistent background noise, adapt upwards
+        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
+      } else {
+        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
+        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
+        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
       }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -238,11 +244,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -371,17 +386,17 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
-        minSNR: config?.energyVad?.minSNR ?? 10,
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
+        minSNR: config?.energyVad?.minSNR ?? 12,
         snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minEnergy: config?.energyVad?.minEnergy ?? 3e-3
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/index.mjs CHANGED Viewed

@@ -1,13 +1,13 @@
 import "./chunk-WBQAMGXK.mjs";
 import {
   attachProcessingToTrack
-} from "./chunk-Q2I22TJG.mjs";
+} from "./chunk-6F2HZUYO.mjs";
 import {
   createAudioPipeline
-} from "./chunk-SMZJFNRU.mjs";
+} from "./chunk-R5M2DGAQ.mjs";
 import {
   VADStateMachine
-} from "./chunk-DYY2MXMU.mjs";
+} from "./chunk-K4YLH73B.mjs";
 import {
   closeAudioContext,
   getAudioContext,
@@ -21,13 +21,13 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "./chunk-XZSFQJW4.mjs";
+} from "./chunk-UFKIAMG3.mjs";
 import {
   RNNoisePlugin
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-KEWK2OKV.mjs";
+} from "./chunk-2G2JFHJY.mjs";
 export {
   EnergyVADPlugin,
   RNNoisePlugin,

package/dist/livekit/integration.js CHANGED Viewed

@@ -127,11 +127,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
-  const minSNR = energyParams.minSNR ?? 10;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
+  const minSNR = energyParams.minSNR ?? 12;
   const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minEnergy = energyParams.minEnergy ?? 3e-3;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -160,8 +160,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -169,36 +172,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
-    if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
+    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
+    // This prevents sharp transients from affecting the noise floor
+    if (this.energy < this.noiseFloor) {
+      // Signal is quieter than noise floor, adapt downwards slowly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      // Calculate SNR based on smoothed energy
+      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
+      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.05;
-      } else if (snrDb > 20) {
-        multiplier = 0.2;
+      // Only adapt upwards if:
+      // 1. SNR is low (< 10dB) - likely just background noise
+      // 2. AND crest factor is low (< 15dB) - not a sharp transient
+      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
+        // This is persistent background noise, adapt upwards
+        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
+      } else {
+        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
+        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
+        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
       }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -207,11 +213,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -334,17 +349,17 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
-        minSNR: config?.energyVad?.minSNR ?? 10,
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
+        minSNR: config?.energyVad?.minSNR ?? 12,
         snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minEnergy: config?.energyVad?.minEnergy ?? 3e-3
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/livekit/integration.mjs CHANGED Viewed

@@ -1,12 +1,12 @@
 import {
   attachProcessingToTrack
-} from "../chunk-Q2I22TJG.mjs";
-import "../chunk-SMZJFNRU.mjs";
-import "../chunk-DYY2MXMU.mjs";
+} from "../chunk-6F2HZUYO.mjs";
+import "../chunk-R5M2DGAQ.mjs";
+import "../chunk-K4YLH73B.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-XZSFQJW4.mjs";
+import "../chunk-UFKIAMG3.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-KEWK2OKV.mjs";
+import "../chunk-2G2JFHJY.mjs";
 export {
   attachProcessingToTrack
 };

package/dist/pipeline/audio-pipeline.js CHANGED Viewed

@@ -125,11 +125,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
-  const minSNR = energyParams.minSNR ?? 10;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
+  const minSNR = energyParams.minSNR ?? 12;
   const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minEnergy = energyParams.minEnergy ?? 3e-3;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -158,8 +158,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -167,36 +170,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
-    if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
+    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
+    // This prevents sharp transients from affecting the noise floor
+    if (this.energy < this.noiseFloor) {
+      // Signal is quieter than noise floor, adapt downwards slowly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      // Calculate SNR based on smoothed energy
+      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
+      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.05;
-      } else if (snrDb > 20) {
-        multiplier = 0.2;
+      // Only adapt upwards if:
+      // 1. SNR is low (< 10dB) - likely just background noise
+      // 2. AND crest factor is low (< 15dB) - not a sharp transient
+      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
+        // This is persistent background noise, adapt upwards
+        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
+      } else {
+        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
+        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
+        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
       }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -205,11 +211,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -332,17 +347,17 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
-        minSNR: config?.energyVad?.minSNR ?? 10,
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
+        minSNR: config?.energyVad?.minSNR ?? 12,
         snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minEnergy: config?.energyVad?.minEnergy ?? 3e-3
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/pipeline/audio-pipeline.mjs CHANGED Viewed

@@ -1,11 +1,11 @@
 import {
   createAudioPipeline
-} from "../chunk-SMZJFNRU.mjs";
-import "../chunk-DYY2MXMU.mjs";
+} from "../chunk-R5M2DGAQ.mjs";
+import "../chunk-K4YLH73B.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-XZSFQJW4.mjs";
+import "../chunk-UFKIAMG3.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-KEWK2OKV.mjs";
+import "../chunk-2G2JFHJY.mjs";
 export {
   createAudioPipeline
 };

package/dist/types.d.mts CHANGED Viewed

@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
         preRollMs?: number;
         /**
          * Minimum speech duration in ms to consider it valid speech.
-         * Filters out very brief noise spikes like keyboard clicks.
-         * Default: 150ms
+         * Filters out brief transients like keyboard clicks.
+         * Default: 250ms (aggressive transient rejection)
          */
         minSpeechDurationMs?: number;
         /**
@@ -97,17 +97,18 @@ interface AudioProcessingConfig {
             initialNoiseFloor?: number;
             /**
              * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.01
+             * Default: 0.002 (very slow downward drift)
              */
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.05 (faster tracking of rising noise)
+             * Applied to low-energy, low-crest-factor signals (background noise).
+             * Default: 0.02
              */
             noiseFloorAdaptRateLoud?: number;
             /**
              * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
-             * Default: 10.0 (more aggressive noise rejection)
+             * Default: 12.0 (aggressive noise rejection)
              */
             minSNR?: number;
             /**
@@ -117,8 +118,8 @@ interface AudioProcessingConfig {
             snrRange?: number;
             /**
              * Minimum absolute RMS energy to consider as speech.
-             * Prevents triggering on very quiet background noise in silent rooms.
-             * Default: 0.001 (approx -60dB)
+             * Prevents triggering on very quiet background noise.
+             * Default: 0.003 (approx -50dB, voice-appropriate level)
              */
             minEnergy?: number;
         };

package/dist/types.d.ts CHANGED Viewed

@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
         preRollMs?: number;
         /**
          * Minimum speech duration in ms to consider it valid speech.
-         * Filters out very brief noise spikes like keyboard clicks.
-         * Default: 150ms
+         * Filters out brief transients like keyboard clicks.
+         * Default: 250ms (aggressive transient rejection)
          */
         minSpeechDurationMs?: number;
         /**
@@ -97,17 +97,18 @@ interface AudioProcessingConfig {
             initialNoiseFloor?: number;
             /**
              * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.01
+             * Default: 0.002 (very slow downward drift)
              */
             noiseFloorAdaptRateQuiet?: number;
             /**
              * Rate at which noise floor adapts to loud signals (0-1).
-             * Default: 0.05 (faster tracking of rising noise)
+             * Applied to low-energy, low-crest-factor signals (background noise).
+             * Default: 0.02
              */
             noiseFloorAdaptRateLoud?: number;
             /**
              * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
-             * Default: 10.0 (more aggressive noise rejection)
+             * Default: 12.0 (aggressive noise rejection)
              */
             minSNR?: number;
             /**
@@ -117,8 +118,8 @@ interface AudioProcessingConfig {
             snrRange?: number;
             /**
              * Minimum absolute RMS energy to consider as speech.
-             * Prevents triggering on very quiet background noise in silent rooms.
-             * Default: 0.001 (approx -60dB)
+             * Prevents triggering on very quiet background noise.
+             * Default: 0.003 (approx -50dB, voice-appropriate level)
              */
             minEnergy?: number;
         };

package/dist/vad/vad-node.js CHANGED Viewed

@@ -27,11 +27,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
-  const minSNR = energyParams.minSNR ?? 10;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
+  const minSNR = energyParams.minSNR ?? 12;
   const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minEnergy = energyParams.minEnergy ?? 3e-3;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -60,8 +60,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -69,36 +72,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use the instantaneous RMS for noise floor tracking to react quickly to silence
-    if (instantRms < this.noiseFloor) {
-      // If signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
+    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
+    // This prevents sharp transients from affecting the noise floor
+    if (this.energy < this.noiseFloor) {
+      // Signal is quieter than noise floor, adapt downwards slowly
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
     } else {
-      // If signal is louder, adapt upwards
-      // We use a multi-stage adaptation rate:
-      // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
-      // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
-      // 3. Otherwise, adapt at the normal loud rate
-      const snr = instantRms / (this.noiseFloor + 1e-6);
-      const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
+      // Calculate SNR based on smoothed energy
+      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
+      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
-      let multiplier = 1.0;
-      if (this.isSpeaking) {
-        multiplier = 0.05;
-      } else if (snrDb > 20) {
-        multiplier = 0.2;
+      // Only adapt upwards if:
+      // 1. SNR is low (< 10dB) - likely just background noise
+      // 2. AND crest factor is low (< 15dB) - not a sharp transient
+      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
+        // This is persistent background noise, adapt upwards
+        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
+      } else {
+        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
+        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
+        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
       }
-      const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
-      this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
     }
     // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
-    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -107,11 +113,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });

package/dist/vad/vad-node.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   EnergyVADPlugin
-} from "../chunk-KEWK2OKV.mjs";
+} from "../chunk-2G2JFHJY.mjs";
 export {
   EnergyVADPlugin
 };

package/dist/vad/vad-state.js CHANGED Viewed

@@ -44,17 +44,17 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
-        minSNR: config?.energyVad?.minSNR ?? 10,
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
+        minSNR: config?.energyVad?.minSNR ?? 12,
         snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minEnergy: config?.energyVad?.minEnergy ?? 3e-3
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/vad/vad-state.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "../chunk-DYY2MXMU.mjs";
+} from "../chunk-K4YLH73B.mjs";
 export {
   VADStateMachine
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tensamin/audio",
-  "version": "0.1.12",
+  "version": "0.1.14",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
   "types": "dist/index.d.ts",