npm - @tensamin/audio - Versions diffs - 0.1.13 → 0.1.15 - Mend

@tensamin/audio 0.1.13 → 0.1.15

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (21)

package/README.md +5 -7
package/dist/{chunk-FKR6NWZF.mjs → chunk-GLKAWCEW.mjs} +31 -34
package/dist/{chunk-DLLK6K76.mjs → chunk-KLBA2CPE.mjs} +5 -7
package/dist/{chunk-K6X52R7N.mjs → chunk-QQFKHTCQ.mjs} +1 -1
package/dist/{chunk-OXV7BHX5.mjs → chunk-U26F3GJN.mjs} +1 -1
package/dist/{chunk-RD4GDIPO.mjs → chunk-WQVMSR7V.mjs} +5 -6
package/dist/extensibility/plugins.js +31 -34
package/dist/extensibility/plugins.mjs +2 -2
package/dist/index.js +39 -45
package/dist/index.mjs +5 -5
package/dist/livekit/integration.js +39 -45
package/dist/livekit/integration.mjs +5 -5
package/dist/pipeline/audio-pipeline.js +39 -45
package/dist/pipeline/audio-pipeline.mjs +4 -4
package/dist/types.d.mts +6 -17
package/dist/types.d.ts +6 -17
package/dist/vad/vad-node.js +31 -34
package/dist/vad/vad-node.mjs +1 -1
package/dist/vad/vad-state.js +5 -7
package/dist/vad/vad-state.mjs +1 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -101,11 +101,9 @@ vad: {
   energyVad?: {
     smoothing: number;                 // Default: 0.95
     initialNoiseFloor: number;         // Default: 0.001
-    noiseFloorAdaptRateQuiet: number;  // Default: 0.01
-    noiseFloorAdaptRateLoud: number;   // Default: 0.1
-    minSNR: number;                    // Default: 10.0 (dB)
-    snrRange: number;                  // Default: 10.0 (dB)
-    minEnergy: number;                 // Default: 0.001
+    minSNR: number;                    // Default: 8.0 (dB)
+    snrRange: number;                  // Default: 12.0 (dB)
+    minEnergy: number;                 // Default: 0.01
   };
 }
 ```
@@ -116,7 +114,7 @@ vad: {
 - `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
 - `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
 - `preRollMs`: Audio buffer duration before speech onset
-- `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 150ms)
+- `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 250ms)
 - `minSilenceDurationMs`: Minimum silence duration between speech segments
 **Energy VAD Parameters:**
@@ -124,7 +122,7 @@ vad: {
 - `smoothing`: Energy calculation smoothing factor (0-1)
 - `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
 - `snrRange`: Range in dB for probability scaling from minSNR
-- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.001, ~-60dB)
+- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.01, ~-40dB)
 ### Output Control

package/dist/{chunk-FKR6NWZF.mjs → chunk-GLKAWCEW.mjs} RENAMED Viewed

@@ -3,11 +3,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
-  const minSNR = energyParams.minSNR ?? 10;
-  const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 8;
+  const snrRange = energyParams.snrRange ?? 12;
+  const minEnergy = energyParams.minEnergy ?? 0.01;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -15,8 +13,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.smoothing = ${smoothing};
     this.energy = 0;
     this.noiseFloor = ${initialNoiseFloor};
-    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
-    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
     this.minEnergy = ${minEnergy};
@@ -36,8 +32,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -45,32 +44,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use a TWO-PASS approach to avoid circular dependencies:
-    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
-    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
-    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
-    // Adapt the noise floor based on instantaneous SNR
-    if (instantRms < this.noiseFloor) {
-      // Signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
-    } else if (instantSnrDb < 12) {
-      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
-      // Adapt upwards at normal rate to track rising noise
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
-    } else {
-      // Signal has high SNR (>= 12dB) - likely speech or transient
-      // Adapt VERY slowly to avoid "chasing" speech
-      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
-      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
-    }
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
-    // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    // FIXED noise floor with minimal adaptation
+    // Only adapt within strict bounds to prevent drift
+    const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
+    this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
+    // Hard clamp to prevent any drift outside acceptable range
+    this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
-    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -79,11 +67,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });

package/dist/{chunk-DLLK6K76.mjs → chunk-KLBA2CPE.mjs} RENAMED Viewed

@@ -20,17 +20,15 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
-        minSNR: config?.energyVad?.minSNR ?? 10,
-        snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minSNR: config?.energyVad?.minSNR ?? 8,
+        snrRange: config?.energyVad?.snrRange ?? 12,
+        minEnergy: config?.energyVad?.minEnergy ?? 0.01
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/{chunk-K6X52R7N.mjs → chunk-QQFKHTCQ.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   createAudioPipeline
-} from "./chunk-RD4GDIPO.mjs";
+} from "./chunk-WQVMSR7V.mjs";
 // src/livekit/integration.ts
 async function attachProcessingToTrack(track, config = {}) {

package/dist/{chunk-OXV7BHX5.mjs → chunk-U26F3GJN.mjs} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-FKR6NWZF.mjs";
+} from "./chunk-GLKAWCEW.mjs";
 // src/extensibility/plugins.ts
 var nsPlugins = /* @__PURE__ */ new Map();

package/dist/{chunk-RD4GDIPO.mjs → chunk-WQVMSR7V.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "./chunk-DLLK6K76.mjs";
+} from "./chunk-KLBA2CPE.mjs";
 import {
   getAudioContext,
   registerPipeline,
@@ -9,7 +9,7 @@ import {
 import {
   getNoiseSuppressionPlugin,
   getVADPlugin
-} from "./chunk-OXV7BHX5.mjs";
+} from "./chunk-U26F3GJN.mjs";
 // src/pipeline/audio-pipeline.ts
 import mitt from "mitt";
@@ -37,10 +37,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
       energyVad: {
         smoothing: 0.95,
         initialNoiseFloor: 1e-3,
-        noiseFloorAdaptRateQuiet: 0.01,
-        noiseFloorAdaptRateLoud: 1e-3,
-        minSNR: 2,
-        snrRange: 8
+        minSNR: 8,
+        snrRange: 12,
+        minEnergy: 0.01
       },
       ...config.vad
     },

package/dist/extensibility/plugins.js CHANGED Viewed

@@ -106,11 +106,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
-  const minSNR = energyParams.minSNR ?? 10;
-  const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 8;
+  const snrRange = energyParams.snrRange ?? 12;
+  const minEnergy = energyParams.minEnergy ?? 0.01;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -118,8 +116,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.smoothing = ${smoothing};
     this.energy = 0;
     this.noiseFloor = ${initialNoiseFloor};
-    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
-    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
     this.minEnergy = ${minEnergy};
@@ -139,8 +135,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -148,32 +147,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use a TWO-PASS approach to avoid circular dependencies:
-    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
-    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
-    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
-    // Adapt the noise floor based on instantaneous SNR
-    if (instantRms < this.noiseFloor) {
-      // Signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
-    } else if (instantSnrDb < 12) {
-      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
-      // Adapt upwards at normal rate to track rising noise
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
-    } else {
-      // Signal has high SNR (>= 12dB) - likely speech or transient
-      // Adapt VERY slowly to avoid "chasing" speech
-      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
-      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
-    }
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
-    // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    // FIXED noise floor with minimal adaptation
+    // Only adapt within strict bounds to prevent drift
+    const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
+    this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
+    // Hard clamp to prevent any drift outside acceptable range
+    this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
-    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -182,11 +170,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });

package/dist/extensibility/plugins.mjs CHANGED Viewed

@@ -3,9 +3,9 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "../chunk-OXV7BHX5.mjs";
+} from "../chunk-U26F3GJN.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-FKR6NWZF.mjs";
+import "../chunk-GLKAWCEW.mjs";
 export {
   getNoiseSuppressionPlugin,
   getVADPlugin,

package/dist/index.js CHANGED Viewed

@@ -158,11 +158,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
-  const minSNR = energyParams.minSNR ?? 10;
-  const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 8;
+  const snrRange = energyParams.snrRange ?? 12;
+  const minEnergy = energyParams.minEnergy ?? 0.01;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -170,8 +168,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.smoothing = ${smoothing};
     this.energy = 0;
     this.noiseFloor = ${initialNoiseFloor};
-    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
-    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
     this.minEnergy = ${minEnergy};
@@ -191,8 +187,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -200,32 +199,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use a TWO-PASS approach to avoid circular dependencies:
-    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
-    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
-    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
-    // Adapt the noise floor based on instantaneous SNR
-    if (instantRms < this.noiseFloor) {
-      // Signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
-    } else if (instantSnrDb < 12) {
-      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
-      // Adapt upwards at normal rate to track rising noise
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
-    } else {
-      // Signal has high SNR (>= 12dB) - likely speech or transient
-      // Adapt VERY slowly to avoid "chasing" speech
-      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
-      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
-    }
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
-    // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    // FIXED noise floor with minimal adaptation
+    // Only adapt within strict bounds to prevent drift
+    const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
+    this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
+    // Hard clamp to prevent any drift outside acceptable range
+    this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
-    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -234,11 +222,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -367,17 +364,15 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
-        minSNR: config?.energyVad?.minSNR ?? 10,
-        snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minSNR: config?.energyVad?.minSNR ?? 8,
+        snrRange: config?.energyVad?.snrRange ?? 12,
+        minEnergy: config?.energyVad?.minEnergy ?? 0.01
       }
     };
     this.lastSilenceTime = Date.now();
@@ -470,10 +465,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
       energyVad: {
         smoothing: 0.95,
         initialNoiseFloor: 1e-3,
-        noiseFloorAdaptRateQuiet: 0.01,
-        noiseFloorAdaptRateLoud: 1e-3,
-        minSNR: 2,
-        snrRange: 8
+        minSNR: 8,
+        snrRange: 12,
+        minEnergy: 0.01
       },
       ...config.vad
     },

package/dist/index.mjs CHANGED Viewed

@@ -1,13 +1,13 @@
 import "./chunk-WBQAMGXK.mjs";
 import {
   attachProcessingToTrack
-} from "./chunk-K6X52R7N.mjs";
+} from "./chunk-QQFKHTCQ.mjs";
 import {
   createAudioPipeline
-} from "./chunk-RD4GDIPO.mjs";
+} from "./chunk-WQVMSR7V.mjs";
 import {
   VADStateMachine
-} from "./chunk-DLLK6K76.mjs";
+} from "./chunk-KLBA2CPE.mjs";
 import {
   closeAudioContext,
   getAudioContext,
@@ -21,13 +21,13 @@ import {
   getVADPlugin,
   registerNoiseSuppressionPlugin,
   registerVADPlugin
-} from "./chunk-OXV7BHX5.mjs";
+} from "./chunk-U26F3GJN.mjs";
 import {
   RNNoisePlugin
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-FKR6NWZF.mjs";
+} from "./chunk-GLKAWCEW.mjs";
 export {
   EnergyVADPlugin,
   RNNoisePlugin,

package/dist/livekit/integration.js CHANGED Viewed

@@ -127,11 +127,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
-  const minSNR = energyParams.minSNR ?? 10;
-  const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 8;
+  const snrRange = energyParams.snrRange ?? 12;
+  const minEnergy = energyParams.minEnergy ?? 0.01;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -139,8 +137,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.smoothing = ${smoothing};
     this.energy = 0;
     this.noiseFloor = ${initialNoiseFloor};
-    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
-    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
     this.minEnergy = ${minEnergy};
@@ -160,8 +156,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -169,32 +168,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use a TWO-PASS approach to avoid circular dependencies:
-    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
-    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
-    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
-    // Adapt the noise floor based on instantaneous SNR
-    if (instantRms < this.noiseFloor) {
-      // Signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
-    } else if (instantSnrDb < 12) {
-      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
-      // Adapt upwards at normal rate to track rising noise
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
-    } else {
-      // Signal has high SNR (>= 12dB) - likely speech or transient
-      // Adapt VERY slowly to avoid "chasing" speech
-      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
-      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
-    }
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
-    // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    // FIXED noise floor with minimal adaptation
+    // Only adapt within strict bounds to prevent drift
+    const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
+    this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
+    // Hard clamp to prevent any drift outside acceptable range
+    this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
-    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -203,11 +191,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -330,17 +327,15 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
-        minSNR: config?.energyVad?.minSNR ?? 10,
-        snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minSNR: config?.energyVad?.minSNR ?? 8,
+        snrRange: config?.energyVad?.snrRange ?? 12,
+        minEnergy: config?.energyVad?.minEnergy ?? 0.01
       }
     };
     this.lastSilenceTime = Date.now();
@@ -433,10 +428,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
       energyVad: {
         smoothing: 0.95,
         initialNoiseFloor: 1e-3,
-        noiseFloorAdaptRateQuiet: 0.01,
-        noiseFloorAdaptRateLoud: 1e-3,
-        minSNR: 2,
-        snrRange: 8
+        minSNR: 8,
+        snrRange: 12,
+        minEnergy: 0.01
       },
       ...config.vad
     },

package/dist/livekit/integration.mjs CHANGED Viewed

@@ -1,12 +1,12 @@
 import {
   attachProcessingToTrack
-} from "../chunk-K6X52R7N.mjs";
-import "../chunk-RD4GDIPO.mjs";
-import "../chunk-DLLK6K76.mjs";
+} from "../chunk-QQFKHTCQ.mjs";
+import "../chunk-WQVMSR7V.mjs";
+import "../chunk-KLBA2CPE.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-OXV7BHX5.mjs";
+import "../chunk-U26F3GJN.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-FKR6NWZF.mjs";
+import "../chunk-GLKAWCEW.mjs";
 export {
   attachProcessingToTrack
 };

package/dist/pipeline/audio-pipeline.js CHANGED Viewed

@@ -125,11 +125,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
-  const minSNR = energyParams.minSNR ?? 10;
-  const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 8;
+  const snrRange = energyParams.snrRange ?? 12;
+  const minEnergy = energyParams.minEnergy ?? 0.01;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -137,8 +135,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.smoothing = ${smoothing};
     this.energy = 0;
     this.noiseFloor = ${initialNoiseFloor};
-    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
-    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
     this.minEnergy = ${minEnergy};
@@ -158,8 +154,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -167,32 +166,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use a TWO-PASS approach to avoid circular dependencies:
-    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
-    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
-    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
-    // Adapt the noise floor based on instantaneous SNR
-    if (instantRms < this.noiseFloor) {
-      // Signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
-    } else if (instantSnrDb < 12) {
-      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
-      // Adapt upwards at normal rate to track rising noise
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
-    } else {
-      // Signal has high SNR (>= 12dB) - likely speech or transient
-      // Adapt VERY slowly to avoid "chasing" speech
-      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
-      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
-    }
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
-    // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    // FIXED noise floor with minimal adaptation
+    // Only adapt within strict bounds to prevent drift
+    const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
+    this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
+    // Hard clamp to prevent any drift outside acceptable range
+    this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
-    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -201,11 +189,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -328,17 +325,15 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
-        minSNR: config?.energyVad?.minSNR ?? 10,
-        snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minSNR: config?.energyVad?.minSNR ?? 8,
+        snrRange: config?.energyVad?.snrRange ?? 12,
+        minEnergy: config?.energyVad?.minEnergy ?? 0.01
       }
     };
     this.lastSilenceTime = Date.now();
@@ -431,10 +426,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
       energyVad: {
         smoothing: 0.95,
         initialNoiseFloor: 1e-3,
-        noiseFloorAdaptRateQuiet: 0.01,
-        noiseFloorAdaptRateLoud: 1e-3,
-        minSNR: 2,
-        snrRange: 8
+        minSNR: 8,
+        snrRange: 12,
+        minEnergy: 0.01
       },
       ...config.vad
     },

package/dist/pipeline/audio-pipeline.mjs CHANGED Viewed

@@ -1,11 +1,11 @@
 import {
   createAudioPipeline
-} from "../chunk-RD4GDIPO.mjs";
-import "../chunk-DLLK6K76.mjs";
+} from "../chunk-WQVMSR7V.mjs";
+import "../chunk-KLBA2CPE.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-OXV7BHX5.mjs";
+import "../chunk-U26F3GJN.mjs";
 import "../chunk-XO6B3D4A.mjs";
-import "../chunk-FKR6NWZF.mjs";
+import "../chunk-GLKAWCEW.mjs";
 export {
   createAudioPipeline
 };

package/dist/types.d.mts CHANGED Viewed

@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
         preRollMs?: number;
         /**
          * Minimum speech duration in ms to consider it valid speech.
-         * Filters out very brief noise spikes like keyboard clicks.
-         * Default: 150ms
+         * Filters out brief transients like keyboard clicks.
+         * Default: 250ms (aggressive transient rejection)
          */
         minSpeechDurationMs?: number;
         /**
@@ -95,31 +95,20 @@ interface AudioProcessingConfig {
              * Default: 0.001
              */
             initialNoiseFloor?: number;
-            /**
-             * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.01
-             */
-            noiseFloorAdaptRateQuiet?: number;
-            /**
-             * Rate at which noise floor adapts to loud signals (0-1).
-             * Applied when instantaneous SNR < 12dB (background noise).
-             * Default: 0.1 (fast tracking of rising noise)
-             */
-            noiseFloorAdaptRateLoud?: number;
             /**
              * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
-             * Default: 10.0 (more aggressive noise rejection)
+             * Default: 8.0
              */
             minSNR?: number;
             /**
              * SNR range in dB for probability scaling.
-             * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
+             * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
              */
             snrRange?: number;
             /**
              * Minimum absolute RMS energy to consider as speech.
-             * Prevents triggering on very quiet background noise in silent rooms.
-             * Default: 0.001 (approx -60dB)
+             * Prevents triggering on quiet background noise.
+             * Default: 0.01 (approx -40dB, typical voice level)
              */
             minEnergy?: number;
         };

package/dist/types.d.ts CHANGED Viewed

@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
         preRollMs?: number;
         /**
          * Minimum speech duration in ms to consider it valid speech.
-         * Filters out very brief noise spikes like keyboard clicks.
-         * Default: 150ms
+         * Filters out brief transients like keyboard clicks.
+         * Default: 250ms (aggressive transient rejection)
          */
         minSpeechDurationMs?: number;
         /**
@@ -95,31 +95,20 @@ interface AudioProcessingConfig {
              * Default: 0.001
              */
             initialNoiseFloor?: number;
-            /**
-             * Rate at which noise floor adapts to quiet signals (0-1).
-             * Default: 0.01
-             */
-            noiseFloorAdaptRateQuiet?: number;
-            /**
-             * Rate at which noise floor adapts to loud signals (0-1).
-             * Applied when instantaneous SNR < 12dB (background noise).
-             * Default: 0.1 (fast tracking of rising noise)
-             */
-            noiseFloorAdaptRateLoud?: number;
             /**
              * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
-             * Default: 10.0 (more aggressive noise rejection)
+             * Default: 8.0
              */
             minSNR?: number;
             /**
              * SNR range in dB for probability scaling.
-             * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
+             * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
              */
             snrRange?: number;
             /**
              * Minimum absolute RMS energy to consider as speech.
-             * Prevents triggering on very quiet background noise in silent rooms.
-             * Default: 0.001 (approx -60dB)
+             * Prevents triggering on quiet background noise.
+             * Default: 0.01 (approx -40dB, typical voice level)
              */
             minEnergy?: number;
         };

package/dist/vad/vad-node.js CHANGED Viewed

@@ -27,11 +27,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
   const energyParams = vadConfig?.energyVad || {};
   const smoothing = energyParams.smoothing ?? 0.95;
   const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
-  const minSNR = energyParams.minSNR ?? 10;
-  const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 8;
+  const snrRange = energyParams.snrRange ?? 12;
+  const minEnergy = energyParams.minEnergy ?? 0.01;
   return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
@@ -39,8 +37,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     this.smoothing = ${smoothing};
     this.energy = 0;
     this.noiseFloor = ${initialNoiseFloor};
-    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
-    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
     this.minSNR = ${minSNR};
     this.snrRange = ${snrRange};
     this.minEnergy = ${minEnergy};
@@ -60,8 +56,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Calculate instantaneous RMS (Root Mean Square) energy
     let sum = 0;
+    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
+      const sample = Math.abs(channel[i]);
       sum += channel[i] * channel[i];
+      peak = Math.max(peak, sample);
     }
     const instantRms = Math.sqrt(sum / channel.length);
@@ -69,32 +68,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // this.energy acts as the smoothed RMS value
     this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Adaptive noise floor estimation
-    // We use a TWO-PASS approach to avoid circular dependencies:
-    // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
-    const instantSnr = instantRms / (this.noiseFloor + 1e-6);
-    const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
-    // Adapt the noise floor based on instantaneous SNR
-    if (instantRms < this.noiseFloor) {
-      // Signal is quieter than noise floor, adapt downwards quickly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
-    } else if (instantSnrDb < 12) {
-      // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
-      // Adapt upwards at normal rate to track rising noise
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
-    } else {
-      // Signal has high SNR (>= 12dB) - likely speech or transient
-      // Adapt VERY slowly to avoid "chasing" speech
-      const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
-      this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
-    }
+    // Calculate Crest Factor (peak-to-RMS ratio)
+    // Voice typically has crest factor of 2-4 (6-12dB)
+    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
+    const crestFactor = peak / (instantRms + 1e-10);
+    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
-    // Ensure noise floor doesn't drop to absolute zero
-    // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
-    this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
+    // FIXED noise floor with minimal adaptation
+    // Only adapt within strict bounds to prevent drift
+    const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
+    this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
+    // Hard clamp to prevent any drift outside acceptable range
+    this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
-    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
+    // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
     const snr = this.energy / (this.noiseFloor + 1e-6);
     const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
@@ -103,11 +91,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
     let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold
-    // We use a soft threshold to avoid abrupt cutting
+    // Apply absolute energy threshold with soft knee
     if (this.energy < this.minEnergy) {
       const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2); // Quadratic falloff
+      probability *= Math.pow(energyRatio, 2);
+    }
+    // Apply crest factor penalty
+    // Reject signals with high crest factor (sharp transients like keyboard clicks)
+    // Voice: 6-12dB, Keyboard: 20-30dB
+    // We penalize anything above 14dB
+    if (crestFactorDb > 14) {
+      const excess = crestFactorDb - 14;
+      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
+      probability *= penalty;
     }
     this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });

package/dist/vad/vad-node.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   EnergyVADPlugin
-} from "../chunk-FKR6NWZF.mjs";
+} from "../chunk-GLKAWCEW.mjs";
 export {
   EnergyVADPlugin
 };

package/dist/vad/vad-state.js CHANGED Viewed

@@ -44,17 +44,15 @@ var VADStateMachine = class {
       // Smooth for natural speech
       preRollMs: config?.preRollMs ?? 250,
       // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
-      // Increased to filter keyboard clicks
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
+      // Aggressive transient rejection
       minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
       energyVad: {
         smoothing: config?.energyVad?.smoothing ?? 0.95,
         initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
-        minSNR: config?.energyVad?.minSNR ?? 10,
-        snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 1e-3
+        minSNR: config?.energyVad?.minSNR ?? 8,
+        snrRange: config?.energyVad?.snrRange ?? 12,
+        minEnergy: config?.energyVad?.minEnergy ?? 0.01
       }
     };
     this.lastSilenceTime = Date.now();

package/dist/vad/vad-state.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
   VADStateMachine
-} from "../chunk-DLLK6K76.mjs";
+} from "../chunk-KLBA2CPE.mjs";
 export {
   VADStateMachine
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tensamin/audio",
-  "version": "0.1.13",
+  "version": "0.1.15",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
   "types": "dist/index.d.ts",