@tensamin/audio 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,11 +101,9 @@ vad: {
101
101
  energyVad?: {
102
102
  smoothing: number; // Default: 0.95
103
103
  initialNoiseFloor: number; // Default: 0.001
104
- noiseFloorAdaptRateQuiet: number; // Default: 0.01
105
- noiseFloorAdaptRateLoud: number; // Default: 0.1
106
- minSNR: number; // Default: 10.0 (dB)
107
- snrRange: number; // Default: 10.0 (dB)
108
- minEnergy: number; // Default: 0.001
104
+ minSNR: number; // Default: 8.0 (dB)
105
+ snrRange: number; // Default: 12.0 (dB)
106
+ minEnergy: number; // Default: 0.01
109
107
  };
110
108
  }
111
109
  ```
@@ -116,7 +114,7 @@ vad: {
116
114
  - `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~11.6dB SNR with the default `minSNR` of 8 and `snrRange` of 12)
117
115
  - `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
118
116
  - `preRollMs`: Audio buffer duration before speech onset
119
- - `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 150ms)
117
+ - `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 250ms)
120
118
  - `minSilenceDurationMs`: Minimum silence duration between speech segments
121
119
 
122
120
  **Energy VAD Parameters:**
@@ -124,7 +122,7 @@ vad: {
124
122
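  - `smoothing`: Exponential smoothing factor (0-1) applied to the RMS energy; higher values weight the previous smoothed energy more heavily and react more slowly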
125
123
  - `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
126
124
  - `snrRange`: Width in dB above `minSNR` over which the speech probability scales linearly from 0 to 1
127
- - `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.001, ~-60dB)
125
+ - `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.01, ~-40dB)
128
126
 
129
127
  ### Output Control
130
128
 
@@ -3,11 +3,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
3
3
  const energyParams = vadConfig?.energyVad || {};
4
4
  const smoothing = energyParams.smoothing ?? 0.95;
5
5
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
6
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
7
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
8
- const minSNR = energyParams.minSNR ?? 10;
9
- const snrRange = energyParams.snrRange ?? 10;
10
- const minEnergy = energyParams.minEnergy ?? 1e-3;
6
+ const minSNR = energyParams.minSNR ?? 8;
7
+ const snrRange = energyParams.snrRange ?? 12;
8
+ const minEnergy = energyParams.minEnergy ?? 0.01;
11
9
  return `
12
10
  class EnergyVadProcessor extends AudioWorkletProcessor {
13
11
  constructor() {
@@ -15,8 +13,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
15
13
  this.smoothing = ${smoothing};
16
14
  this.energy = 0;
17
15
  this.noiseFloor = ${initialNoiseFloor};
18
- this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
19
- this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
20
16
  this.minSNR = ${minSNR};
21
17
  this.snrRange = ${snrRange};
22
18
  this.minEnergy = ${minEnergy};
@@ -36,8 +32,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
36
32
 
37
33
  // Calculate instantaneous RMS (Root Mean Square) energy
38
34
  let sum = 0;
35
+ let peak = 0;
39
36
  for (let i = 0; i < channel.length; i++) {
37
+ const sample = Math.abs(channel[i]);
40
38
  sum += channel[i] * channel[i];
39
+ peak = Math.max(peak, sample);
41
40
  }
42
41
  const instantRms = Math.sqrt(sum / channel.length);
43
42
 
@@ -45,32 +44,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
45
44
  // this.energy acts as the smoothed RMS value
46
45
  this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
47
46
 
48
- // Adaptive noise floor estimation
49
- // We use a TWO-PASS approach to avoid circular dependencies:
50
- // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
51
- const instantSnr = instantRms / (this.noiseFloor + 1e-6);
52
- const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
53
-
54
- // Adapt the noise floor based on instantaneous SNR
55
- if (instantRms < this.noiseFloor) {
56
- // Signal is quieter than noise floor, adapt downwards quickly
57
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
58
- } else if (instantSnrDb < 12) {
59
- // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
60
- // Adapt upwards at normal rate to track rising noise
61
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
62
- } else {
63
- // Signal has high SNR (>= 12dB) - likely speech or transient
64
- // Adapt VERY slowly to avoid "chasing" speech
65
- const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
66
- this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
67
- }
47
+ // Calculate Crest Factor (peak-to-RMS ratio)
48
+ // Voice typically has crest factor of 2-4 (6-12dB)
49
+ // Keyboard clicks have crest factor of 10-30+ (20-30dB)
50
+ const crestFactor = peak / (instantRms + 1e-10);
51
+ const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
68
52
 
69
- // Ensure noise floor doesn't drop to absolute zero
70
- // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
71
- this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
53
+ // FIXED noise floor with minimal adaptation
54
+ // Only adapt within strict bounds to prevent drift
55
+ const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
56
+ this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
57
+
58
+ // Hard clamp to prevent any drift outside acceptable range
59
+ this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
72
60
 
73
- // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
61
+ // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
74
62
  const snr = this.energy / (this.noiseFloor + 1e-6);
75
63
  const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
76
64
 
@@ -79,11 +67,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
79
67
  // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
80
68
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
81
69
 
82
- // Apply absolute energy threshold
83
- // We use a soft threshold to avoid abrupt cutting
70
+ // Apply absolute energy threshold with soft knee
84
71
  if (this.energy < this.minEnergy) {
85
72
  const energyRatio = this.energy / (this.minEnergy + 1e-6);
86
- probability *= Math.pow(energyRatio, 2); // Quadratic falloff
73
+ probability *= Math.pow(energyRatio, 2);
74
+ }
75
+
76
+ // Apply crest factor penalty
77
+ // Reject signals with high crest factor (sharp transients like keyboard clicks)
78
+ // Voice: 6-12dB, Keyboard: 20-30dB
79
+ // We penalize anything above 14dB
80
+ if (crestFactorDb > 14) {
81
+ const excess = crestFactorDb - 14;
82
+ const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
83
+ probability *= penalty;
87
84
  }
88
85
 
89
86
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -20,17 +20,15 @@ var VADStateMachine = class {
20
20
  // Smooth for natural speech
21
21
  preRollMs: config?.preRollMs ?? 250,
22
22
  // Generous pre-roll
23
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
24
- // Increased to filter keyboard clicks
23
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
24
+ // Aggressive transient rejection
25
25
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
26
26
  energyVad: {
27
27
  smoothing: config?.energyVad?.smoothing ?? 0.95,
28
28
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
29
- noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
30
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
31
- minSNR: config?.energyVad?.minSNR ?? 10,
32
- snrRange: config?.energyVad?.snrRange ?? 10,
33
- minEnergy: config?.energyVad?.minEnergy ?? 1e-3
29
+ minSNR: config?.energyVad?.minSNR ?? 8,
30
+ snrRange: config?.energyVad?.snrRange ?? 12,
31
+ minEnergy: config?.energyVad?.minEnergy ?? 0.01
34
32
  }
35
33
  };
36
34
  this.lastSilenceTime = Date.now();
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "./chunk-RD4GDIPO.mjs";
3
+ } from "./chunk-WQVMSR7V.mjs";
4
4
 
5
5
  // src/livekit/integration.ts
6
6
  async function attachProcessingToTrack(track, config = {}) {
@@ -3,7 +3,7 @@ import {
3
3
  } from "./chunk-XO6B3D4A.mjs";
4
4
  import {
5
5
  EnergyVADPlugin
6
- } from "./chunk-FKR6NWZF.mjs";
6
+ } from "./chunk-GLKAWCEW.mjs";
7
7
 
8
8
  // src/extensibility/plugins.ts
9
9
  var nsPlugins = /* @__PURE__ */ new Map();
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  VADStateMachine
3
- } from "./chunk-DLLK6K76.mjs";
3
+ } from "./chunk-KLBA2CPE.mjs";
4
4
  import {
5
5
  getAudioContext,
6
6
  registerPipeline,
@@ -9,7 +9,7 @@ import {
9
9
  import {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin
12
- } from "./chunk-OXV7BHX5.mjs";
12
+ } from "./chunk-U26F3GJN.mjs";
13
13
 
14
14
  // src/pipeline/audio-pipeline.ts
15
15
  import mitt from "mitt";
@@ -37,10 +37,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
37
37
  energyVad: {
38
38
  smoothing: 0.95,
39
39
  initialNoiseFloor: 1e-3,
40
- noiseFloorAdaptRateQuiet: 0.01,
41
- noiseFloorAdaptRateLoud: 1e-3,
42
- minSNR: 2,
43
- snrRange: 8
40
+ minSNR: 8,
41
+ snrRange: 12,
42
+ minEnergy: 0.01
44
43
  },
45
44
  ...config.vad
46
45
  },
@@ -106,11 +106,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
106
106
  const energyParams = vadConfig?.energyVad || {};
107
107
  const smoothing = energyParams.smoothing ?? 0.95;
108
108
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
109
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
110
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
111
- const minSNR = energyParams.minSNR ?? 10;
112
- const snrRange = energyParams.snrRange ?? 10;
113
- const minEnergy = energyParams.minEnergy ?? 1e-3;
109
+ const minSNR = energyParams.minSNR ?? 8;
110
+ const snrRange = energyParams.snrRange ?? 12;
111
+ const minEnergy = energyParams.minEnergy ?? 0.01;
114
112
  return `
115
113
  class EnergyVadProcessor extends AudioWorkletProcessor {
116
114
  constructor() {
@@ -118,8 +116,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
118
116
  this.smoothing = ${smoothing};
119
117
  this.energy = 0;
120
118
  this.noiseFloor = ${initialNoiseFloor};
121
- this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
122
- this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
123
119
  this.minSNR = ${minSNR};
124
120
  this.snrRange = ${snrRange};
125
121
  this.minEnergy = ${minEnergy};
@@ -139,8 +135,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
139
135
 
140
136
  // Calculate instantaneous RMS (Root Mean Square) energy
141
137
  let sum = 0;
138
+ let peak = 0;
142
139
  for (let i = 0; i < channel.length; i++) {
140
+ const sample = Math.abs(channel[i]);
143
141
  sum += channel[i] * channel[i];
142
+ peak = Math.max(peak, sample);
144
143
  }
145
144
  const instantRms = Math.sqrt(sum / channel.length);
146
145
 
@@ -148,32 +147,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
148
147
  // this.energy acts as the smoothed RMS value
149
148
  this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
150
149
 
151
- // Adaptive noise floor estimation
152
- // We use a TWO-PASS approach to avoid circular dependencies:
153
- // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
154
- const instantSnr = instantRms / (this.noiseFloor + 1e-6);
155
- const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
156
-
157
- // Adapt the noise floor based on instantaneous SNR
158
- if (instantRms < this.noiseFloor) {
159
- // Signal is quieter than noise floor, adapt downwards quickly
160
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
161
- } else if (instantSnrDb < 12) {
162
- // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
163
- // Adapt upwards at normal rate to track rising noise
164
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
165
- } else {
166
- // Signal has high SNR (>= 12dB) - likely speech or transient
167
- // Adapt VERY slowly to avoid "chasing" speech
168
- const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
169
- this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
170
- }
150
+ // Calculate Crest Factor (peak-to-RMS ratio)
151
+ // Voice typically has crest factor of 2-4 (6-12dB)
152
+ // Keyboard clicks have crest factor of 10-30+ (20-30dB)
153
+ const crestFactor = peak / (instantRms + 1e-10);
154
+ const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
171
155
 
172
- // Ensure noise floor doesn't drop to absolute zero
173
- // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
174
- this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
156
+ // FIXED noise floor with minimal adaptation
157
+ // Only adapt within strict bounds to prevent drift
158
+ const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
159
+ this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
160
+
161
+ // Hard clamp to prevent any drift outside acceptable range
162
+ this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
175
163
 
176
- // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
164
+ // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
177
165
  const snr = this.energy / (this.noiseFloor + 1e-6);
178
166
  const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
179
167
 
@@ -182,11 +170,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
182
170
  // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
183
171
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
184
172
 
185
- // Apply absolute energy threshold
186
- // We use a soft threshold to avoid abrupt cutting
173
+ // Apply absolute energy threshold with soft knee
187
174
  if (this.energy < this.minEnergy) {
188
175
  const energyRatio = this.energy / (this.minEnergy + 1e-6);
189
- probability *= Math.pow(energyRatio, 2); // Quadratic falloff
176
+ probability *= Math.pow(energyRatio, 2);
177
+ }
178
+
179
+ // Apply crest factor penalty
180
+ // Reject signals with high crest factor (sharp transients like keyboard clicks)
181
+ // Voice: 6-12dB, Keyboard: 20-30dB
182
+ // We penalize anything above 14dB
183
+ if (crestFactorDb > 14) {
184
+ const excess = crestFactorDb - 14;
185
+ const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
186
+ probability *= penalty;
190
187
  }
191
188
 
192
189
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -3,9 +3,9 @@ import {
3
3
  getVADPlugin,
4
4
  registerNoiseSuppressionPlugin,
5
5
  registerVADPlugin
6
- } from "../chunk-OXV7BHX5.mjs";
6
+ } from "../chunk-U26F3GJN.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-FKR6NWZF.mjs";
8
+ import "../chunk-GLKAWCEW.mjs";
9
9
  export {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin,
package/dist/index.js CHANGED
@@ -158,11 +158,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
158
158
  const energyParams = vadConfig?.energyVad || {};
159
159
  const smoothing = energyParams.smoothing ?? 0.95;
160
160
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
161
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
162
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
163
- const minSNR = energyParams.minSNR ?? 10;
164
- const snrRange = energyParams.snrRange ?? 10;
165
- const minEnergy = energyParams.minEnergy ?? 1e-3;
161
+ const minSNR = energyParams.minSNR ?? 8;
162
+ const snrRange = energyParams.snrRange ?? 12;
163
+ const minEnergy = energyParams.minEnergy ?? 0.01;
166
164
  return `
167
165
  class EnergyVadProcessor extends AudioWorkletProcessor {
168
166
  constructor() {
@@ -170,8 +168,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
170
168
  this.smoothing = ${smoothing};
171
169
  this.energy = 0;
172
170
  this.noiseFloor = ${initialNoiseFloor};
173
- this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
174
- this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
175
171
  this.minSNR = ${minSNR};
176
172
  this.snrRange = ${snrRange};
177
173
  this.minEnergy = ${minEnergy};
@@ -191,8 +187,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
191
187
 
192
188
  // Calculate instantaneous RMS (Root Mean Square) energy
193
189
  let sum = 0;
190
+ let peak = 0;
194
191
  for (let i = 0; i < channel.length; i++) {
192
+ const sample = Math.abs(channel[i]);
195
193
  sum += channel[i] * channel[i];
194
+ peak = Math.max(peak, sample);
196
195
  }
197
196
  const instantRms = Math.sqrt(sum / channel.length);
198
197
 
@@ -200,32 +199,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
200
199
  // this.energy acts as the smoothed RMS value
201
200
  this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
202
201
 
203
- // Adaptive noise floor estimation
204
- // We use a TWO-PASS approach to avoid circular dependencies:
205
- // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
206
- const instantSnr = instantRms / (this.noiseFloor + 1e-6);
207
- const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
208
-
209
- // Adapt the noise floor based on instantaneous SNR
210
- if (instantRms < this.noiseFloor) {
211
- // Signal is quieter than noise floor, adapt downwards quickly
212
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
213
- } else if (instantSnrDb < 12) {
214
- // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
215
- // Adapt upwards at normal rate to track rising noise
216
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
217
- } else {
218
- // Signal has high SNR (>= 12dB) - likely speech or transient
219
- // Adapt VERY slowly to avoid "chasing" speech
220
- const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
221
- this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
222
- }
202
+ // Calculate Crest Factor (peak-to-RMS ratio)
203
+ // Voice typically has crest factor of 2-4 (6-12dB)
204
+ // Keyboard clicks have crest factor of 10-30+ (20-30dB)
205
+ const crestFactor = peak / (instantRms + 1e-10);
206
+ const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
223
207
 
224
- // Ensure noise floor doesn't drop to absolute zero
225
- // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
226
- this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
208
+ // FIXED noise floor with minimal adaptation
209
+ // Only adapt within strict bounds to prevent drift
210
+ const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
211
+ this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
212
+
213
+ // Hard clamp to prevent any drift outside acceptable range
214
+ this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
227
215
 
228
- // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
216
+ // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
229
217
  const snr = this.energy / (this.noiseFloor + 1e-6);
230
218
  const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
231
219
 
@@ -234,11 +222,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
234
222
  // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
235
223
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
236
224
 
237
- // Apply absolute energy threshold
238
- // We use a soft threshold to avoid abrupt cutting
225
+ // Apply absolute energy threshold with soft knee
239
226
  if (this.energy < this.minEnergy) {
240
227
  const energyRatio = this.energy / (this.minEnergy + 1e-6);
241
- probability *= Math.pow(energyRatio, 2); // Quadratic falloff
228
+ probability *= Math.pow(energyRatio, 2);
229
+ }
230
+
231
+ // Apply crest factor penalty
232
+ // Reject signals with high crest factor (sharp transients like keyboard clicks)
233
+ // Voice: 6-12dB, Keyboard: 20-30dB
234
+ // We penalize anything above 14dB
235
+ if (crestFactorDb > 14) {
236
+ const excess = crestFactorDb - 14;
237
+ const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
238
+ probability *= penalty;
242
239
  }
243
240
 
244
241
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -367,17 +364,15 @@ var VADStateMachine = class {
367
364
  // Smooth for natural speech
368
365
  preRollMs: config?.preRollMs ?? 250,
369
366
  // Generous pre-roll
370
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
371
- // Increased to filter keyboard clicks
367
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
368
+ // Aggressive transient rejection
372
369
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
373
370
  energyVad: {
374
371
  smoothing: config?.energyVad?.smoothing ?? 0.95,
375
372
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
376
- noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
377
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
378
- minSNR: config?.energyVad?.minSNR ?? 10,
379
- snrRange: config?.energyVad?.snrRange ?? 10,
380
- minEnergy: config?.energyVad?.minEnergy ?? 1e-3
373
+ minSNR: config?.energyVad?.minSNR ?? 8,
374
+ snrRange: config?.energyVad?.snrRange ?? 12,
375
+ minEnergy: config?.energyVad?.minEnergy ?? 0.01
381
376
  }
382
377
  };
383
378
  this.lastSilenceTime = Date.now();
@@ -470,10 +465,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
470
465
  energyVad: {
471
466
  smoothing: 0.95,
472
467
  initialNoiseFloor: 1e-3,
473
- noiseFloorAdaptRateQuiet: 0.01,
474
- noiseFloorAdaptRateLoud: 1e-3,
475
- minSNR: 2,
476
- snrRange: 8
468
+ minSNR: 8,
469
+ snrRange: 12,
470
+ minEnergy: 0.01
477
471
  },
478
472
  ...config.vad
479
473
  },
package/dist/index.mjs CHANGED
@@ -1,13 +1,13 @@
1
1
  import "./chunk-WBQAMGXK.mjs";
2
2
  import {
3
3
  attachProcessingToTrack
4
- } from "./chunk-K6X52R7N.mjs";
4
+ } from "./chunk-QQFKHTCQ.mjs";
5
5
  import {
6
6
  createAudioPipeline
7
- } from "./chunk-RD4GDIPO.mjs";
7
+ } from "./chunk-WQVMSR7V.mjs";
8
8
  import {
9
9
  VADStateMachine
10
- } from "./chunk-DLLK6K76.mjs";
10
+ } from "./chunk-KLBA2CPE.mjs";
11
11
  import {
12
12
  closeAudioContext,
13
13
  getAudioContext,
@@ -21,13 +21,13 @@ import {
21
21
  getVADPlugin,
22
22
  registerNoiseSuppressionPlugin,
23
23
  registerVADPlugin
24
- } from "./chunk-OXV7BHX5.mjs";
24
+ } from "./chunk-U26F3GJN.mjs";
25
25
  import {
26
26
  RNNoisePlugin
27
27
  } from "./chunk-XO6B3D4A.mjs";
28
28
  import {
29
29
  EnergyVADPlugin
30
- } from "./chunk-FKR6NWZF.mjs";
30
+ } from "./chunk-GLKAWCEW.mjs";
31
31
  export {
32
32
  EnergyVADPlugin,
33
33
  RNNoisePlugin,
@@ -127,11 +127,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
127
127
  const energyParams = vadConfig?.energyVad || {};
128
128
  const smoothing = energyParams.smoothing ?? 0.95;
129
129
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
130
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
131
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
132
- const minSNR = energyParams.minSNR ?? 10;
133
- const snrRange = energyParams.snrRange ?? 10;
134
- const minEnergy = energyParams.minEnergy ?? 1e-3;
130
+ const minSNR = energyParams.minSNR ?? 8;
131
+ const snrRange = energyParams.snrRange ?? 12;
132
+ const minEnergy = energyParams.minEnergy ?? 0.01;
135
133
  return `
136
134
  class EnergyVadProcessor extends AudioWorkletProcessor {
137
135
  constructor() {
@@ -139,8 +137,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
139
137
  this.smoothing = ${smoothing};
140
138
  this.energy = 0;
141
139
  this.noiseFloor = ${initialNoiseFloor};
142
- this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
143
- this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
144
140
  this.minSNR = ${minSNR};
145
141
  this.snrRange = ${snrRange};
146
142
  this.minEnergy = ${minEnergy};
@@ -160,8 +156,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
160
156
 
161
157
  // Calculate instantaneous RMS (Root Mean Square) energy
162
158
  let sum = 0;
159
+ let peak = 0;
163
160
  for (let i = 0; i < channel.length; i++) {
161
+ const sample = Math.abs(channel[i]);
164
162
  sum += channel[i] * channel[i];
163
+ peak = Math.max(peak, sample);
165
164
  }
166
165
  const instantRms = Math.sqrt(sum / channel.length);
167
166
 
@@ -169,32 +168,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
169
168
  // this.energy acts as the smoothed RMS value
170
169
  this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
171
170
 
172
- // Adaptive noise floor estimation
173
- // We use a TWO-PASS approach to avoid circular dependencies:
174
- // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
175
- const instantSnr = instantRms / (this.noiseFloor + 1e-6);
176
- const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
177
-
178
- // Adapt the noise floor based on instantaneous SNR
179
- if (instantRms < this.noiseFloor) {
180
- // Signal is quieter than noise floor, adapt downwards quickly
181
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
182
- } else if (instantSnrDb < 12) {
183
- // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
184
- // Adapt upwards at normal rate to track rising noise
185
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
186
- } else {
187
- // Signal has high SNR (>= 12dB) - likely speech or transient
188
- // Adapt VERY slowly to avoid "chasing" speech
189
- const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
190
- this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
191
- }
171
+ // Calculate Crest Factor (peak-to-RMS ratio)
172
+ // Voice typically has crest factor of 2-4 (6-12dB)
173
+ // Keyboard clicks have crest factor of 10-30+ (20-30dB)
174
+ const crestFactor = peak / (instantRms + 1e-10);
175
+ const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
192
176
 
193
- // Ensure noise floor doesn't drop to absolute zero
194
- // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
195
- this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
177
+ // FIXED noise floor with minimal adaptation
178
+ // Only adapt within strict bounds to prevent drift
179
+ const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
180
+ this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
181
+
182
+ // Hard clamp to prevent any drift outside acceptable range
183
+ this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
196
184
 
197
- // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
185
+ // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
198
186
  const snr = this.energy / (this.noiseFloor + 1e-6);
199
187
  const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
200
188
 
@@ -203,11 +191,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
203
191
  // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
204
192
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
205
193
 
206
- // Apply absolute energy threshold
207
- // We use a soft threshold to avoid abrupt cutting
194
+ // Apply absolute energy threshold with soft knee
208
195
  if (this.energy < this.minEnergy) {
209
196
  const energyRatio = this.energy / (this.minEnergy + 1e-6);
210
- probability *= Math.pow(energyRatio, 2); // Quadratic falloff
197
+ probability *= Math.pow(energyRatio, 2);
198
+ }
199
+
200
+ // Apply crest factor penalty
201
+ // Reject signals with high crest factor (sharp transients like keyboard clicks)
202
+ // Voice: 6-12dB, Keyboard: 20-30dB
203
+ // We penalize anything above 14dB
204
+ if (crestFactorDb > 14) {
205
+ const excess = crestFactorDb - 14;
206
+ const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
207
+ probability *= penalty;
211
208
  }
212
209
 
213
210
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -330,17 +327,15 @@ var VADStateMachine = class {
330
327
  // Smooth for natural speech
331
328
  preRollMs: config?.preRollMs ?? 250,
332
329
  // Generous pre-roll
333
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
334
- // Increased to filter keyboard clicks
330
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
331
+ // Aggressive transient rejection
335
332
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
336
333
  energyVad: {
337
334
  smoothing: config?.energyVad?.smoothing ?? 0.95,
338
335
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
339
- noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
340
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
341
- minSNR: config?.energyVad?.minSNR ?? 10,
342
- snrRange: config?.energyVad?.snrRange ?? 10,
343
- minEnergy: config?.energyVad?.minEnergy ?? 1e-3
336
+ minSNR: config?.energyVad?.minSNR ?? 8,
337
+ snrRange: config?.energyVad?.snrRange ?? 12,
338
+ minEnergy: config?.energyVad?.minEnergy ?? 0.01
344
339
  }
345
340
  };
346
341
  this.lastSilenceTime = Date.now();
@@ -433,10 +428,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
433
428
  energyVad: {
434
429
  smoothing: 0.95,
435
430
  initialNoiseFloor: 1e-3,
436
- noiseFloorAdaptRateQuiet: 0.01,
437
- noiseFloorAdaptRateLoud: 1e-3,
438
- minSNR: 2,
439
- snrRange: 8
431
+ minSNR: 8,
432
+ snrRange: 12,
433
+ minEnergy: 0.01
440
434
  },
441
435
  ...config.vad
442
436
  },
@@ -1,12 +1,12 @@
1
1
  import {
2
2
  attachProcessingToTrack
3
- } from "../chunk-K6X52R7N.mjs";
4
- import "../chunk-RD4GDIPO.mjs";
5
- import "../chunk-DLLK6K76.mjs";
3
+ } from "../chunk-QQFKHTCQ.mjs";
4
+ import "../chunk-WQVMSR7V.mjs";
5
+ import "../chunk-KLBA2CPE.mjs";
6
6
  import "../chunk-OZ7KMC4S.mjs";
7
- import "../chunk-OXV7BHX5.mjs";
7
+ import "../chunk-U26F3GJN.mjs";
8
8
  import "../chunk-XO6B3D4A.mjs";
9
- import "../chunk-FKR6NWZF.mjs";
9
+ import "../chunk-GLKAWCEW.mjs";
10
10
  export {
11
11
  attachProcessingToTrack
12
12
  };
@@ -125,11 +125,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
125
125
  const energyParams = vadConfig?.energyVad || {};
126
126
  const smoothing = energyParams.smoothing ?? 0.95;
127
127
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
128
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
129
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
130
- const minSNR = energyParams.minSNR ?? 10;
131
- const snrRange = energyParams.snrRange ?? 10;
132
- const minEnergy = energyParams.minEnergy ?? 1e-3;
128
+ const minSNR = energyParams.minSNR ?? 8;
129
+ const snrRange = energyParams.snrRange ?? 12;
130
+ const minEnergy = energyParams.minEnergy ?? 0.01;
133
131
  return `
134
132
  class EnergyVadProcessor extends AudioWorkletProcessor {
135
133
  constructor() {
@@ -137,8 +135,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
137
135
  this.smoothing = ${smoothing};
138
136
  this.energy = 0;
139
137
  this.noiseFloor = ${initialNoiseFloor};
140
- this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
141
- this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
142
138
  this.minSNR = ${minSNR};
143
139
  this.snrRange = ${snrRange};
144
140
  this.minEnergy = ${minEnergy};
@@ -158,8 +154,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
158
154
 
159
155
  // Calculate instantaneous RMS (Root Mean Square) energy
160
156
  let sum = 0;
157
+ let peak = 0;
161
158
  for (let i = 0; i < channel.length; i++) {
159
+ const sample = Math.abs(channel[i]);
162
160
  sum += channel[i] * channel[i];
161
+ peak = Math.max(peak, sample);
163
162
  }
164
163
  const instantRms = Math.sqrt(sum / channel.length);
165
164
 
@@ -167,32 +166,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
167
166
  // this.energy acts as the smoothed RMS value
168
167
  this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
169
168
 
170
- // Adaptive noise floor estimation
171
- // We use a TWO-PASS approach to avoid circular dependencies:
172
- // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
173
- const instantSnr = instantRms / (this.noiseFloor + 1e-6);
174
- const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
175
-
176
- // Adapt the noise floor based on instantaneous SNR
177
- if (instantRms < this.noiseFloor) {
178
- // Signal is quieter than noise floor, adapt downwards quickly
179
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
180
- } else if (instantSnrDb < 12) {
181
- // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
182
- // Adapt upwards at normal rate to track rising noise
183
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
184
- } else {
185
- // Signal has high SNR (>= 12dB) - likely speech or transient
186
- // Adapt VERY slowly to avoid "chasing" speech
187
- const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
188
- this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
189
- }
169
+ // Calculate Crest Factor (peak-to-RMS ratio)
170
+ // Voice typically has crest factor of 2-4 (6-12dB)
171
+ // Keyboard clicks have crest factor of 10-30+ (20-30dB)
172
+ const crestFactor = peak / (instantRms + 1e-10);
173
+ const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
190
174
 
191
- // Ensure noise floor doesn't drop to absolute zero
192
- // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
193
- this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
175
+ // FIXED noise floor with minimal adaptation
176
+ // Only adapt within strict bounds to prevent drift
177
+ const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
178
+ this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
179
+
180
+ // Hard clamp to prevent any drift outside acceptable range
181
+ this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
194
182
 
195
- // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
183
+ // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
196
184
  const snr = this.energy / (this.noiseFloor + 1e-6);
197
185
  const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
198
186
 
@@ -201,11 +189,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
201
189
  // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
202
190
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
203
191
 
204
- // Apply absolute energy threshold
205
- // We use a soft threshold to avoid abrupt cutting
192
+ // Apply absolute energy threshold with soft knee
206
193
  if (this.energy < this.minEnergy) {
207
194
  const energyRatio = this.energy / (this.minEnergy + 1e-6);
208
- probability *= Math.pow(energyRatio, 2); // Quadratic falloff
195
+ probability *= Math.pow(energyRatio, 2);
196
+ }
197
+
198
+ // Apply crest factor penalty
199
+ // Reject signals with high crest factor (sharp transients like keyboard clicks)
200
+ // Voice: 6-12dB, Keyboard: 20-30dB
201
+ // We penalize anything above 14dB
202
+ if (crestFactorDb > 14) {
203
+ const excess = crestFactorDb - 14;
204
+ const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
205
+ probability *= penalty;
209
206
  }
210
207
 
211
208
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -328,17 +325,15 @@ var VADStateMachine = class {
328
325
  // Smooth for natural speech
329
326
  preRollMs: config?.preRollMs ?? 250,
330
327
  // Generous pre-roll
331
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
332
- // Increased to filter keyboard clicks
328
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
329
+ // Aggressive transient rejection
333
330
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
334
331
  energyVad: {
335
332
  smoothing: config?.energyVad?.smoothing ?? 0.95,
336
333
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
337
- noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
338
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
339
- minSNR: config?.energyVad?.minSNR ?? 10,
340
- snrRange: config?.energyVad?.snrRange ?? 10,
341
- minEnergy: config?.energyVad?.minEnergy ?? 1e-3
334
+ minSNR: config?.energyVad?.minSNR ?? 8,
335
+ snrRange: config?.energyVad?.snrRange ?? 12,
336
+ minEnergy: config?.energyVad?.minEnergy ?? 0.01
342
337
  }
343
338
  };
344
339
  this.lastSilenceTime = Date.now();
@@ -431,10 +426,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
431
426
  energyVad: {
432
427
  smoothing: 0.95,
433
428
  initialNoiseFloor: 1e-3,
434
- noiseFloorAdaptRateQuiet: 0.01,
435
- noiseFloorAdaptRateLoud: 1e-3,
436
- minSNR: 2,
437
- snrRange: 8
429
+ minSNR: 8,
430
+ snrRange: 12,
431
+ minEnergy: 0.01
438
432
  },
439
433
  ...config.vad
440
434
  },
@@ -1,11 +1,11 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "../chunk-RD4GDIPO.mjs";
4
- import "../chunk-DLLK6K76.mjs";
3
+ } from "../chunk-WQVMSR7V.mjs";
4
+ import "../chunk-KLBA2CPE.mjs";
5
5
  import "../chunk-OZ7KMC4S.mjs";
6
- import "../chunk-OXV7BHX5.mjs";
6
+ import "../chunk-U26F3GJN.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-FKR6NWZF.mjs";
8
+ import "../chunk-GLKAWCEW.mjs";
9
9
  export {
10
10
  createAudioPipeline
11
11
  };
package/dist/types.d.mts CHANGED
@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
70
70
  preRollMs?: number;
71
71
  /**
72
72
  * Minimum speech duration in ms to consider it valid speech.
73
- * Filters out very brief noise spikes like keyboard clicks.
74
- * Default: 150ms
73
+ * Filters out brief transients like keyboard clicks.
74
+ * Default: 250ms (aggressive transient rejection)
75
75
  */
76
76
  minSpeechDurationMs?: number;
77
77
  /**
@@ -95,31 +95,20 @@ interface AudioProcessingConfig {
95
95
  * Default: 0.001
96
96
  */
97
97
  initialNoiseFloor?: number;
98
- /**
99
- * Rate at which noise floor adapts to quiet signals (0-1).
100
- * Default: 0.01
101
- */
102
- noiseFloorAdaptRateQuiet?: number;
103
- /**
104
- * Rate at which noise floor adapts to loud signals (0-1).
105
- * Applied when instantaneous SNR < 12dB (background noise).
106
- * Default: 0.1 (fast tracking of rising noise)
107
- */
108
- noiseFloorAdaptRateLoud?: number;
109
98
  /**
110
99
  * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
111
- * Default: 10.0 (more aggressive noise rejection)
100
+ * Default: 8.0
112
101
  */
113
102
  minSNR?: number;
114
103
  /**
115
104
  * SNR range in dB for probability scaling.
116
- * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
105
+ * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
117
106
  */
118
107
  snrRange?: number;
119
108
  /**
120
109
  * Minimum absolute RMS energy to consider as speech.
121
- * Prevents triggering on very quiet background noise in silent rooms.
122
- * Default: 0.001 (approx -60dB)
110
+ * Prevents triggering on quiet background noise.
111
+ * Default: 0.01 (approx -40dB, typical voice level)
123
112
  */
124
113
  minEnergy?: number;
125
114
  };
package/dist/types.d.ts CHANGED
@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
70
70
  preRollMs?: number;
71
71
  /**
72
72
  * Minimum speech duration in ms to consider it valid speech.
73
- * Filters out very brief noise spikes like keyboard clicks.
74
- * Default: 150ms
73
+ * Filters out brief transients like keyboard clicks.
74
+ * Default: 250ms (aggressive transient rejection)
75
75
  */
76
76
  minSpeechDurationMs?: number;
77
77
  /**
@@ -95,31 +95,20 @@ interface AudioProcessingConfig {
95
95
  * Default: 0.001
96
96
  */
97
97
  initialNoiseFloor?: number;
98
- /**
99
- * Rate at which noise floor adapts to quiet signals (0-1).
100
- * Default: 0.01
101
- */
102
- noiseFloorAdaptRateQuiet?: number;
103
- /**
104
- * Rate at which noise floor adapts to loud signals (0-1).
105
- * Applied when instantaneous SNR < 12dB (background noise).
106
- * Default: 0.1 (fast tracking of rising noise)
107
- */
108
- noiseFloorAdaptRateLoud?: number;
109
98
  /**
110
99
  * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
111
- * Default: 10.0 (more aggressive noise rejection)
100
+ * Default: 8.0
112
101
  */
113
102
  minSNR?: number;
114
103
  /**
115
104
  * SNR range in dB for probability scaling.
116
- * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
105
+ * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
117
106
  */
118
107
  snrRange?: number;
119
108
  /**
120
109
  * Minimum absolute RMS energy to consider as speech.
121
- * Prevents triggering on very quiet background noise in silent rooms.
122
- * Default: 0.001 (approx -60dB)
110
+ * Prevents triggering on quiet background noise.
111
+ * Default: 0.01 (approx -40dB, typical voice level)
123
112
  */
124
113
  minEnergy?: number;
125
114
  };
@@ -27,11 +27,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
27
27
  const energyParams = vadConfig?.energyVad || {};
28
28
  const smoothing = energyParams.smoothing ?? 0.95;
29
29
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
30
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
31
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.1;
32
- const minSNR = energyParams.minSNR ?? 10;
33
- const snrRange = energyParams.snrRange ?? 10;
34
- const minEnergy = energyParams.minEnergy ?? 1e-3;
30
+ const minSNR = energyParams.minSNR ?? 8;
31
+ const snrRange = energyParams.snrRange ?? 12;
32
+ const minEnergy = energyParams.minEnergy ?? 0.01;
35
33
  return `
36
34
  class EnergyVadProcessor extends AudioWorkletProcessor {
37
35
  constructor() {
@@ -39,8 +37,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
39
37
  this.smoothing = ${smoothing};
40
38
  this.energy = 0;
41
39
  this.noiseFloor = ${initialNoiseFloor};
42
- this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
43
- this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
44
40
  this.minSNR = ${minSNR};
45
41
  this.snrRange = ${snrRange};
46
42
  this.minEnergy = ${minEnergy};
@@ -60,8 +56,11 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
60
56
 
61
57
  // Calculate instantaneous RMS (Root Mean Square) energy
62
58
  let sum = 0;
59
+ let peak = 0;
63
60
  for (let i = 0; i < channel.length; i++) {
61
+ const sample = Math.abs(channel[i]);
64
62
  sum += channel[i] * channel[i];
63
+ peak = Math.max(peak, sample);
65
64
  }
66
65
  const instantRms = Math.sqrt(sum / channel.length);
67
66
 
@@ -69,32 +68,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
69
68
  // this.energy acts as the smoothed RMS value
70
69
  this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
71
70
 
72
- // Adaptive noise floor estimation
73
- // We use a TWO-PASS approach to avoid circular dependencies:
74
- // FIRST PASS: Calculate instantaneous SNR to decide how to adapt
75
- const instantSnr = instantRms / (this.noiseFloor + 1e-6);
76
- const instantSnrDb = 20 * Math.log10(Math.max(1e-6, instantSnr));
77
-
78
- // Adapt the noise floor based on instantaneous SNR
79
- if (instantRms < this.noiseFloor) {
80
- // Signal is quieter than noise floor, adapt downwards quickly
81
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
82
- } else if (instantSnrDb < 12) {
83
- // Signal is louder but SNR is low (< 12dB) - likely just louder background noise
84
- // Adapt upwards at normal rate to track rising noise
85
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + instantRms * this.noiseFloorAdaptRateLoud;
86
- } else {
87
- // Signal has high SNR (>= 12dB) - likely speech or transient
88
- // Adapt VERY slowly to avoid "chasing" speech
89
- const slowRate = this.noiseFloorAdaptRateLoud * 0.02;
90
- this.noiseFloor = this.noiseFloor * (1 - slowRate) + instantRms * slowRate;
91
- }
71
+ // Calculate Crest Factor (peak-to-RMS ratio)
72
+ // Voice typically has crest factor of 2-4 (6-12dB)
73
+ // Keyboard clicks have crest factor of 10-30+ (20-30dB)
74
+ const crestFactor = peak / (instantRms + 1e-10);
75
+ const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
92
76
 
93
- // Ensure noise floor doesn't drop to absolute zero
94
- // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
95
- this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
77
+ // FIXED noise floor with minimal adaptation
78
+ // Only adapt within strict bounds to prevent drift
79
+ const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
80
+ this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
81
+
82
+ // Hard clamp to prevent any drift outside acceptable range
83
+ this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
96
84
 
97
- // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
85
+ // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
98
86
  const snr = this.energy / (this.noiseFloor + 1e-6);
99
87
  const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
100
88
 
@@ -103,11 +91,20 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
103
91
  // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
104
92
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
105
93
 
106
- // Apply absolute energy threshold
107
- // We use a soft threshold to avoid abrupt cutting
94
+ // Apply absolute energy threshold with soft knee
108
95
  if (this.energy < this.minEnergy) {
109
96
  const energyRatio = this.energy / (this.minEnergy + 1e-6);
110
- probability *= Math.pow(energyRatio, 2); // Quadratic falloff
97
+ probability *= Math.pow(energyRatio, 2);
98
+ }
99
+
100
+ // Apply crest factor penalty
101
+ // Reject signals with high crest factor (sharp transients like keyboard clicks)
102
+ // Voice: 6-12dB, Keyboard: 20-30dB
103
+ // We penalize anything above 14dB
104
+ if (crestFactorDb > 14) {
105
+ const excess = crestFactorDb - 14;
106
+ const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
107
+ probability *= penalty;
111
108
  }
112
109
 
113
110
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  EnergyVADPlugin
3
- } from "../chunk-FKR6NWZF.mjs";
3
+ } from "../chunk-GLKAWCEW.mjs";
4
4
  export {
5
5
  EnergyVADPlugin
6
6
  };
@@ -44,17 +44,15 @@ var VADStateMachine = class {
44
44
  // Smooth for natural speech
45
45
  preRollMs: config?.preRollMs ?? 250,
46
46
  // Generous pre-roll
47
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
48
- // Increased to filter keyboard clicks
47
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
48
+ // Aggressive transient rejection
49
49
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
50
50
  energyVad: {
51
51
  smoothing: config?.energyVad?.smoothing ?? 0.95,
52
52
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
53
- noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 5e-3,
54
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.1,
55
- minSNR: config?.energyVad?.minSNR ?? 10,
56
- snrRange: config?.energyVad?.snrRange ?? 10,
57
- minEnergy: config?.energyVad?.minEnergy ?? 1e-3
53
+ minSNR: config?.energyVad?.minSNR ?? 8,
54
+ snrRange: config?.energyVad?.snrRange ?? 12,
55
+ minEnergy: config?.energyVad?.minEnergy ?? 0.01
58
56
  }
59
57
  };
60
58
  this.lastSilenceTime = Date.now();
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  VADStateMachine
3
- } from "../chunk-DLLK6K76.mjs";
3
+ } from "../chunk-KLBA2CPE.mjs";
4
4
  export {
5
5
  VADStateMachine
6
6
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tensamin/audio",
3
- "version": "0.1.13",
3
+ "version": "0.1.15",
4
4
  "main": "dist/index.js",
5
5
  "module": "dist/index.mjs",
6
6
  "types": "dist/index.d.ts",