@tensamin/audio 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -102,9 +102,9 @@ vad: {
102
102
  smoothing: number; // Default: 0.95
103
103
  initialNoiseFloor: number; // Default: 0.001
104
104
  noiseFloorAdaptRateQuiet: number; // Default: 0.05
105
- noiseFloorAdaptRateLoud: number; // Default: 0.005
106
- minSNR: number; // Default: 6.0 (dB)
107
- snrRange: number; // Default: 12.0 (dB)
105
+ noiseFloorAdaptRateLoud: number; // Default: 0.01
106
+ minSNR: number; // Default: 10.0 (dB)
107
+ snrRange: number; // Default: 10.0 (dB)
108
108
  minEnergy: number; // Default: 0.0005
109
109
  };
110
110
  }
@@ -112,9 +112,9 @@ vad: {
112
112
 
113
113
  **Threshold Parameters:**
114
114
 
115
- - `startThreshold`: Probability threshold to unmute audio (Default: 0.6, ~13.2dB SNR)
116
- - `stopThreshold`: Probability threshold to mute audio (Default: 0.45, ~11.4dB SNR)
117
- - `hangoverMs`: Delay before muting after speech stops
115
+ - `startThreshold`: Probability threshold to unmute audio (Default: 0.8, ~18dB SNR)
116
+ - `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
117
+ - `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
118
118
  - `preRollMs`: Audio buffer duration before speech onset
119
119
  - `minSpeechDurationMs`: Minimum duration to consider as valid speech
120
120
  - `minSilenceDurationMs`: Minimum silence duration between speech segments
@@ -4,9 +4,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
4
4
  const smoothing = energyParams.smoothing ?? 0.95;
5
5
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
6
6
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
7
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
8
- const minSNR = energyParams.minSNR ?? 6;
9
- const snrRange = energyParams.snrRange ?? 12;
7
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
8
+ const minSNR = energyParams.minSNR ?? 10;
9
+ const snrRange = energyParams.snrRange ?? 10;
10
10
  const minEnergy = energyParams.minEnergy ?? 5e-4;
11
11
  return `
12
12
  class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -52,9 +52,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
52
52
  this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
53
53
  } else {
54
54
  // If signal is louder, adapt upwards
55
- // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
56
- // If we are silent, adapt at the normal loud rate
57
- const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
55
+ // We use a multi-stage adaptation rate:
56
+ // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
57
+ // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
58
+ // 3. Otherwise, adapt at the normal loud rate
59
+ const snr = instantRms / (this.noiseFloor + 1e-6);
60
+ const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
61
+
62
+ let multiplier = 1.0;
63
+ if (this.isSpeaking) {
64
+ multiplier = 0.01;
65
+ } else if (snrDb > 20) {
66
+ multiplier = 0.1;
67
+ }
68
+
69
+ const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
58
70
  this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
59
71
  }
60
72
 
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  VADStateMachine
3
- } from "./chunk-2EX3FXSF.mjs";
3
+ } from "./chunk-KGCEV2VT.mjs";
4
4
  import {
5
5
  getAudioContext,
6
6
  registerPipeline,
@@ -9,7 +9,7 @@ import {
9
9
  import {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin
12
- } from "./chunk-BMVZ3KKG.mjs";
12
+ } from "./chunk-FOGC2MFA.mjs";
13
13
 
14
14
  // src/pipeline/audio-pipeline.ts
15
15
  import mitt from "mitt";
@@ -3,7 +3,7 @@ import {
3
3
  } from "./chunk-XO6B3D4A.mjs";
4
4
  import {
5
5
  EnergyVADPlugin
6
- } from "./chunk-2TKYGFMC.mjs";
6
+ } from "./chunk-3A2CTC4K.mjs";
7
7
 
8
8
  // src/extensibility/plugins.ts
9
9
  var nsPlugins = /* @__PURE__ */ new Map();
@@ -12,23 +12,24 @@ var VADStateMachine = class {
12
12
  enabled: config?.enabled ?? true,
13
13
  pluginName: config?.pluginName ?? "energy-vad",
14
14
  // Voice-optimized defaults
15
- startThreshold: config?.startThreshold ?? 0.6,
15
+ startThreshold: config?.startThreshold ?? 0.8,
16
16
  // Higher threshold to avoid noise
17
- stopThreshold: config?.stopThreshold ?? 0.45,
17
+ stopThreshold: config?.stopThreshold ?? 0.3,
18
18
  // Balanced for voice
19
- hangoverMs: config?.hangoverMs ?? 400,
19
+ hangoverMs: config?.hangoverMs ?? 300,
20
20
  // Smooth for natural speech
21
21
  preRollMs: config?.preRollMs ?? 250,
22
22
  // Generous pre-roll
23
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
23
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
24
+ // Increased to filter keyboard clicks
24
25
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
25
26
  energyVad: {
26
27
  smoothing: config?.energyVad?.smoothing ?? 0.95,
27
28
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
28
29
  noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
29
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
30
- minSNR: config?.energyVad?.minSNR ?? 6,
31
- snrRange: config?.energyVad?.snrRange ?? 12,
30
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
31
+ minSNR: config?.energyVad?.minSNR ?? 10,
32
+ snrRange: config?.energyVad?.snrRange ?? 10,
32
33
  minEnergy: config?.energyVad?.minEnergy ?? 5e-4
33
34
  }
34
35
  };
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "./chunk-JP6DA62Y.mjs";
3
+ } from "./chunk-E7NH2QKZ.mjs";
4
4
 
5
5
  // src/livekit/integration.ts
6
6
  async function attachProcessingToTrack(track, config = {}) {
@@ -107,9 +107,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
107
107
  const smoothing = energyParams.smoothing ?? 0.95;
108
108
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
109
109
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
110
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
111
- const minSNR = energyParams.minSNR ?? 6;
112
- const snrRange = energyParams.snrRange ?? 12;
110
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
111
+ const minSNR = energyParams.minSNR ?? 10;
112
+ const snrRange = energyParams.snrRange ?? 10;
113
113
  const minEnergy = energyParams.minEnergy ?? 5e-4;
114
114
  return `
115
115
  class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -155,9 +155,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
155
155
  this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
156
156
  } else {
157
157
  // If signal is louder, adapt upwards
158
- // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
159
- // If we are silent, adapt at the normal loud rate
160
- const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
158
+ // We use a multi-stage adaptation rate:
159
+ // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
160
+ // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
161
+ // 3. Otherwise, adapt at the normal loud rate
162
+ const snr = instantRms / (this.noiseFloor + 1e-6);
163
+ const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
164
+
165
+ let multiplier = 1.0;
166
+ if (this.isSpeaking) {
167
+ multiplier = 0.01;
168
+ } else if (snrDb > 20) {
169
+ multiplier = 0.1;
170
+ }
171
+
172
+ const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
161
173
  this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
162
174
  }
163
175
 
@@ -3,9 +3,9 @@ import {
3
3
  getVADPlugin,
4
4
  registerNoiseSuppressionPlugin,
5
5
  registerVADPlugin
6
- } from "../chunk-BMVZ3KKG.mjs";
6
+ } from "../chunk-FOGC2MFA.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-2TKYGFMC.mjs";
8
+ import "../chunk-3A2CTC4K.mjs";
9
9
  export {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin,
package/dist/index.js CHANGED
@@ -159,9 +159,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
159
159
  const smoothing = energyParams.smoothing ?? 0.95;
160
160
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
161
161
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
162
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
163
- const minSNR = energyParams.minSNR ?? 6;
164
- const snrRange = energyParams.snrRange ?? 12;
162
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
163
+ const minSNR = energyParams.minSNR ?? 10;
164
+ const snrRange = energyParams.snrRange ?? 10;
165
165
  const minEnergy = energyParams.minEnergy ?? 5e-4;
166
166
  return `
167
167
  class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -207,9 +207,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
207
207
  this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
208
208
  } else {
209
209
  // If signal is louder, adapt upwards
210
- // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
211
- // If we are silent, adapt at the normal loud rate
212
- const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
210
+ // We use a multi-stage adaptation rate:
211
+ // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
212
+ // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
213
+ // 3. Otherwise, adapt at the normal loud rate
214
+ const snr = instantRms / (this.noiseFloor + 1e-6);
215
+ const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
216
+
217
+ let multiplier = 1.0;
218
+ if (this.isSpeaking) {
219
+ multiplier = 0.01;
220
+ } else if (snrDb > 20) {
221
+ multiplier = 0.1;
222
+ }
223
+
224
+ const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
213
225
  this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
214
226
  }
215
227
 
@@ -348,23 +360,24 @@ var VADStateMachine = class {
348
360
  enabled: config?.enabled ?? true,
349
361
  pluginName: config?.pluginName ?? "energy-vad",
350
362
  // Voice-optimized defaults
351
- startThreshold: config?.startThreshold ?? 0.6,
363
+ startThreshold: config?.startThreshold ?? 0.8,
352
364
  // Higher threshold to avoid noise
353
- stopThreshold: config?.stopThreshold ?? 0.45,
365
+ stopThreshold: config?.stopThreshold ?? 0.3,
354
366
  // Balanced for voice
355
- hangoverMs: config?.hangoverMs ?? 400,
367
+ hangoverMs: config?.hangoverMs ?? 300,
356
368
  // Smooth for natural speech
357
369
  preRollMs: config?.preRollMs ?? 250,
358
370
  // Generous pre-roll
359
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
371
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
372
+ // Increased to filter keyboard clicks
360
373
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
361
374
  energyVad: {
362
375
  smoothing: config?.energyVad?.smoothing ?? 0.95,
363
376
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
364
377
  noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
365
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
366
- minSNR: config?.energyVad?.minSNR ?? 6,
367
- snrRange: config?.energyVad?.snrRange ?? 12,
378
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
379
+ minSNR: config?.energyVad?.minSNR ?? 10,
380
+ snrRange: config?.energyVad?.snrRange ?? 10,
368
381
  minEnergy: config?.energyVad?.minEnergy ?? 5e-4
369
382
  }
370
383
  };
package/dist/index.mjs CHANGED
@@ -1,13 +1,13 @@
1
1
  import "./chunk-WBQAMGXK.mjs";
2
2
  import {
3
3
  attachProcessingToTrack
4
- } from "./chunk-UQG6Z5W3.mjs";
4
+ } from "./chunk-ZISGHJDU.mjs";
5
5
  import {
6
6
  createAudioPipeline
7
- } from "./chunk-JP6DA62Y.mjs";
7
+ } from "./chunk-E7NH2QKZ.mjs";
8
8
  import {
9
9
  VADStateMachine
10
- } from "./chunk-2EX3FXSF.mjs";
10
+ } from "./chunk-KGCEV2VT.mjs";
11
11
  import {
12
12
  closeAudioContext,
13
13
  getAudioContext,
@@ -21,13 +21,13 @@ import {
21
21
  getVADPlugin,
22
22
  registerNoiseSuppressionPlugin,
23
23
  registerVADPlugin
24
- } from "./chunk-BMVZ3KKG.mjs";
24
+ } from "./chunk-FOGC2MFA.mjs";
25
25
  import {
26
26
  RNNoisePlugin
27
27
  } from "./chunk-XO6B3D4A.mjs";
28
28
  import {
29
29
  EnergyVADPlugin
30
- } from "./chunk-2TKYGFMC.mjs";
30
+ } from "./chunk-3A2CTC4K.mjs";
31
31
  export {
32
32
  EnergyVADPlugin,
33
33
  RNNoisePlugin,
@@ -128,9 +128,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
128
128
  const smoothing = energyParams.smoothing ?? 0.95;
129
129
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
130
130
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
131
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
132
- const minSNR = energyParams.minSNR ?? 6;
133
- const snrRange = energyParams.snrRange ?? 12;
131
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
132
+ const minSNR = energyParams.minSNR ?? 10;
133
+ const snrRange = energyParams.snrRange ?? 10;
134
134
  const minEnergy = energyParams.minEnergy ?? 5e-4;
135
135
  return `
136
136
  class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -176,9 +176,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
176
176
  this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
177
177
  } else {
178
178
  // If signal is louder, adapt upwards
179
- // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
180
- // If we are silent, adapt at the normal loud rate
181
- const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
179
+ // We use a multi-stage adaptation rate:
180
+ // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
181
+ // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
182
+ // 3. Otherwise, adapt at the normal loud rate
183
+ const snr = instantRms / (this.noiseFloor + 1e-6);
184
+ const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
185
+
186
+ let multiplier = 1.0;
187
+ if (this.isSpeaking) {
188
+ multiplier = 0.01;
189
+ } else if (snrDb > 20) {
190
+ multiplier = 0.1;
191
+ }
192
+
193
+ const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
182
194
  this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
183
195
  }
184
196
 
@@ -311,23 +323,24 @@ var VADStateMachine = class {
311
323
  enabled: config?.enabled ?? true,
312
324
  pluginName: config?.pluginName ?? "energy-vad",
313
325
  // Voice-optimized defaults
314
- startThreshold: config?.startThreshold ?? 0.6,
326
+ startThreshold: config?.startThreshold ?? 0.8,
315
327
  // Higher threshold to avoid noise
316
- stopThreshold: config?.stopThreshold ?? 0.45,
328
+ stopThreshold: config?.stopThreshold ?? 0.3,
317
329
  // Balanced for voice
318
- hangoverMs: config?.hangoverMs ?? 400,
330
+ hangoverMs: config?.hangoverMs ?? 300,
319
331
  // Smooth for natural speech
320
332
  preRollMs: config?.preRollMs ?? 250,
321
333
  // Generous pre-roll
322
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
334
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
335
+ // Increased to filter keyboard clicks
323
336
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
324
337
  energyVad: {
325
338
  smoothing: config?.energyVad?.smoothing ?? 0.95,
326
339
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
327
340
  noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
328
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
329
- minSNR: config?.energyVad?.minSNR ?? 6,
330
- snrRange: config?.energyVad?.snrRange ?? 12,
341
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
342
+ minSNR: config?.energyVad?.minSNR ?? 10,
343
+ snrRange: config?.energyVad?.snrRange ?? 10,
331
344
  minEnergy: config?.energyVad?.minEnergy ?? 5e-4
332
345
  }
333
346
  };
@@ -1,12 +1,12 @@
1
1
  import {
2
2
  attachProcessingToTrack
3
- } from "../chunk-UQG6Z5W3.mjs";
4
- import "../chunk-JP6DA62Y.mjs";
5
- import "../chunk-2EX3FXSF.mjs";
3
+ } from "../chunk-ZISGHJDU.mjs";
4
+ import "../chunk-E7NH2QKZ.mjs";
5
+ import "../chunk-KGCEV2VT.mjs";
6
6
  import "../chunk-OZ7KMC4S.mjs";
7
- import "../chunk-BMVZ3KKG.mjs";
7
+ import "../chunk-FOGC2MFA.mjs";
8
8
  import "../chunk-XO6B3D4A.mjs";
9
- import "../chunk-2TKYGFMC.mjs";
9
+ import "../chunk-3A2CTC4K.mjs";
10
10
  export {
11
11
  attachProcessingToTrack
12
12
  };
@@ -126,9 +126,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
126
126
  const smoothing = energyParams.smoothing ?? 0.95;
127
127
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
128
128
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
129
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
130
- const minSNR = energyParams.minSNR ?? 6;
131
- const snrRange = energyParams.snrRange ?? 12;
129
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
130
+ const minSNR = energyParams.minSNR ?? 10;
131
+ const snrRange = energyParams.snrRange ?? 10;
132
132
  const minEnergy = energyParams.minEnergy ?? 5e-4;
133
133
  return `
134
134
  class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -174,9 +174,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
174
174
  this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
175
175
  } else {
176
176
  // If signal is louder, adapt upwards
177
- // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
178
- // If we are silent, adapt at the normal loud rate
179
- const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
177
+ // We use a multi-stage adaptation rate:
178
+ // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
179
+ // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
180
+ // 3. Otherwise, adapt at the normal loud rate
181
+ const snr = instantRms / (this.noiseFloor + 1e-6);
182
+ const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
183
+
184
+ let multiplier = 1.0;
185
+ if (this.isSpeaking) {
186
+ multiplier = 0.01;
187
+ } else if (snrDb > 20) {
188
+ multiplier = 0.1;
189
+ }
190
+
191
+ const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
180
192
  this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
181
193
  }
182
194
 
@@ -309,23 +321,24 @@ var VADStateMachine = class {
309
321
  enabled: config?.enabled ?? true,
310
322
  pluginName: config?.pluginName ?? "energy-vad",
311
323
  // Voice-optimized defaults
312
- startThreshold: config?.startThreshold ?? 0.6,
324
+ startThreshold: config?.startThreshold ?? 0.8,
313
325
  // Higher threshold to avoid noise
314
- stopThreshold: config?.stopThreshold ?? 0.45,
326
+ stopThreshold: config?.stopThreshold ?? 0.3,
315
327
  // Balanced for voice
316
- hangoverMs: config?.hangoverMs ?? 400,
328
+ hangoverMs: config?.hangoverMs ?? 300,
317
329
  // Smooth for natural speech
318
330
  preRollMs: config?.preRollMs ?? 250,
319
331
  // Generous pre-roll
320
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
332
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
333
+ // Increased to filter keyboard clicks
321
334
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
322
335
  energyVad: {
323
336
  smoothing: config?.energyVad?.smoothing ?? 0.95,
324
337
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
325
338
  noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
326
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
327
- minSNR: config?.energyVad?.minSNR ?? 6,
328
- snrRange: config?.energyVad?.snrRange ?? 12,
339
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
340
+ minSNR: config?.energyVad?.minSNR ?? 10,
341
+ snrRange: config?.energyVad?.snrRange ?? 10,
329
342
  minEnergy: config?.energyVad?.minEnergy ?? 5e-4
330
343
  }
331
344
  };
@@ -1,11 +1,11 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "../chunk-JP6DA62Y.mjs";
4
- import "../chunk-2EX3FXSF.mjs";
3
+ } from "../chunk-E7NH2QKZ.mjs";
4
+ import "../chunk-KGCEV2VT.mjs";
5
5
  import "../chunk-OZ7KMC4S.mjs";
6
- import "../chunk-BMVZ3KKG.mjs";
6
+ import "../chunk-FOGC2MFA.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-2TKYGFMC.mjs";
8
+ import "../chunk-3A2CTC4K.mjs";
9
9
  export {
10
10
  createAudioPipeline
11
11
  };
package/dist/types.d.mts CHANGED
@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
43
43
  * When VAD probability rises above this, audio is unmuted.
44
44
  * Lower = more sensitive (catches quiet speech, may include noise)
45
45
  * Higher = less sensitive (only confident speech, may clip quiet parts)
46
- * Default: 0.6 (optimized for voice-only)
46
+ * Default: 0.8 (aggressive noise rejection)
47
47
  */
48
48
  startThreshold?: number;
49
49
  /**
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
51
51
  * When VAD probability drops below this (after hangover), audio is muted.
52
52
  * Lower = keeps audio on longer (less aggressive gating)
53
53
  * Higher = mutes faster (more aggressive noise suppression)
54
- * Default: 0.45 (balanced voice detection)
54
+ * Default: 0.3 (wide hysteresis for stability)
55
55
  */
56
56
  stopThreshold?: number;
57
57
  /**
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
59
59
  * Prevents rapid on/off toggling during pauses.
60
60
  * Lower = more aggressive gating, may clip between words
61
61
  * Higher = smoother but may let trailing noise through
62
- * Default: 400ms (optimized for natural speech)
62
+ * Default: 300ms
63
63
  */
64
64
  hangoverMs?: number;
65
65
  /**
@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
70
70
  preRollMs?: number;
71
71
  /**
72
72
  * Minimum speech duration in ms to consider it valid speech.
73
- * Filters out very brief noise spikes.
74
- * Default: 100ms
73
+ * Filters out very brief noise spikes like keyboard clicks.
74
+ * Default: 150ms
75
75
  */
76
76
  minSpeechDurationMs?: number;
77
77
  /**
@@ -102,17 +102,17 @@ interface AudioProcessingConfig {
102
102
  noiseFloorAdaptRateQuiet?: number;
103
103
  /**
104
104
  * Rate at which noise floor adapts to loud signals (0-1).
105
- * Default: 0.005 (slower adaptation for speech)
105
+ * Default: 0.01 (faster tracking of rising noise)
106
106
  */
107
107
  noiseFloorAdaptRateLoud?: number;
108
108
  /**
109
109
  * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
110
- * Default: 6.0 (voice is ~2x louder than noise floor)
110
+ * Default: 10.0 (more aggressive noise rejection)
111
111
  */
112
112
  minSNR?: number;
113
113
  /**
114
114
  * SNR range in dB for probability scaling.
115
- * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
115
+ * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
116
116
  */
117
117
  snrRange?: number;
118
118
  /**
package/dist/types.d.ts CHANGED
@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
43
43
  * When VAD probability rises above this, audio is unmuted.
44
44
  * Lower = more sensitive (catches quiet speech, may include noise)
45
45
  * Higher = less sensitive (only confident speech, may clip quiet parts)
46
- * Default: 0.6 (optimized for voice-only)
46
+ * Default: 0.8 (aggressive noise rejection)
47
47
  */
48
48
  startThreshold?: number;
49
49
  /**
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
51
51
  * When VAD probability drops below this (after hangover), audio is muted.
52
52
  * Lower = keeps audio on longer (less aggressive gating)
53
53
  * Higher = mutes faster (more aggressive noise suppression)
54
- * Default: 0.45 (balanced voice detection)
54
+ * Default: 0.3 (wide hysteresis for stability)
55
55
  */
56
56
  stopThreshold?: number;
57
57
  /**
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
59
59
  * Prevents rapid on/off toggling during pauses.
60
60
  * Lower = more aggressive gating, may clip between words
61
61
  * Higher = smoother but may let trailing noise through
62
- * Default: 400ms (optimized for natural speech)
62
+ * Default: 300ms
63
63
  */
64
64
  hangoverMs?: number;
65
65
  /**
@@ -70,8 +70,8 @@ interface AudioProcessingConfig {
70
70
  preRollMs?: number;
71
71
  /**
72
72
  * Minimum speech duration in ms to consider it valid speech.
73
- * Filters out very brief noise spikes.
74
- * Default: 100ms
73
+ * Filters out very brief noise spikes like keyboard clicks.
74
+ * Default: 150ms
75
75
  */
76
76
  minSpeechDurationMs?: number;
77
77
  /**
@@ -102,17 +102,17 @@ interface AudioProcessingConfig {
102
102
  noiseFloorAdaptRateQuiet?: number;
103
103
  /**
104
104
  * Rate at which noise floor adapts to loud signals (0-1).
105
- * Default: 0.005 (slower adaptation for speech)
105
+ * Default: 0.01 (faster tracking of rising noise)
106
106
  */
107
107
  noiseFloorAdaptRateLoud?: number;
108
108
  /**
109
109
  * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
110
- * Default: 6.0 (voice is ~2x louder than noise floor)
110
+ * Default: 10.0 (more aggressive noise rejection)
111
111
  */
112
112
  minSNR?: number;
113
113
  /**
114
114
  * SNR range in dB for probability scaling.
115
- * Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
115
+ * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
116
116
  */
117
117
  snrRange?: number;
118
118
  /**
@@ -28,9 +28,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
28
28
  const smoothing = energyParams.smoothing ?? 0.95;
29
29
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
30
30
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
31
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 5e-3;
32
- const minSNR = energyParams.minSNR ?? 6;
33
- const snrRange = energyParams.snrRange ?? 12;
31
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
32
+ const minSNR = energyParams.minSNR ?? 10;
33
+ const snrRange = energyParams.snrRange ?? 10;
34
34
  const minEnergy = energyParams.minEnergy ?? 5e-4;
35
35
  return `
36
36
  class EnergyVadProcessor extends AudioWorkletProcessor {
@@ -76,9 +76,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
76
76
  this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
77
77
  } else {
78
78
  // If signal is louder, adapt upwards
79
- // If we are currently speaking, adapt EXTREMELY slowly to avoid "chasing" speech
80
- // If we are silent, adapt at the normal loud rate
81
- const adaptRate = this.isSpeaking ? (this.noiseFloorAdaptRateLoud * 0.02) : this.noiseFloorAdaptRateLoud;
79
+ // We use a multi-stage adaptation rate:
80
+ // 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
81
+ // 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
82
+ // 3. Otherwise, adapt at the normal loud rate
83
+ const snr = instantRms / (this.noiseFloor + 1e-6);
84
+ const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
85
+
86
+ let multiplier = 1.0;
87
+ if (this.isSpeaking) {
88
+ multiplier = 0.01;
89
+ } else if (snrDb > 20) {
90
+ multiplier = 0.1;
91
+ }
92
+
93
+ const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
82
94
  this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
83
95
  }
84
96
 
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  EnergyVADPlugin
3
- } from "../chunk-2TKYGFMC.mjs";
3
+ } from "../chunk-3A2CTC4K.mjs";
4
4
  export {
5
5
  EnergyVADPlugin
6
6
  };
@@ -36,23 +36,24 @@ var VADStateMachine = class {
36
36
  enabled: config?.enabled ?? true,
37
37
  pluginName: config?.pluginName ?? "energy-vad",
38
38
  // Voice-optimized defaults
39
- startThreshold: config?.startThreshold ?? 0.6,
39
+ startThreshold: config?.startThreshold ?? 0.8,
40
40
  // Higher threshold to avoid noise
41
- stopThreshold: config?.stopThreshold ?? 0.45,
41
+ stopThreshold: config?.stopThreshold ?? 0.3,
42
42
  // Balanced for voice
43
- hangoverMs: config?.hangoverMs ?? 400,
43
+ hangoverMs: config?.hangoverMs ?? 300,
44
44
  // Smooth for natural speech
45
45
  preRollMs: config?.preRollMs ?? 250,
46
46
  // Generous pre-roll
47
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
47
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 150,
48
+ // Increased to filter keyboard clicks
48
49
  minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
49
50
  energyVad: {
50
51
  smoothing: config?.energyVad?.smoothing ?? 0.95,
51
52
  initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
52
53
  noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
53
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 5e-3,
54
- minSNR: config?.energyVad?.minSNR ?? 6,
55
- snrRange: config?.energyVad?.snrRange ?? 12,
54
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
55
+ minSNR: config?.energyVad?.minSNR ?? 10,
56
+ snrRange: config?.energyVad?.snrRange ?? 10,
56
57
  minEnergy: config?.energyVad?.minEnergy ?? 5e-4
57
58
  }
58
59
  };
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  VADStateMachine
3
- } from "../chunk-2EX3FXSF.mjs";
3
+ } from "../chunk-KGCEV2VT.mjs";
4
4
  export {
5
5
  VADStateMachine
6
6
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tensamin/audio",
3
- "version": "0.1.7",
3
+ "version": "0.1.9",
4
4
  "main": "dist/index.js",
5
5
  "module": "dist/index.mjs",
6
6
  "types": "dist/index.d.ts",