@tensamin/audio 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,11 +101,11 @@ vad: {
101
101
  energyVad?: {
102
102
  smoothing: number; // Default: 0.95
103
103
  initialNoiseFloor: number; // Default: 0.001
104
- noiseFloorAdaptRateQuiet: number; // Default: 0.005
105
- noiseFloorAdaptRateLoud: number; // Default: 0.01
104
+ noiseFloorAdaptRateQuiet: number; // Default: 0.01
105
+ noiseFloorAdaptRateLoud: number; // Default: 0.05
106
106
  minSNR: number; // Default: 10.0 (dB)
107
107
  snrRange: number; // Default: 10.0 (dB)
108
- minEnergy: number; // Default: 0.0005
108
+ minEnergy: number; // Default: 0.001
109
109
  };
110
110
  }
111
111
  ```
@@ -116,7 +116,7 @@ vad: {
116
116
  - `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
117
117
  - `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
118
118
  - `preRollMs`: Audio buffer duration before speech onset
119
- - `minSpeechDurationMs`: Minimum duration to consider as valid speech
119
+ - `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 150ms)
120
120
  - `minSilenceDurationMs`: Minimum silence duration between speech segments
121
121
 
122
122
  **Energy VAD Parameters:**
@@ -124,7 +124,7 @@ vad: {
124
124
  - `smoothing`: Energy calculation smoothing factor (0-1)
125
125
  - `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
126
126
  - `snrRange`: Range in dB for probability scaling from minSNR
127
- - `minEnergy`: Minimum absolute RMS energy to consider as speech
127
+ - `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.001, ~-60dB)
128
128
 
129
129
  ### Output Control
130
130
 
@@ -30,7 +30,7 @@ var VADStateMachine = class {
30
30
  noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
31
31
  minSNR: config?.energyVad?.minSNR ?? 10,
32
32
  snrRange: config?.energyVad?.snrRange ?? 10,
33
- minEnergy: config?.energyVad?.minEnergy ?? 5e-4
33
+ minEnergy: config?.energyVad?.minEnergy ?? 1e-3
34
34
  }
35
35
  };
36
36
  this.lastSilenceTime = Date.now();
@@ -61,25 +61,33 @@ var VADStateMachine = class {
61
61
  newState = "silent";
62
62
  this.lastSilenceTime = timestamp;
63
63
  }
64
- } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
64
+ } else if (this.currentState === "speech_starting") {
65
+ if (probability >= stopThreshold) {
66
+ const speechDuration = timestamp - this.speechStartTime;
67
+ if (speechDuration >= minSpeechDurationMs) {
68
+ newState = "speaking";
69
+ } else {
70
+ newState = "speech_starting";
71
+ }
72
+ this.lastSpeechTime = timestamp;
73
+ } else {
74
+ newState = "silent";
75
+ this.lastSilenceTime = timestamp;
76
+ }
77
+ } else if (this.currentState === "speaking") {
65
78
  if (probability >= stopThreshold) {
66
79
  newState = "speaking";
67
80
  this.lastSpeechTime = timestamp;
68
81
  } else {
69
82
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
70
- const speechDuration = timestamp - this.speechStartTime;
71
83
  if (timeSinceSpeech < hangoverMs) {
72
84
  newState = "speaking";
73
- } else if (speechDuration < minSpeechDurationMs) {
74
- newState = "silent";
75
- this.lastSilenceTime = timestamp;
76
85
  } else {
77
86
  newState = "speech_ending";
78
87
  this.lastSilenceTime = timestamp;
79
88
  }
80
89
  }
81
90
  }
82
- if (newState === "speech_starting") newState = "speaking";
83
91
  if (newState === "speech_ending") newState = "silent";
84
92
  this.currentState = newState;
85
93
  return {
@@ -3,11 +3,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
3
3
  const energyParams = vadConfig?.energyVad || {};
4
4
  const smoothing = energyParams.smoothing ?? 0.95;
5
5
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
6
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
7
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
6
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
7
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
8
8
  const minSNR = energyParams.minSNR ?? 10;
9
9
  const snrRange = energyParams.snrRange ?? 10;
10
- const minEnergy = energyParams.minEnergy ?? 5e-4;
10
+ const minEnergy = energyParams.minEnergy ?? 1e-3;
11
11
  return `
12
12
  class EnergyVadProcessor extends AudioWorkletProcessor {
13
13
  constructor() {
@@ -61,9 +61,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
61
61
 
62
62
  let multiplier = 1.0;
63
63
  if (this.isSpeaking) {
64
- multiplier = 0.01;
64
+ multiplier = 0.05;
65
65
  } else if (snrDb > 20) {
66
- multiplier = 0.1;
66
+ multiplier = 0.2;
67
67
  }
68
68
 
69
69
  const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
@@ -71,8 +71,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
71
71
  }
72
72
 
73
73
  // Ensure noise floor doesn't drop to absolute zero
74
- // 0.0002 is approx -74dB, a reasonable floor for ambient noise
75
- this.noiseFloor = Math.max(this.noiseFloor, 0.0002);
74
+ // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
75
+ this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
76
76
 
77
77
  // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
78
78
  const snr = this.energy / (this.noiseFloor + 1e-6);
@@ -84,8 +84,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
84
84
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
85
85
 
86
86
  // Apply absolute energy threshold
87
+ // We use a soft threshold to avoid abrupt cutting
87
88
  if (this.energy < this.minEnergy) {
88
- probability = 0;
89
+ const energyRatio = this.energy / (this.minEnergy + 1e-6);
90
+ probability *= Math.pow(energyRatio, 2); // Quadratic falloff
89
91
  }
90
92
 
91
93
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "./chunk-CD5XFC5M.mjs";
3
+ } from "./chunk-SMZJFNRU.mjs";
4
4
 
5
5
  // src/livekit/integration.ts
6
6
  async function attachProcessingToTrack(track, config = {}) {
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  VADStateMachine
3
- } from "./chunk-VCQMZVO3.mjs";
3
+ } from "./chunk-DYY2MXMU.mjs";
4
4
  import {
5
5
  getAudioContext,
6
6
  registerPipeline,
@@ -9,7 +9,7 @@ import {
9
9
  import {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin
12
- } from "./chunk-Z3QBDLTM.mjs";
12
+ } from "./chunk-XZSFQJW4.mjs";
13
13
 
14
14
  // src/pipeline/audio-pipeline.ts
15
15
  import mitt from "mitt";
@@ -3,7 +3,7 @@ import {
3
3
  } from "./chunk-XO6B3D4A.mjs";
4
4
  import {
5
5
  EnergyVADPlugin
6
- } from "./chunk-SMNOCQYR.mjs";
6
+ } from "./chunk-KEWK2OKV.mjs";
7
7
 
8
8
  // src/extensibility/plugins.ts
9
9
  var nsPlugins = /* @__PURE__ */ new Map();
@@ -106,11 +106,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
106
106
  const energyParams = vadConfig?.energyVad || {};
107
107
  const smoothing = energyParams.smoothing ?? 0.95;
108
108
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
109
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
110
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
109
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
110
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
111
111
  const minSNR = energyParams.minSNR ?? 10;
112
112
  const snrRange = energyParams.snrRange ?? 10;
113
- const minEnergy = energyParams.minEnergy ?? 5e-4;
113
+ const minEnergy = energyParams.minEnergy ?? 1e-3;
114
114
  return `
115
115
  class EnergyVadProcessor extends AudioWorkletProcessor {
116
116
  constructor() {
@@ -164,9 +164,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
164
164
 
165
165
  let multiplier = 1.0;
166
166
  if (this.isSpeaking) {
167
- multiplier = 0.01;
167
+ multiplier = 0.05;
168
168
  } else if (snrDb > 20) {
169
- multiplier = 0.1;
169
+ multiplier = 0.2;
170
170
  }
171
171
 
172
172
  const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
@@ -174,8 +174,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
174
174
  }
175
175
 
176
176
  // Ensure noise floor doesn't drop to absolute zero
177
- // 0.0002 is approx -74dB, a reasonable floor for ambient noise
178
- this.noiseFloor = Math.max(this.noiseFloor, 0.0002);
177
+ // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
178
+ this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
179
179
 
180
180
  // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
181
181
  const snr = this.energy / (this.noiseFloor + 1e-6);
@@ -187,8 +187,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
187
187
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
188
188
 
189
189
  // Apply absolute energy threshold
190
+ // We use a soft threshold to avoid abrupt cutting
190
191
  if (this.energy < this.minEnergy) {
191
- probability = 0;
192
+ const energyRatio = this.energy / (this.minEnergy + 1e-6);
193
+ probability *= Math.pow(energyRatio, 2); // Quadratic falloff
192
194
  }
193
195
 
194
196
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -3,9 +3,9 @@ import {
3
3
  getVADPlugin,
4
4
  registerNoiseSuppressionPlugin,
5
5
  registerVADPlugin
6
- } from "../chunk-Z3QBDLTM.mjs";
6
+ } from "../chunk-XZSFQJW4.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-SMNOCQYR.mjs";
8
+ import "../chunk-KEWK2OKV.mjs";
9
9
  export {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin,
package/dist/index.js CHANGED
@@ -158,11 +158,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
158
158
  const energyParams = vadConfig?.energyVad || {};
159
159
  const smoothing = energyParams.smoothing ?? 0.95;
160
160
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
161
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
162
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
161
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
162
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
163
163
  const minSNR = energyParams.minSNR ?? 10;
164
164
  const snrRange = energyParams.snrRange ?? 10;
165
- const minEnergy = energyParams.minEnergy ?? 5e-4;
165
+ const minEnergy = energyParams.minEnergy ?? 1e-3;
166
166
  return `
167
167
  class EnergyVadProcessor extends AudioWorkletProcessor {
168
168
  constructor() {
@@ -216,9 +216,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
216
216
 
217
217
  let multiplier = 1.0;
218
218
  if (this.isSpeaking) {
219
- multiplier = 0.01;
219
+ multiplier = 0.05;
220
220
  } else if (snrDb > 20) {
221
- multiplier = 0.1;
221
+ multiplier = 0.2;
222
222
  }
223
223
 
224
224
  const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
@@ -226,8 +226,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
226
226
  }
227
227
 
228
228
  // Ensure noise floor doesn't drop to absolute zero
229
- // 0.0002 is approx -74dB, a reasonable floor for ambient noise
230
- this.noiseFloor = Math.max(this.noiseFloor, 0.0002);
229
+ // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
230
+ this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
231
231
 
232
232
  // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
233
233
  const snr = this.energy / (this.noiseFloor + 1e-6);
@@ -239,8 +239,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
239
239
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
240
240
 
241
241
  // Apply absolute energy threshold
242
+ // We use a soft threshold to avoid abrupt cutting
242
243
  if (this.energy < this.minEnergy) {
243
- probability = 0;
244
+ const energyRatio = this.energy / (this.minEnergy + 1e-6);
245
+ probability *= Math.pow(energyRatio, 2); // Quadratic falloff
244
246
  }
245
247
 
246
248
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -379,7 +381,7 @@ var VADStateMachine = class {
379
381
  noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
380
382
  minSNR: config?.energyVad?.minSNR ?? 10,
381
383
  snrRange: config?.energyVad?.snrRange ?? 10,
382
- minEnergy: config?.energyVad?.minEnergy ?? 5e-4
384
+ minEnergy: config?.energyVad?.minEnergy ?? 1e-3
383
385
  }
384
386
  };
385
387
  this.lastSilenceTime = Date.now();
@@ -410,25 +412,33 @@ var VADStateMachine = class {
410
412
  newState = "silent";
411
413
  this.lastSilenceTime = timestamp;
412
414
  }
413
- } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
415
+ } else if (this.currentState === "speech_starting") {
416
+ if (probability >= stopThreshold) {
417
+ const speechDuration = timestamp - this.speechStartTime;
418
+ if (speechDuration >= minSpeechDurationMs) {
419
+ newState = "speaking";
420
+ } else {
421
+ newState = "speech_starting";
422
+ }
423
+ this.lastSpeechTime = timestamp;
424
+ } else {
425
+ newState = "silent";
426
+ this.lastSilenceTime = timestamp;
427
+ }
428
+ } else if (this.currentState === "speaking") {
414
429
  if (probability >= stopThreshold) {
415
430
  newState = "speaking";
416
431
  this.lastSpeechTime = timestamp;
417
432
  } else {
418
433
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
419
- const speechDuration = timestamp - this.speechStartTime;
420
434
  if (timeSinceSpeech < hangoverMs) {
421
435
  newState = "speaking";
422
- } else if (speechDuration < minSpeechDurationMs) {
423
- newState = "silent";
424
- this.lastSilenceTime = timestamp;
425
436
  } else {
426
437
  newState = "speech_ending";
427
438
  this.lastSilenceTime = timestamp;
428
439
  }
429
440
  }
430
441
  }
431
- if (newState === "speech_starting") newState = "speaking";
432
442
  if (newState === "speech_ending") newState = "silent";
433
443
  this.currentState = newState;
434
444
  return {
package/dist/index.mjs CHANGED
@@ -1,13 +1,13 @@
1
1
  import "./chunk-WBQAMGXK.mjs";
2
2
  import {
3
3
  attachProcessingToTrack
4
- } from "./chunk-IL4F7WVW.mjs";
4
+ } from "./chunk-Q2I22TJG.mjs";
5
5
  import {
6
6
  createAudioPipeline
7
- } from "./chunk-CD5XFC5M.mjs";
7
+ } from "./chunk-SMZJFNRU.mjs";
8
8
  import {
9
9
  VADStateMachine
10
- } from "./chunk-VCQMZVO3.mjs";
10
+ } from "./chunk-DYY2MXMU.mjs";
11
11
  import {
12
12
  closeAudioContext,
13
13
  getAudioContext,
@@ -21,13 +21,13 @@ import {
21
21
  getVADPlugin,
22
22
  registerNoiseSuppressionPlugin,
23
23
  registerVADPlugin
24
- } from "./chunk-Z3QBDLTM.mjs";
24
+ } from "./chunk-XZSFQJW4.mjs";
25
25
  import {
26
26
  RNNoisePlugin
27
27
  } from "./chunk-XO6B3D4A.mjs";
28
28
  import {
29
29
  EnergyVADPlugin
30
- } from "./chunk-SMNOCQYR.mjs";
30
+ } from "./chunk-KEWK2OKV.mjs";
31
31
  export {
32
32
  EnergyVADPlugin,
33
33
  RNNoisePlugin,
@@ -127,11 +127,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
127
127
  const energyParams = vadConfig?.energyVad || {};
128
128
  const smoothing = energyParams.smoothing ?? 0.95;
129
129
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
130
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
131
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
130
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
131
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
132
132
  const minSNR = energyParams.minSNR ?? 10;
133
133
  const snrRange = energyParams.snrRange ?? 10;
134
- const minEnergy = energyParams.minEnergy ?? 5e-4;
134
+ const minEnergy = energyParams.minEnergy ?? 1e-3;
135
135
  return `
136
136
  class EnergyVadProcessor extends AudioWorkletProcessor {
137
137
  constructor() {
@@ -185,9 +185,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
185
185
 
186
186
  let multiplier = 1.0;
187
187
  if (this.isSpeaking) {
188
- multiplier = 0.01;
188
+ multiplier = 0.05;
189
189
  } else if (snrDb > 20) {
190
- multiplier = 0.1;
190
+ multiplier = 0.2;
191
191
  }
192
192
 
193
193
  const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
@@ -195,8 +195,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
195
195
  }
196
196
 
197
197
  // Ensure noise floor doesn't drop to absolute zero
198
- // 0.0002 is approx -74dB, a reasonable floor for ambient noise
199
- this.noiseFloor = Math.max(this.noiseFloor, 0.0002);
198
+ // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
199
+ this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
200
200
 
201
201
  // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
202
202
  const snr = this.energy / (this.noiseFloor + 1e-6);
@@ -208,8 +208,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
208
208
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
209
209
 
210
210
  // Apply absolute energy threshold
211
+ // We use a soft threshold to avoid abrupt cutting
211
212
  if (this.energy < this.minEnergy) {
212
- probability = 0;
213
+ const energyRatio = this.energy / (this.minEnergy + 1e-6);
214
+ probability *= Math.pow(energyRatio, 2); // Quadratic falloff
213
215
  }
214
216
 
215
217
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -342,7 +344,7 @@ var VADStateMachine = class {
342
344
  noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
343
345
  minSNR: config?.energyVad?.minSNR ?? 10,
344
346
  snrRange: config?.energyVad?.snrRange ?? 10,
345
- minEnergy: config?.energyVad?.minEnergy ?? 5e-4
347
+ minEnergy: config?.energyVad?.minEnergy ?? 1e-3
346
348
  }
347
349
  };
348
350
  this.lastSilenceTime = Date.now();
@@ -373,25 +375,33 @@ var VADStateMachine = class {
373
375
  newState = "silent";
374
376
  this.lastSilenceTime = timestamp;
375
377
  }
376
- } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
378
+ } else if (this.currentState === "speech_starting") {
379
+ if (probability >= stopThreshold) {
380
+ const speechDuration = timestamp - this.speechStartTime;
381
+ if (speechDuration >= minSpeechDurationMs) {
382
+ newState = "speaking";
383
+ } else {
384
+ newState = "speech_starting";
385
+ }
386
+ this.lastSpeechTime = timestamp;
387
+ } else {
388
+ newState = "silent";
389
+ this.lastSilenceTime = timestamp;
390
+ }
391
+ } else if (this.currentState === "speaking") {
377
392
  if (probability >= stopThreshold) {
378
393
  newState = "speaking";
379
394
  this.lastSpeechTime = timestamp;
380
395
  } else {
381
396
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
382
- const speechDuration = timestamp - this.speechStartTime;
383
397
  if (timeSinceSpeech < hangoverMs) {
384
398
  newState = "speaking";
385
- } else if (speechDuration < minSpeechDurationMs) {
386
- newState = "silent";
387
- this.lastSilenceTime = timestamp;
388
399
  } else {
389
400
  newState = "speech_ending";
390
401
  this.lastSilenceTime = timestamp;
391
402
  }
392
403
  }
393
404
  }
394
- if (newState === "speech_starting") newState = "speaking";
395
405
  if (newState === "speech_ending") newState = "silent";
396
406
  this.currentState = newState;
397
407
  return {
@@ -1,12 +1,12 @@
1
1
  import {
2
2
  attachProcessingToTrack
3
- } from "../chunk-IL4F7WVW.mjs";
4
- import "../chunk-CD5XFC5M.mjs";
5
- import "../chunk-VCQMZVO3.mjs";
3
+ } from "../chunk-Q2I22TJG.mjs";
4
+ import "../chunk-SMZJFNRU.mjs";
5
+ import "../chunk-DYY2MXMU.mjs";
6
6
  import "../chunk-OZ7KMC4S.mjs";
7
- import "../chunk-Z3QBDLTM.mjs";
7
+ import "../chunk-XZSFQJW4.mjs";
8
8
  import "../chunk-XO6B3D4A.mjs";
9
- import "../chunk-SMNOCQYR.mjs";
9
+ import "../chunk-KEWK2OKV.mjs";
10
10
  export {
11
11
  attachProcessingToTrack
12
12
  };
@@ -125,11 +125,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
125
125
  const energyParams = vadConfig?.energyVad || {};
126
126
  const smoothing = energyParams.smoothing ?? 0.95;
127
127
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
128
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
129
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
128
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
129
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
130
130
  const minSNR = energyParams.minSNR ?? 10;
131
131
  const snrRange = energyParams.snrRange ?? 10;
132
- const minEnergy = energyParams.minEnergy ?? 5e-4;
132
+ const minEnergy = energyParams.minEnergy ?? 1e-3;
133
133
  return `
134
134
  class EnergyVadProcessor extends AudioWorkletProcessor {
135
135
  constructor() {
@@ -183,9 +183,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
183
183
 
184
184
  let multiplier = 1.0;
185
185
  if (this.isSpeaking) {
186
- multiplier = 0.01;
186
+ multiplier = 0.05;
187
187
  } else if (snrDb > 20) {
188
- multiplier = 0.1;
188
+ multiplier = 0.2;
189
189
  }
190
190
 
191
191
  const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
@@ -193,8 +193,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
193
193
  }
194
194
 
195
195
  // Ensure noise floor doesn't drop to absolute zero
196
- // 0.0002 is approx -74dB, a reasonable floor for ambient noise
197
- this.noiseFloor = Math.max(this.noiseFloor, 0.0002);
196
+ // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
197
+ this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
198
198
 
199
199
  // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
200
200
  const snr = this.energy / (this.noiseFloor + 1e-6);
@@ -206,8 +206,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
206
206
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
207
207
 
208
208
  // Apply absolute energy threshold
209
+ // We use a soft threshold to avoid abrupt cutting
209
210
  if (this.energy < this.minEnergy) {
210
- probability = 0;
211
+ const energyRatio = this.energy / (this.minEnergy + 1e-6);
212
+ probability *= Math.pow(energyRatio, 2); // Quadratic falloff
211
213
  }
212
214
 
213
215
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -340,7 +342,7 @@ var VADStateMachine = class {
340
342
  noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
341
343
  minSNR: config?.energyVad?.minSNR ?? 10,
342
344
  snrRange: config?.energyVad?.snrRange ?? 10,
343
- minEnergy: config?.energyVad?.minEnergy ?? 5e-4
345
+ minEnergy: config?.energyVad?.minEnergy ?? 1e-3
344
346
  }
345
347
  };
346
348
  this.lastSilenceTime = Date.now();
@@ -371,25 +373,33 @@ var VADStateMachine = class {
371
373
  newState = "silent";
372
374
  this.lastSilenceTime = timestamp;
373
375
  }
374
- } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
376
+ } else if (this.currentState === "speech_starting") {
377
+ if (probability >= stopThreshold) {
378
+ const speechDuration = timestamp - this.speechStartTime;
379
+ if (speechDuration >= minSpeechDurationMs) {
380
+ newState = "speaking";
381
+ } else {
382
+ newState = "speech_starting";
383
+ }
384
+ this.lastSpeechTime = timestamp;
385
+ } else {
386
+ newState = "silent";
387
+ this.lastSilenceTime = timestamp;
388
+ }
389
+ } else if (this.currentState === "speaking") {
375
390
  if (probability >= stopThreshold) {
376
391
  newState = "speaking";
377
392
  this.lastSpeechTime = timestamp;
378
393
  } else {
379
394
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
380
- const speechDuration = timestamp - this.speechStartTime;
381
395
  if (timeSinceSpeech < hangoverMs) {
382
396
  newState = "speaking";
383
- } else if (speechDuration < minSpeechDurationMs) {
384
- newState = "silent";
385
- this.lastSilenceTime = timestamp;
386
397
  } else {
387
398
  newState = "speech_ending";
388
399
  this.lastSilenceTime = timestamp;
389
400
  }
390
401
  }
391
402
  }
392
- if (newState === "speech_starting") newState = "speaking";
393
403
  if (newState === "speech_ending") newState = "silent";
394
404
  this.currentState = newState;
395
405
  return {
@@ -1,11 +1,11 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "../chunk-CD5XFC5M.mjs";
4
- import "../chunk-VCQMZVO3.mjs";
3
+ } from "../chunk-SMZJFNRU.mjs";
4
+ import "../chunk-DYY2MXMU.mjs";
5
5
  import "../chunk-OZ7KMC4S.mjs";
6
- import "../chunk-Z3QBDLTM.mjs";
6
+ import "../chunk-XZSFQJW4.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-SMNOCQYR.mjs";
8
+ import "../chunk-KEWK2OKV.mjs";
9
9
  export {
10
10
  createAudioPipeline
11
11
  };
package/dist/types.d.mts CHANGED
@@ -97,12 +97,12 @@ interface AudioProcessingConfig {
97
97
  initialNoiseFloor?: number;
98
98
  /**
99
99
  * Rate at which noise floor adapts to quiet signals (0-1).
100
- * Default: 0.005 (slower downward drift)
100
+ * Default: 0.01
101
101
  */
102
102
  noiseFloorAdaptRateQuiet?: number;
103
103
  /**
104
104
  * Rate at which noise floor adapts to loud signals (0-1).
105
- * Default: 0.01
105
+ * Default: 0.05 (faster tracking of rising noise)
106
106
  */
107
107
  noiseFloorAdaptRateLoud?: number;
108
108
  /**
@@ -118,7 +118,7 @@ interface AudioProcessingConfig {
118
118
  /**
119
119
  * Minimum absolute RMS energy to consider as speech.
120
120
  * Prevents triggering on very quiet background noise in silent rooms.
121
- * Default: 0.0005
121
+ * Default: 0.001 (approx -60dB)
122
122
  */
123
123
  minEnergy?: number;
124
124
  };
package/dist/types.d.ts CHANGED
@@ -97,12 +97,12 @@ interface AudioProcessingConfig {
97
97
  initialNoiseFloor?: number;
98
98
  /**
99
99
  * Rate at which noise floor adapts to quiet signals (0-1).
100
- * Default: 0.005 (slower downward drift)
100
+ * Default: 0.01
101
101
  */
102
102
  noiseFloorAdaptRateQuiet?: number;
103
103
  /**
104
104
  * Rate at which noise floor adapts to loud signals (0-1).
105
- * Default: 0.01
105
+ * Default: 0.05 (faster tracking of rising noise)
106
106
  */
107
107
  noiseFloorAdaptRateLoud?: number;
108
108
  /**
@@ -118,7 +118,7 @@ interface AudioProcessingConfig {
118
118
  /**
119
119
  * Minimum absolute RMS energy to consider as speech.
120
120
  * Prevents triggering on very quiet background noise in silent rooms.
121
- * Default: 0.0005
121
+ * Default: 0.001 (approx -60dB)
122
122
  */
123
123
  minEnergy?: number;
124
124
  };
@@ -27,11 +27,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
27
27
  const energyParams = vadConfig?.energyVad || {};
28
28
  const smoothing = energyParams.smoothing ?? 0.95;
29
29
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
30
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 5e-3;
31
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
30
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
31
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
32
32
  const minSNR = energyParams.minSNR ?? 10;
33
33
  const snrRange = energyParams.snrRange ?? 10;
34
- const minEnergy = energyParams.minEnergy ?? 5e-4;
34
+ const minEnergy = energyParams.minEnergy ?? 1e-3;
35
35
  return `
36
36
  class EnergyVadProcessor extends AudioWorkletProcessor {
37
37
  constructor() {
@@ -85,9 +85,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
85
85
 
86
86
  let multiplier = 1.0;
87
87
  if (this.isSpeaking) {
88
- multiplier = 0.01;
88
+ multiplier = 0.05;
89
89
  } else if (snrDb > 20) {
90
- multiplier = 0.1;
90
+ multiplier = 0.2;
91
91
  }
92
92
 
93
93
  const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
@@ -95,8 +95,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
95
95
  }
96
96
 
97
97
  // Ensure noise floor doesn't drop to absolute zero
98
- // 0.0002 is approx -74dB, a reasonable floor for ambient noise
99
- this.noiseFloor = Math.max(this.noiseFloor, 0.0002);
98
+ // 0.00005 is approx -86dB, very quiet but prevents SNR explosion
99
+ this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
100
100
 
101
101
  // Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
102
102
  const snr = this.energy / (this.noiseFloor + 1e-6);
@@ -108,8 +108,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
108
108
  let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
109
109
 
110
110
  // Apply absolute energy threshold
111
+ // We use a soft threshold to avoid abrupt cutting
111
112
  if (this.energy < this.minEnergy) {
112
- probability = 0;
113
+ const energyRatio = this.energy / (this.minEnergy + 1e-6);
114
+ probability *= Math.pow(energyRatio, 2); // Quadratic falloff
113
115
  }
114
116
 
115
117
  this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  EnergyVADPlugin
3
- } from "../chunk-SMNOCQYR.mjs";
3
+ } from "../chunk-KEWK2OKV.mjs";
4
4
  export {
5
5
  EnergyVADPlugin
6
6
  };
@@ -54,7 +54,7 @@ var VADStateMachine = class {
54
54
  noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
55
55
  minSNR: config?.energyVad?.minSNR ?? 10,
56
56
  snrRange: config?.energyVad?.snrRange ?? 10,
57
- minEnergy: config?.energyVad?.minEnergy ?? 5e-4
57
+ minEnergy: config?.energyVad?.minEnergy ?? 1e-3
58
58
  }
59
59
  };
60
60
  this.lastSilenceTime = Date.now();
@@ -85,25 +85,33 @@ var VADStateMachine = class {
85
85
  newState = "silent";
86
86
  this.lastSilenceTime = timestamp;
87
87
  }
88
- } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
88
+ } else if (this.currentState === "speech_starting") {
89
+ if (probability >= stopThreshold) {
90
+ const speechDuration = timestamp - this.speechStartTime;
91
+ if (speechDuration >= minSpeechDurationMs) {
92
+ newState = "speaking";
93
+ } else {
94
+ newState = "speech_starting";
95
+ }
96
+ this.lastSpeechTime = timestamp;
97
+ } else {
98
+ newState = "silent";
99
+ this.lastSilenceTime = timestamp;
100
+ }
101
+ } else if (this.currentState === "speaking") {
89
102
  if (probability >= stopThreshold) {
90
103
  newState = "speaking";
91
104
  this.lastSpeechTime = timestamp;
92
105
  } else {
93
106
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
94
- const speechDuration = timestamp - this.speechStartTime;
95
107
  if (timeSinceSpeech < hangoverMs) {
96
108
  newState = "speaking";
97
- } else if (speechDuration < minSpeechDurationMs) {
98
- newState = "silent";
99
- this.lastSilenceTime = timestamp;
100
109
  } else {
101
110
  newState = "speech_ending";
102
111
  this.lastSilenceTime = timestamp;
103
112
  }
104
113
  }
105
114
  }
106
- if (newState === "speech_starting") newState = "speaking";
107
115
  if (newState === "speech_ending") newState = "silent";
108
116
  this.currentState = newState;
109
117
  return {
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  VADStateMachine
3
- } from "../chunk-VCQMZVO3.mjs";
3
+ } from "../chunk-DYY2MXMU.mjs";
4
4
  export {
5
5
  VADStateMachine
6
6
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tensamin/audio",
3
- "version": "0.1.10",
3
+ "version": "0.1.12",
4
4
  "main": "dist/index.js",
5
5
  "module": "dist/index.mjs",
6
6
  "types": "dist/index.d.ts",