@tensamin/audio 0.1.10 → 0.1.12
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/{chunk-VCQMZVO3.mjs → chunk-DYY2MXMU.mjs} +15 -7
- package/dist/{chunk-SMNOCQYR.mjs → chunk-KEWK2OKV.mjs} +10 -8
- package/dist/{chunk-IL4F7WVW.mjs → chunk-Q2I22TJG.mjs} +1 -1
- package/dist/{chunk-CD5XFC5M.mjs → chunk-SMZJFNRU.mjs} +2 -2
- package/dist/{chunk-Z3QBDLTM.mjs → chunk-XZSFQJW4.mjs} +1 -1
- package/dist/extensibility/plugins.js +10 -8
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +25 -15
- package/dist/index.mjs +5 -5
- package/dist/livekit/integration.js +25 -15
- package/dist/livekit/integration.mjs +5 -5
- package/dist/pipeline/audio-pipeline.js +25 -15
- package/dist/pipeline/audio-pipeline.mjs +4 -4
- package/dist/types.d.mts +3 -3
- package/dist/types.d.ts +3 -3
- package/dist/vad/vad-node.js +10 -8
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.js +15 -7
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -101,11 +101,11 @@ vad: {
|
|
|
101
101
|
energyVad?: {
|
|
102
102
|
smoothing: number; // Default: 0.95
|
|
103
103
|
initialNoiseFloor: number; // Default: 0.001
|
|
104
|
-
noiseFloorAdaptRateQuiet: number; // Default: 0.
|
|
105
|
-
noiseFloorAdaptRateLoud: number; // Default: 0.
|
|
104
|
+
noiseFloorAdaptRateQuiet: number; // Default: 0.01
|
|
105
|
+
noiseFloorAdaptRateLoud: number; // Default: 0.05
|
|
106
106
|
minSNR: number; // Default: 10.0 (dB)
|
|
107
107
|
snrRange: number; // Default: 10.0 (dB)
|
|
108
|
-
minEnergy: number; // Default: 0.
|
|
108
|
+
minEnergy: number; // Default: 0.001
|
|
109
109
|
};
|
|
110
110
|
}
|
|
111
111
|
```
|
|
@@ -116,7 +116,7 @@ vad: {
|
|
|
116
116
|
- `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
|
|
117
117
|
- `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
|
|
118
118
|
- `preRollMs`: Audio buffer duration before speech onset
|
|
119
|
-
- `minSpeechDurationMs`: Minimum duration to consider as valid speech
|
|
119
|
+
- `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 150ms)
|
|
120
120
|
- `minSilenceDurationMs`: Minimum silence duration between speech segments
|
|
121
121
|
|
|
122
122
|
**Energy VAD Parameters:**
|
|
@@ -124,7 +124,7 @@ vad: {
|
|
|
124
124
|
- `smoothing`: Energy calculation smoothing factor (0-1)
|
|
125
125
|
- `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
|
|
126
126
|
- `snrRange`: Range in dB for probability scaling from minSNR
|
|
127
|
-
- `minEnergy`: Minimum absolute RMS energy to consider as speech
|
|
127
|
+
- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.001, ~-60dB)
|
|
128
128
|
|
|
129
129
|
### Output Control
|
|
130
130
|
|
|
@@ -30,7 +30,7 @@ var VADStateMachine = class {
|
|
|
30
30
|
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
31
31
|
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
32
32
|
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
33
|
-
minEnergy: config?.energyVad?.minEnergy ??
|
|
33
|
+
minEnergy: config?.energyVad?.minEnergy ?? 1e-3
|
|
34
34
|
}
|
|
35
35
|
};
|
|
36
36
|
this.lastSilenceTime = Date.now();
|
|
@@ -61,25 +61,33 @@ var VADStateMachine = class {
|
|
|
61
61
|
newState = "silent";
|
|
62
62
|
this.lastSilenceTime = timestamp;
|
|
63
63
|
}
|
|
64
|
-
} else if (this.currentState === "speech_starting"
|
|
64
|
+
} else if (this.currentState === "speech_starting") {
|
|
65
|
+
if (probability >= stopThreshold) {
|
|
66
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
67
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
68
|
+
newState = "speaking";
|
|
69
|
+
} else {
|
|
70
|
+
newState = "speech_starting";
|
|
71
|
+
}
|
|
72
|
+
this.lastSpeechTime = timestamp;
|
|
73
|
+
} else {
|
|
74
|
+
newState = "silent";
|
|
75
|
+
this.lastSilenceTime = timestamp;
|
|
76
|
+
}
|
|
77
|
+
} else if (this.currentState === "speaking") {
|
|
65
78
|
if (probability >= stopThreshold) {
|
|
66
79
|
newState = "speaking";
|
|
67
80
|
this.lastSpeechTime = timestamp;
|
|
68
81
|
} else {
|
|
69
82
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
70
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
71
83
|
if (timeSinceSpeech < hangoverMs) {
|
|
72
84
|
newState = "speaking";
|
|
73
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
74
|
-
newState = "silent";
|
|
75
|
-
this.lastSilenceTime = timestamp;
|
|
76
85
|
} else {
|
|
77
86
|
newState = "speech_ending";
|
|
78
87
|
this.lastSilenceTime = timestamp;
|
|
79
88
|
}
|
|
80
89
|
}
|
|
81
90
|
}
|
|
82
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
83
91
|
if (newState === "speech_ending") newState = "silent";
|
|
84
92
|
this.currentState = newState;
|
|
85
93
|
return {
|
|
@@ -3,11 +3,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
3
3
|
const energyParams = vadConfig?.energyVad || {};
|
|
4
4
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
5
5
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
6
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
7
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
6
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
7
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
8
8
|
const minSNR = energyParams.minSNR ?? 10;
|
|
9
9
|
const snrRange = energyParams.snrRange ?? 10;
|
|
10
|
-
const minEnergy = energyParams.minEnergy ??
|
|
10
|
+
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
11
11
|
return `
|
|
12
12
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
13
13
|
constructor() {
|
|
@@ -61,9 +61,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
61
61
|
|
|
62
62
|
let multiplier = 1.0;
|
|
63
63
|
if (this.isSpeaking) {
|
|
64
|
-
multiplier = 0.
|
|
64
|
+
multiplier = 0.05;
|
|
65
65
|
} else if (snrDb > 20) {
|
|
66
|
-
multiplier = 0.
|
|
66
|
+
multiplier = 0.2;
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -71,8 +71,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
// Ensure noise floor doesn't drop to absolute zero
|
|
74
|
-
// 0.
|
|
75
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.
|
|
74
|
+
// 0.00005 is approx -86dB, very quiet but prevents SNR explosion
|
|
75
|
+
this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
|
|
76
76
|
|
|
77
77
|
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
78
78
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
@@ -84,8 +84,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
84
84
|
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
85
85
|
|
|
86
86
|
// Apply absolute energy threshold
|
|
87
|
+
// We use a soft threshold to avoid abrupt cutting
|
|
87
88
|
if (this.energy < this.minEnergy) {
|
|
88
|
-
|
|
89
|
+
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
90
|
+
probability *= Math.pow(energyRatio, 2); // Quadratic falloff
|
|
89
91
|
}
|
|
90
92
|
|
|
91
93
|
this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
VADStateMachine
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-DYY2MXMU.mjs";
|
|
4
4
|
import {
|
|
5
5
|
getAudioContext,
|
|
6
6
|
registerPipeline,
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
import {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-XZSFQJW4.mjs";
|
|
13
13
|
|
|
14
14
|
// src/pipeline/audio-pipeline.ts
|
|
15
15
|
import mitt from "mitt";
|
|
@@ -106,11 +106,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
106
106
|
const energyParams = vadConfig?.energyVad || {};
|
|
107
107
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
108
108
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
109
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
110
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
109
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
110
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
111
111
|
const minSNR = energyParams.minSNR ?? 10;
|
|
112
112
|
const snrRange = energyParams.snrRange ?? 10;
|
|
113
|
-
const minEnergy = energyParams.minEnergy ??
|
|
113
|
+
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
114
114
|
return `
|
|
115
115
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
116
116
|
constructor() {
|
|
@@ -164,9 +164,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
164
164
|
|
|
165
165
|
let multiplier = 1.0;
|
|
166
166
|
if (this.isSpeaking) {
|
|
167
|
-
multiplier = 0.
|
|
167
|
+
multiplier = 0.05;
|
|
168
168
|
} else if (snrDb > 20) {
|
|
169
|
-
multiplier = 0.
|
|
169
|
+
multiplier = 0.2;
|
|
170
170
|
}
|
|
171
171
|
|
|
172
172
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -174,8 +174,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
174
174
|
}
|
|
175
175
|
|
|
176
176
|
// Ensure noise floor doesn't drop to absolute zero
|
|
177
|
-
// 0.
|
|
178
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.
|
|
177
|
+
// 0.00005 is approx -86dB, very quiet but prevents SNR explosion
|
|
178
|
+
this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
|
|
179
179
|
|
|
180
180
|
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
181
181
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
@@ -187,8 +187,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
187
187
|
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
188
188
|
|
|
189
189
|
// Apply absolute energy threshold
|
|
190
|
+
// We use a soft threshold to avoid abrupt cutting
|
|
190
191
|
if (this.energy < this.minEnergy) {
|
|
191
|
-
|
|
192
|
+
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
193
|
+
probability *= Math.pow(energyRatio, 2); // Quadratic falloff
|
|
192
194
|
}
|
|
193
195
|
|
|
194
196
|
this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-
|
|
6
|
+
} from "../chunk-XZSFQJW4.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-KEWK2OKV.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|
package/dist/index.js
CHANGED
|
@@ -158,11 +158,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
158
158
|
const energyParams = vadConfig?.energyVad || {};
|
|
159
159
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
160
160
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
161
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
162
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
161
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
162
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
163
163
|
const minSNR = energyParams.minSNR ?? 10;
|
|
164
164
|
const snrRange = energyParams.snrRange ?? 10;
|
|
165
|
-
const minEnergy = energyParams.minEnergy ??
|
|
165
|
+
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
166
166
|
return `
|
|
167
167
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
168
168
|
constructor() {
|
|
@@ -216,9 +216,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
216
216
|
|
|
217
217
|
let multiplier = 1.0;
|
|
218
218
|
if (this.isSpeaking) {
|
|
219
|
-
multiplier = 0.
|
|
219
|
+
multiplier = 0.05;
|
|
220
220
|
} else if (snrDb > 20) {
|
|
221
|
-
multiplier = 0.
|
|
221
|
+
multiplier = 0.2;
|
|
222
222
|
}
|
|
223
223
|
|
|
224
224
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -226,8 +226,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
226
226
|
}
|
|
227
227
|
|
|
228
228
|
// Ensure noise floor doesn't drop to absolute zero
|
|
229
|
-
// 0.
|
|
230
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.
|
|
229
|
+
// 0.00005 is approx -86dB, very quiet but prevents SNR explosion
|
|
230
|
+
this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
|
|
231
231
|
|
|
232
232
|
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
233
233
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
@@ -239,8 +239,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
239
239
|
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
240
240
|
|
|
241
241
|
// Apply absolute energy threshold
|
|
242
|
+
// We use a soft threshold to avoid abrupt cutting
|
|
242
243
|
if (this.energy < this.minEnergy) {
|
|
243
|
-
|
|
244
|
+
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
245
|
+
probability *= Math.pow(energyRatio, 2); // Quadratic falloff
|
|
244
246
|
}
|
|
245
247
|
|
|
246
248
|
this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
|
|
@@ -379,7 +381,7 @@ var VADStateMachine = class {
|
|
|
379
381
|
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
380
382
|
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
381
383
|
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
382
|
-
minEnergy: config?.energyVad?.minEnergy ??
|
|
384
|
+
minEnergy: config?.energyVad?.minEnergy ?? 1e-3
|
|
383
385
|
}
|
|
384
386
|
};
|
|
385
387
|
this.lastSilenceTime = Date.now();
|
|
@@ -410,25 +412,33 @@ var VADStateMachine = class {
|
|
|
410
412
|
newState = "silent";
|
|
411
413
|
this.lastSilenceTime = timestamp;
|
|
412
414
|
}
|
|
413
|
-
} else if (this.currentState === "speech_starting"
|
|
415
|
+
} else if (this.currentState === "speech_starting") {
|
|
416
|
+
if (probability >= stopThreshold) {
|
|
417
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
418
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
419
|
+
newState = "speaking";
|
|
420
|
+
} else {
|
|
421
|
+
newState = "speech_starting";
|
|
422
|
+
}
|
|
423
|
+
this.lastSpeechTime = timestamp;
|
|
424
|
+
} else {
|
|
425
|
+
newState = "silent";
|
|
426
|
+
this.lastSilenceTime = timestamp;
|
|
427
|
+
}
|
|
428
|
+
} else if (this.currentState === "speaking") {
|
|
414
429
|
if (probability >= stopThreshold) {
|
|
415
430
|
newState = "speaking";
|
|
416
431
|
this.lastSpeechTime = timestamp;
|
|
417
432
|
} else {
|
|
418
433
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
419
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
420
434
|
if (timeSinceSpeech < hangoverMs) {
|
|
421
435
|
newState = "speaking";
|
|
422
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
423
|
-
newState = "silent";
|
|
424
|
-
this.lastSilenceTime = timestamp;
|
|
425
436
|
} else {
|
|
426
437
|
newState = "speech_ending";
|
|
427
438
|
this.lastSilenceTime = timestamp;
|
|
428
439
|
}
|
|
429
440
|
}
|
|
430
441
|
}
|
|
431
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
432
442
|
if (newState === "speech_ending") newState = "silent";
|
|
433
443
|
this.currentState = newState;
|
|
434
444
|
return {
|
package/dist/index.mjs
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import "./chunk-WBQAMGXK.mjs";
|
|
2
2
|
import {
|
|
3
3
|
attachProcessingToTrack
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-Q2I22TJG.mjs";
|
|
5
5
|
import {
|
|
6
6
|
createAudioPipeline
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-SMZJFNRU.mjs";
|
|
8
8
|
import {
|
|
9
9
|
VADStateMachine
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-DYY2MXMU.mjs";
|
|
11
11
|
import {
|
|
12
12
|
closeAudioContext,
|
|
13
13
|
getAudioContext,
|
|
@@ -21,13 +21,13 @@ import {
|
|
|
21
21
|
getVADPlugin,
|
|
22
22
|
registerNoiseSuppressionPlugin,
|
|
23
23
|
registerVADPlugin
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-XZSFQJW4.mjs";
|
|
25
25
|
import {
|
|
26
26
|
RNNoisePlugin
|
|
27
27
|
} from "./chunk-XO6B3D4A.mjs";
|
|
28
28
|
import {
|
|
29
29
|
EnergyVADPlugin
|
|
30
|
-
} from "./chunk-
|
|
30
|
+
} from "./chunk-KEWK2OKV.mjs";
|
|
31
31
|
export {
|
|
32
32
|
EnergyVADPlugin,
|
|
33
33
|
RNNoisePlugin,
|
|
@@ -127,11 +127,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
127
127
|
const energyParams = vadConfig?.energyVad || {};
|
|
128
128
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
129
129
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
130
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
131
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
130
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
131
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
132
132
|
const minSNR = energyParams.minSNR ?? 10;
|
|
133
133
|
const snrRange = energyParams.snrRange ?? 10;
|
|
134
|
-
const minEnergy = energyParams.minEnergy ??
|
|
134
|
+
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
135
135
|
return `
|
|
136
136
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
137
137
|
constructor() {
|
|
@@ -185,9 +185,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
185
185
|
|
|
186
186
|
let multiplier = 1.0;
|
|
187
187
|
if (this.isSpeaking) {
|
|
188
|
-
multiplier = 0.
|
|
188
|
+
multiplier = 0.05;
|
|
189
189
|
} else if (snrDb > 20) {
|
|
190
|
-
multiplier = 0.
|
|
190
|
+
multiplier = 0.2;
|
|
191
191
|
}
|
|
192
192
|
|
|
193
193
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -195,8 +195,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
195
195
|
}
|
|
196
196
|
|
|
197
197
|
// Ensure noise floor doesn't drop to absolute zero
|
|
198
|
-
// 0.
|
|
199
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.
|
|
198
|
+
// 0.00005 is approx -86dB, very quiet but prevents SNR explosion
|
|
199
|
+
this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
|
|
200
200
|
|
|
201
201
|
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
202
202
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
@@ -208,8 +208,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
208
208
|
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
209
209
|
|
|
210
210
|
// Apply absolute energy threshold
|
|
211
|
+
// We use a soft threshold to avoid abrupt cutting
|
|
211
212
|
if (this.energy < this.minEnergy) {
|
|
212
|
-
|
|
213
|
+
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
214
|
+
probability *= Math.pow(energyRatio, 2); // Quadratic falloff
|
|
213
215
|
}
|
|
214
216
|
|
|
215
217
|
this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
|
|
@@ -342,7 +344,7 @@ var VADStateMachine = class {
|
|
|
342
344
|
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
343
345
|
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
344
346
|
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
345
|
-
minEnergy: config?.energyVad?.minEnergy ??
|
|
347
|
+
minEnergy: config?.energyVad?.minEnergy ?? 1e-3
|
|
346
348
|
}
|
|
347
349
|
};
|
|
348
350
|
this.lastSilenceTime = Date.now();
|
|
@@ -373,25 +375,33 @@ var VADStateMachine = class {
|
|
|
373
375
|
newState = "silent";
|
|
374
376
|
this.lastSilenceTime = timestamp;
|
|
375
377
|
}
|
|
376
|
-
} else if (this.currentState === "speech_starting"
|
|
378
|
+
} else if (this.currentState === "speech_starting") {
|
|
379
|
+
if (probability >= stopThreshold) {
|
|
380
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
381
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
382
|
+
newState = "speaking";
|
|
383
|
+
} else {
|
|
384
|
+
newState = "speech_starting";
|
|
385
|
+
}
|
|
386
|
+
this.lastSpeechTime = timestamp;
|
|
387
|
+
} else {
|
|
388
|
+
newState = "silent";
|
|
389
|
+
this.lastSilenceTime = timestamp;
|
|
390
|
+
}
|
|
391
|
+
} else if (this.currentState === "speaking") {
|
|
377
392
|
if (probability >= stopThreshold) {
|
|
378
393
|
newState = "speaking";
|
|
379
394
|
this.lastSpeechTime = timestamp;
|
|
380
395
|
} else {
|
|
381
396
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
382
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
383
397
|
if (timeSinceSpeech < hangoverMs) {
|
|
384
398
|
newState = "speaking";
|
|
385
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
386
|
-
newState = "silent";
|
|
387
|
-
this.lastSilenceTime = timestamp;
|
|
388
399
|
} else {
|
|
389
400
|
newState = "speech_ending";
|
|
390
401
|
this.lastSilenceTime = timestamp;
|
|
391
402
|
}
|
|
392
403
|
}
|
|
393
404
|
}
|
|
394
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
395
405
|
if (newState === "speech_ending") newState = "silent";
|
|
396
406
|
this.currentState = newState;
|
|
397
407
|
return {
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
attachProcessingToTrack
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
5
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-Q2I22TJG.mjs";
|
|
4
|
+
import "../chunk-SMZJFNRU.mjs";
|
|
5
|
+
import "../chunk-DYY2MXMU.mjs";
|
|
6
6
|
import "../chunk-OZ7KMC4S.mjs";
|
|
7
|
-
import "../chunk-
|
|
7
|
+
import "../chunk-XZSFQJW4.mjs";
|
|
8
8
|
import "../chunk-XO6B3D4A.mjs";
|
|
9
|
-
import "../chunk-
|
|
9
|
+
import "../chunk-KEWK2OKV.mjs";
|
|
10
10
|
export {
|
|
11
11
|
attachProcessingToTrack
|
|
12
12
|
};
|
|
@@ -125,11 +125,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
125
125
|
const energyParams = vadConfig?.energyVad || {};
|
|
126
126
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
127
127
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
128
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
129
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
128
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
129
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
130
130
|
const minSNR = energyParams.minSNR ?? 10;
|
|
131
131
|
const snrRange = energyParams.snrRange ?? 10;
|
|
132
|
-
const minEnergy = energyParams.minEnergy ??
|
|
132
|
+
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
133
133
|
return `
|
|
134
134
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
135
135
|
constructor() {
|
|
@@ -183,9 +183,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
183
183
|
|
|
184
184
|
let multiplier = 1.0;
|
|
185
185
|
if (this.isSpeaking) {
|
|
186
|
-
multiplier = 0.
|
|
186
|
+
multiplier = 0.05;
|
|
187
187
|
} else if (snrDb > 20) {
|
|
188
|
-
multiplier = 0.
|
|
188
|
+
multiplier = 0.2;
|
|
189
189
|
}
|
|
190
190
|
|
|
191
191
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -193,8 +193,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
193
193
|
}
|
|
194
194
|
|
|
195
195
|
// Ensure noise floor doesn't drop to absolute zero
|
|
196
|
-
// 0.
|
|
197
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.
|
|
196
|
+
// 0.00005 is approx -86dB, very quiet but prevents SNR explosion
|
|
197
|
+
this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
|
|
198
198
|
|
|
199
199
|
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
200
200
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
@@ -206,8 +206,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
206
206
|
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
207
207
|
|
|
208
208
|
// Apply absolute energy threshold
|
|
209
|
+
// We use a soft threshold to avoid abrupt cutting
|
|
209
210
|
if (this.energy < this.minEnergy) {
|
|
210
|
-
|
|
211
|
+
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
212
|
+
probability *= Math.pow(energyRatio, 2); // Quadratic falloff
|
|
211
213
|
}
|
|
212
214
|
|
|
213
215
|
this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
|
|
@@ -340,7 +342,7 @@ var VADStateMachine = class {
|
|
|
340
342
|
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
341
343
|
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
342
344
|
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
343
|
-
minEnergy: config?.energyVad?.minEnergy ??
|
|
345
|
+
minEnergy: config?.energyVad?.minEnergy ?? 1e-3
|
|
344
346
|
}
|
|
345
347
|
};
|
|
346
348
|
this.lastSilenceTime = Date.now();
|
|
@@ -371,25 +373,33 @@ var VADStateMachine = class {
|
|
|
371
373
|
newState = "silent";
|
|
372
374
|
this.lastSilenceTime = timestamp;
|
|
373
375
|
}
|
|
374
|
-
} else if (this.currentState === "speech_starting"
|
|
376
|
+
} else if (this.currentState === "speech_starting") {
|
|
377
|
+
if (probability >= stopThreshold) {
|
|
378
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
379
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
380
|
+
newState = "speaking";
|
|
381
|
+
} else {
|
|
382
|
+
newState = "speech_starting";
|
|
383
|
+
}
|
|
384
|
+
this.lastSpeechTime = timestamp;
|
|
385
|
+
} else {
|
|
386
|
+
newState = "silent";
|
|
387
|
+
this.lastSilenceTime = timestamp;
|
|
388
|
+
}
|
|
389
|
+
} else if (this.currentState === "speaking") {
|
|
375
390
|
if (probability >= stopThreshold) {
|
|
376
391
|
newState = "speaking";
|
|
377
392
|
this.lastSpeechTime = timestamp;
|
|
378
393
|
} else {
|
|
379
394
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
380
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
381
395
|
if (timeSinceSpeech < hangoverMs) {
|
|
382
396
|
newState = "speaking";
|
|
383
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
384
|
-
newState = "silent";
|
|
385
|
-
this.lastSilenceTime = timestamp;
|
|
386
397
|
} else {
|
|
387
398
|
newState = "speech_ending";
|
|
388
399
|
this.lastSilenceTime = timestamp;
|
|
389
400
|
}
|
|
390
401
|
}
|
|
391
402
|
}
|
|
392
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
393
403
|
if (newState === "speech_ending") newState = "silent";
|
|
394
404
|
this.currentState = newState;
|
|
395
405
|
return {
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createAudioPipeline
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-SMZJFNRU.mjs";
|
|
4
|
+
import "../chunk-DYY2MXMU.mjs";
|
|
5
5
|
import "../chunk-OZ7KMC4S.mjs";
|
|
6
|
-
import "../chunk-
|
|
6
|
+
import "../chunk-XZSFQJW4.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-KEWK2OKV.mjs";
|
|
9
9
|
export {
|
|
10
10
|
createAudioPipeline
|
|
11
11
|
};
|
package/dist/types.d.mts
CHANGED
|
@@ -97,12 +97,12 @@ interface AudioProcessingConfig {
|
|
|
97
97
|
initialNoiseFloor?: number;
|
|
98
98
|
/**
|
|
99
99
|
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.
|
|
100
|
+
* Default: 0.01
|
|
101
101
|
*/
|
|
102
102
|
noiseFloorAdaptRateQuiet?: number;
|
|
103
103
|
/**
|
|
104
104
|
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Default: 0.
|
|
105
|
+
* Default: 0.05 (faster tracking of rising noise)
|
|
106
106
|
*/
|
|
107
107
|
noiseFloorAdaptRateLoud?: number;
|
|
108
108
|
/**
|
|
@@ -118,7 +118,7 @@ interface AudioProcessingConfig {
|
|
|
118
118
|
/**
|
|
119
119
|
* Minimum absolute RMS energy to consider as speech.
|
|
120
120
|
* Prevents triggering on very quiet background noise in silent rooms.
|
|
121
|
-
* Default: 0.
|
|
121
|
+
* Default: 0.001 (approx -60dB)
|
|
122
122
|
*/
|
|
123
123
|
minEnergy?: number;
|
|
124
124
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -97,12 +97,12 @@ interface AudioProcessingConfig {
|
|
|
97
97
|
initialNoiseFloor?: number;
|
|
98
98
|
/**
|
|
99
99
|
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.
|
|
100
|
+
* Default: 0.01
|
|
101
101
|
*/
|
|
102
102
|
noiseFloorAdaptRateQuiet?: number;
|
|
103
103
|
/**
|
|
104
104
|
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Default: 0.
|
|
105
|
+
* Default: 0.05 (faster tracking of rising noise)
|
|
106
106
|
*/
|
|
107
107
|
noiseFloorAdaptRateLoud?: number;
|
|
108
108
|
/**
|
|
@@ -118,7 +118,7 @@ interface AudioProcessingConfig {
|
|
|
118
118
|
/**
|
|
119
119
|
* Minimum absolute RMS energy to consider as speech.
|
|
120
120
|
* Prevents triggering on very quiet background noise in silent rooms.
|
|
121
|
-
* Default: 0.
|
|
121
|
+
* Default: 0.001 (approx -60dB)
|
|
122
122
|
*/
|
|
123
123
|
minEnergy?: number;
|
|
124
124
|
};
|
package/dist/vad/vad-node.js
CHANGED
|
@@ -27,11 +27,11 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
27
27
|
const energyParams = vadConfig?.energyVad || {};
|
|
28
28
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
29
29
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
30
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ??
|
|
31
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.
|
|
30
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
31
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.05;
|
|
32
32
|
const minSNR = energyParams.minSNR ?? 10;
|
|
33
33
|
const snrRange = energyParams.snrRange ?? 10;
|
|
34
|
-
const minEnergy = energyParams.minEnergy ??
|
|
34
|
+
const minEnergy = energyParams.minEnergy ?? 1e-3;
|
|
35
35
|
return `
|
|
36
36
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
37
37
|
constructor() {
|
|
@@ -85,9 +85,9 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
85
85
|
|
|
86
86
|
let multiplier = 1.0;
|
|
87
87
|
if (this.isSpeaking) {
|
|
88
|
-
multiplier = 0.
|
|
88
|
+
multiplier = 0.05;
|
|
89
89
|
} else if (snrDb > 20) {
|
|
90
|
-
multiplier = 0.
|
|
90
|
+
multiplier = 0.2;
|
|
91
91
|
}
|
|
92
92
|
|
|
93
93
|
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
@@ -95,8 +95,8 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
// Ensure noise floor doesn't drop to absolute zero
|
|
98
|
-
// 0.
|
|
99
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.
|
|
98
|
+
// 0.00005 is approx -86dB, very quiet but prevents SNR explosion
|
|
99
|
+
this.noiseFloor = Math.max(this.noiseFloor, 0.00005);
|
|
100
100
|
|
|
101
101
|
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
102
102
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
@@ -108,8 +108,10 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
108
108
|
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
109
109
|
|
|
110
110
|
// Apply absolute energy threshold
|
|
111
|
+
// We use a soft threshold to avoid abrupt cutting
|
|
111
112
|
if (this.energy < this.minEnergy) {
|
|
112
|
-
|
|
113
|
+
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
114
|
+
probability *= Math.pow(energyRatio, 2); // Quadratic falloff
|
|
113
115
|
}
|
|
114
116
|
|
|
115
117
|
this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
|
package/dist/vad/vad-node.mjs
CHANGED
package/dist/vad/vad-state.js
CHANGED
|
@@ -54,7 +54,7 @@ var VADStateMachine = class {
|
|
|
54
54
|
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
55
55
|
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
56
56
|
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
57
|
-
minEnergy: config?.energyVad?.minEnergy ??
|
|
57
|
+
minEnergy: config?.energyVad?.minEnergy ?? 1e-3
|
|
58
58
|
}
|
|
59
59
|
};
|
|
60
60
|
this.lastSilenceTime = Date.now();
|
|
@@ -85,25 +85,33 @@ var VADStateMachine = class {
|
|
|
85
85
|
newState = "silent";
|
|
86
86
|
this.lastSilenceTime = timestamp;
|
|
87
87
|
}
|
|
88
|
-
} else if (this.currentState === "speech_starting"
|
|
88
|
+
} else if (this.currentState === "speech_starting") {
|
|
89
|
+
if (probability >= stopThreshold) {
|
|
90
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
91
|
+
if (speechDuration >= minSpeechDurationMs) {
|
|
92
|
+
newState = "speaking";
|
|
93
|
+
} else {
|
|
94
|
+
newState = "speech_starting";
|
|
95
|
+
}
|
|
96
|
+
this.lastSpeechTime = timestamp;
|
|
97
|
+
} else {
|
|
98
|
+
newState = "silent";
|
|
99
|
+
this.lastSilenceTime = timestamp;
|
|
100
|
+
}
|
|
101
|
+
} else if (this.currentState === "speaking") {
|
|
89
102
|
if (probability >= stopThreshold) {
|
|
90
103
|
newState = "speaking";
|
|
91
104
|
this.lastSpeechTime = timestamp;
|
|
92
105
|
} else {
|
|
93
106
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
94
|
-
const speechDuration = timestamp - this.speechStartTime;
|
|
95
107
|
if (timeSinceSpeech < hangoverMs) {
|
|
96
108
|
newState = "speaking";
|
|
97
|
-
} else if (speechDuration < minSpeechDurationMs) {
|
|
98
|
-
newState = "silent";
|
|
99
|
-
this.lastSilenceTime = timestamp;
|
|
100
109
|
} else {
|
|
101
110
|
newState = "speech_ending";
|
|
102
111
|
this.lastSilenceTime = timestamp;
|
|
103
112
|
}
|
|
104
113
|
}
|
|
105
114
|
}
|
|
106
|
-
if (newState === "speech_starting") newState = "speaking";
|
|
107
115
|
if (newState === "speech_ending") newState = "silent";
|
|
108
116
|
this.currentState = newState;
|
|
109
117
|
return {
|
package/dist/vad/vad-state.mjs
CHANGED