@tensamin/audio 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/dist/{chunk-JP6DA62Y.mjs → chunk-2UPI6VWY.mjs} +2 -2
- package/dist/{chunk-2TKYGFMC.mjs → chunk-3A2CTC4K.mjs} +18 -6
- package/dist/{chunk-BMVZ3KKG.mjs → chunk-FOGC2MFA.mjs} +1 -1
- package/dist/{chunk-2EX3FXSF.mjs → chunk-XHMNP7NC.mjs} +6 -6
- package/dist/{chunk-UQG6Z5W3.mjs → chunk-Y6IG7XGC.mjs} +1 -1
- package/dist/extensibility/plugins.js +18 -6
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +24 -12
- package/dist/index.mjs +5 -5
- package/dist/livekit/integration.js +24 -12
- package/dist/livekit/integration.mjs +5 -5
- package/dist/pipeline/audio-pipeline.js +24 -12
- package/dist/pipeline/audio-pipeline.mjs +4 -4
- package/dist/types.d.mts +6 -6
- package/dist/types.d.ts +6 -6
- package/dist/vad/vad-node.js +18 -6
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.js +6 -6
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -102,9 +102,9 @@ vad: {
|
|
|
102
102
|
smoothing: number; // Default: 0.95
|
|
103
103
|
initialNoiseFloor: number; // Default: 0.001
|
|
104
104
|
noiseFloorAdaptRateQuiet: number; // Default: 0.05
|
|
105
|
-
noiseFloorAdaptRateLoud: number; // Default: 0.
|
|
106
|
-
minSNR: number; // Default:
|
|
107
|
-
snrRange: number; // Default:
|
|
105
|
+
noiseFloorAdaptRateLoud: number; // Default: 0.01
|
|
106
|
+
minSNR: number; // Default: 10.0 (dB)
|
|
107
|
+
snrRange: number; // Default: 10.0 (dB)
|
|
108
108
|
minEnergy: number; // Default: 0.0005
|
|
109
109
|
};
|
|
110
110
|
}
|
|
@@ -112,9 +112,9 @@ vad: {
|
|
|
112
112
|
|
|
113
113
|
**Threshold Parameters:**
|
|
114
114
|
|
|
115
|
-
- `startThreshold`: Probability threshold to unmute audio (Default: 0.
|
|
116
|
-
- `stopThreshold`: Probability threshold to mute audio (Default: 0.
|
|
117
|
-
- `hangoverMs`: Delay before muting after speech stops
|
|
115
|
+
- `startThreshold`: Probability threshold to unmute audio (Default: 0.8, ~18dB SNR)
|
|
116
|
+
- `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
|
|
117
|
+
- `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
|
|
118
118
|
- `preRollMs`: Audio buffer duration before speech onset
|
|
119
119
|
- `minSpeechDurationMs`: Minimum duration to consider as valid speech
|
|
120
120
|
- `minSilenceDurationMs`: Minimum silence duration between speech segments
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
VADStateMachine
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-XHMNP7NC.mjs";
|
|
4
4
|
import {
|
|
5
5
|
getAudioContext,
|
|
6
6
|
registerPipeline,
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
import {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-FOGC2MFA.mjs";
|
|
13
13
|
|
|
14
14
|
// src/pipeline/audio-pipeline.ts
|
|
15
15
|
import mitt from "mitt";
|
|
@@ -4,9 +4,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
4
4
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
5
5
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
6
6
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
|
|
7
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ??
|
|
8
|
-
const minSNR = energyParams.minSNR ??
|
|
9
|
-
const snrRange = energyParams.snrRange ??
|
|
7
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
|
|
8
|
+
const minSNR = energyParams.minSNR ?? 10;
|
|
9
|
+
const snrRange = energyParams.snrRange ?? 10;
|
|
10
10
|
const minEnergy = energyParams.minEnergy ?? 5e-4;
|
|
11
11
|
return `
|
|
12
12
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
@@ -52,9 +52,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
52
52
|
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
|
|
53
53
|
} else {
|
|
54
54
|
// If signal is louder, adapt upwards
|
|
55
|
-
//
|
|
56
|
-
// If we are
|
|
57
|
-
|
|
55
|
+
// We use a multi-stage adaptation rate:
|
|
56
|
+
// 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
|
|
57
|
+
// 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
|
|
58
|
+
// 3. Otherwise, adapt at the normal loud rate
|
|
59
|
+
const snr = instantRms / (this.noiseFloor + 1e-6);
|
|
60
|
+
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
61
|
+
|
|
62
|
+
let multiplier = 1.0;
|
|
63
|
+
if (this.isSpeaking) {
|
|
64
|
+
multiplier = 0.01;
|
|
65
|
+
} else if (snrDb > 20) {
|
|
66
|
+
multiplier = 0.1;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
58
70
|
this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
|
|
59
71
|
}
|
|
60
72
|
|
|
@@ -12,11 +12,11 @@ var VADStateMachine = class {
|
|
|
12
12
|
enabled: config?.enabled ?? true,
|
|
13
13
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
14
14
|
// Voice-optimized defaults
|
|
15
|
-
startThreshold: config?.startThreshold ?? 0.
|
|
15
|
+
startThreshold: config?.startThreshold ?? 0.8,
|
|
16
16
|
// Higher threshold to avoid noise
|
|
17
|
-
stopThreshold: config?.stopThreshold ?? 0.
|
|
17
|
+
stopThreshold: config?.stopThreshold ?? 0.3,
|
|
18
18
|
// Balanced for voice
|
|
19
|
-
hangoverMs: config?.hangoverMs ??
|
|
19
|
+
hangoverMs: config?.hangoverMs ?? 300,
|
|
20
20
|
// Smooth for natural speech
|
|
21
21
|
preRollMs: config?.preRollMs ?? 250,
|
|
22
22
|
// Generous pre-roll
|
|
@@ -26,9 +26,9 @@ var VADStateMachine = class {
|
|
|
26
26
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
27
27
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
28
28
|
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
|
|
29
|
-
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ??
|
|
30
|
-
minSNR: config?.energyVad?.minSNR ??
|
|
31
|
-
snrRange: config?.energyVad?.snrRange ??
|
|
29
|
+
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
30
|
+
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
31
|
+
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
32
32
|
minEnergy: config?.energyVad?.minEnergy ?? 5e-4
|
|
33
33
|
}
|
|
34
34
|
};
|
|
@@ -107,9 +107,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
107
107
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
108
108
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
109
109
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
|
|
110
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ??
|
|
111
|
-
const minSNR = energyParams.minSNR ??
|
|
112
|
-
const snrRange = energyParams.snrRange ??
|
|
110
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
|
|
111
|
+
const minSNR = energyParams.minSNR ?? 10;
|
|
112
|
+
const snrRange = energyParams.snrRange ?? 10;
|
|
113
113
|
const minEnergy = energyParams.minEnergy ?? 5e-4;
|
|
114
114
|
return `
|
|
115
115
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
@@ -155,9 +155,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
155
155
|
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
|
|
156
156
|
} else {
|
|
157
157
|
// If signal is louder, adapt upwards
|
|
158
|
-
//
|
|
159
|
-
// If we are
|
|
160
|
-
|
|
158
|
+
// We use a multi-stage adaptation rate:
|
|
159
|
+
// 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
|
|
160
|
+
// 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
|
|
161
|
+
// 3. Otherwise, adapt at the normal loud rate
|
|
162
|
+
const snr = instantRms / (this.noiseFloor + 1e-6);
|
|
163
|
+
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
164
|
+
|
|
165
|
+
let multiplier = 1.0;
|
|
166
|
+
if (this.isSpeaking) {
|
|
167
|
+
multiplier = 0.01;
|
|
168
|
+
} else if (snrDb > 20) {
|
|
169
|
+
multiplier = 0.1;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
161
173
|
this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
|
|
162
174
|
}
|
|
163
175
|
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-
|
|
6
|
+
} from "../chunk-FOGC2MFA.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-3A2CTC4K.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|
package/dist/index.js
CHANGED
|
@@ -159,9 +159,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
159
159
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
160
160
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
161
161
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
|
|
162
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ??
|
|
163
|
-
const minSNR = energyParams.minSNR ??
|
|
164
|
-
const snrRange = energyParams.snrRange ??
|
|
162
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
|
|
163
|
+
const minSNR = energyParams.minSNR ?? 10;
|
|
164
|
+
const snrRange = energyParams.snrRange ?? 10;
|
|
165
165
|
const minEnergy = energyParams.minEnergy ?? 5e-4;
|
|
166
166
|
return `
|
|
167
167
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
@@ -207,9 +207,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
207
207
|
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
|
|
208
208
|
} else {
|
|
209
209
|
// If signal is louder, adapt upwards
|
|
210
|
-
//
|
|
211
|
-
// If we are
|
|
212
|
-
|
|
210
|
+
// We use a multi-stage adaptation rate:
|
|
211
|
+
// 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
|
|
212
|
+
// 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
|
|
213
|
+
// 3. Otherwise, adapt at the normal loud rate
|
|
214
|
+
const snr = instantRms / (this.noiseFloor + 1e-6);
|
|
215
|
+
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
216
|
+
|
|
217
|
+
let multiplier = 1.0;
|
|
218
|
+
if (this.isSpeaking) {
|
|
219
|
+
multiplier = 0.01;
|
|
220
|
+
} else if (snrDb > 20) {
|
|
221
|
+
multiplier = 0.1;
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
213
225
|
this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
|
|
214
226
|
}
|
|
215
227
|
|
|
@@ -348,11 +360,11 @@ var VADStateMachine = class {
|
|
|
348
360
|
enabled: config?.enabled ?? true,
|
|
349
361
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
350
362
|
// Voice-optimized defaults
|
|
351
|
-
startThreshold: config?.startThreshold ?? 0.
|
|
363
|
+
startThreshold: config?.startThreshold ?? 0.8,
|
|
352
364
|
// Higher threshold to avoid noise
|
|
353
|
-
stopThreshold: config?.stopThreshold ?? 0.
|
|
365
|
+
stopThreshold: config?.stopThreshold ?? 0.3,
|
|
354
366
|
// Balanced for voice
|
|
355
|
-
hangoverMs: config?.hangoverMs ??
|
|
367
|
+
hangoverMs: config?.hangoverMs ?? 300,
|
|
356
368
|
// Smooth for natural speech
|
|
357
369
|
preRollMs: config?.preRollMs ?? 250,
|
|
358
370
|
// Generous pre-roll
|
|
@@ -362,9 +374,9 @@ var VADStateMachine = class {
|
|
|
362
374
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
363
375
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
364
376
|
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
|
|
365
|
-
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ??
|
|
366
|
-
minSNR: config?.energyVad?.minSNR ??
|
|
367
|
-
snrRange: config?.energyVad?.snrRange ??
|
|
377
|
+
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
378
|
+
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
379
|
+
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
368
380
|
minEnergy: config?.energyVad?.minEnergy ?? 5e-4
|
|
369
381
|
}
|
|
370
382
|
};
|
package/dist/index.mjs
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import "./chunk-WBQAMGXK.mjs";
|
|
2
2
|
import {
|
|
3
3
|
attachProcessingToTrack
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-Y6IG7XGC.mjs";
|
|
5
5
|
import {
|
|
6
6
|
createAudioPipeline
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-2UPI6VWY.mjs";
|
|
8
8
|
import {
|
|
9
9
|
VADStateMachine
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-XHMNP7NC.mjs";
|
|
11
11
|
import {
|
|
12
12
|
closeAudioContext,
|
|
13
13
|
getAudioContext,
|
|
@@ -21,13 +21,13 @@ import {
|
|
|
21
21
|
getVADPlugin,
|
|
22
22
|
registerNoiseSuppressionPlugin,
|
|
23
23
|
registerVADPlugin
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-FOGC2MFA.mjs";
|
|
25
25
|
import {
|
|
26
26
|
RNNoisePlugin
|
|
27
27
|
} from "./chunk-XO6B3D4A.mjs";
|
|
28
28
|
import {
|
|
29
29
|
EnergyVADPlugin
|
|
30
|
-
} from "./chunk-
|
|
30
|
+
} from "./chunk-3A2CTC4K.mjs";
|
|
31
31
|
export {
|
|
32
32
|
EnergyVADPlugin,
|
|
33
33
|
RNNoisePlugin,
|
|
@@ -128,9 +128,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
128
128
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
129
129
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
130
130
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
|
|
131
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ??
|
|
132
|
-
const minSNR = energyParams.minSNR ??
|
|
133
|
-
const snrRange = energyParams.snrRange ??
|
|
131
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
|
|
132
|
+
const minSNR = energyParams.minSNR ?? 10;
|
|
133
|
+
const snrRange = energyParams.snrRange ?? 10;
|
|
134
134
|
const minEnergy = energyParams.minEnergy ?? 5e-4;
|
|
135
135
|
return `
|
|
136
136
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
@@ -176,9 +176,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
176
176
|
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
|
|
177
177
|
} else {
|
|
178
178
|
// If signal is louder, adapt upwards
|
|
179
|
-
//
|
|
180
|
-
// If we are
|
|
181
|
-
|
|
179
|
+
// We use a multi-stage adaptation rate:
|
|
180
|
+
// 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
|
|
181
|
+
// 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
|
|
182
|
+
// 3. Otherwise, adapt at the normal loud rate
|
|
183
|
+
const snr = instantRms / (this.noiseFloor + 1e-6);
|
|
184
|
+
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
185
|
+
|
|
186
|
+
let multiplier = 1.0;
|
|
187
|
+
if (this.isSpeaking) {
|
|
188
|
+
multiplier = 0.01;
|
|
189
|
+
} else if (snrDb > 20) {
|
|
190
|
+
multiplier = 0.1;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
182
194
|
this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
|
|
183
195
|
}
|
|
184
196
|
|
|
@@ -311,11 +323,11 @@ var VADStateMachine = class {
|
|
|
311
323
|
enabled: config?.enabled ?? true,
|
|
312
324
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
313
325
|
// Voice-optimized defaults
|
|
314
|
-
startThreshold: config?.startThreshold ?? 0.
|
|
326
|
+
startThreshold: config?.startThreshold ?? 0.8,
|
|
315
327
|
// Higher threshold to avoid noise
|
|
316
|
-
stopThreshold: config?.stopThreshold ?? 0.
|
|
328
|
+
stopThreshold: config?.stopThreshold ?? 0.3,
|
|
317
329
|
// Balanced for voice
|
|
318
|
-
hangoverMs: config?.hangoverMs ??
|
|
330
|
+
hangoverMs: config?.hangoverMs ?? 300,
|
|
319
331
|
// Smooth for natural speech
|
|
320
332
|
preRollMs: config?.preRollMs ?? 250,
|
|
321
333
|
// Generous pre-roll
|
|
@@ -325,9 +337,9 @@ var VADStateMachine = class {
|
|
|
325
337
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
326
338
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
327
339
|
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
|
|
328
|
-
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ??
|
|
329
|
-
minSNR: config?.energyVad?.minSNR ??
|
|
330
|
-
snrRange: config?.energyVad?.snrRange ??
|
|
340
|
+
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
341
|
+
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
342
|
+
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
331
343
|
minEnergy: config?.energyVad?.minEnergy ?? 5e-4
|
|
332
344
|
}
|
|
333
345
|
};
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
attachProcessingToTrack
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
5
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-Y6IG7XGC.mjs";
|
|
4
|
+
import "../chunk-2UPI6VWY.mjs";
|
|
5
|
+
import "../chunk-XHMNP7NC.mjs";
|
|
6
6
|
import "../chunk-OZ7KMC4S.mjs";
|
|
7
|
-
import "../chunk-
|
|
7
|
+
import "../chunk-FOGC2MFA.mjs";
|
|
8
8
|
import "../chunk-XO6B3D4A.mjs";
|
|
9
|
-
import "../chunk-
|
|
9
|
+
import "../chunk-3A2CTC4K.mjs";
|
|
10
10
|
export {
|
|
11
11
|
attachProcessingToTrack
|
|
12
12
|
};
|
|
@@ -126,9 +126,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
126
126
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
127
127
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
128
128
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
|
|
129
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ??
|
|
130
|
-
const minSNR = energyParams.minSNR ??
|
|
131
|
-
const snrRange = energyParams.snrRange ??
|
|
129
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
|
|
130
|
+
const minSNR = energyParams.minSNR ?? 10;
|
|
131
|
+
const snrRange = energyParams.snrRange ?? 10;
|
|
132
132
|
const minEnergy = energyParams.minEnergy ?? 5e-4;
|
|
133
133
|
return `
|
|
134
134
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
@@ -174,9 +174,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
174
174
|
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
|
|
175
175
|
} else {
|
|
176
176
|
// If signal is louder, adapt upwards
|
|
177
|
-
//
|
|
178
|
-
// If we are
|
|
179
|
-
|
|
177
|
+
// We use a multi-stage adaptation rate:
|
|
178
|
+
// 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
|
|
179
|
+
// 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
|
|
180
|
+
// 3. Otherwise, adapt at the normal loud rate
|
|
181
|
+
const snr = instantRms / (this.noiseFloor + 1e-6);
|
|
182
|
+
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
183
|
+
|
|
184
|
+
let multiplier = 1.0;
|
|
185
|
+
if (this.isSpeaking) {
|
|
186
|
+
multiplier = 0.01;
|
|
187
|
+
} else if (snrDb > 20) {
|
|
188
|
+
multiplier = 0.1;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
180
192
|
this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
|
|
181
193
|
}
|
|
182
194
|
|
|
@@ -309,11 +321,11 @@ var VADStateMachine = class {
|
|
|
309
321
|
enabled: config?.enabled ?? true,
|
|
310
322
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
311
323
|
// Voice-optimized defaults
|
|
312
|
-
startThreshold: config?.startThreshold ?? 0.
|
|
324
|
+
startThreshold: config?.startThreshold ?? 0.8,
|
|
313
325
|
// Higher threshold to avoid noise
|
|
314
|
-
stopThreshold: config?.stopThreshold ?? 0.
|
|
326
|
+
stopThreshold: config?.stopThreshold ?? 0.3,
|
|
315
327
|
// Balanced for voice
|
|
316
|
-
hangoverMs: config?.hangoverMs ??
|
|
328
|
+
hangoverMs: config?.hangoverMs ?? 300,
|
|
317
329
|
// Smooth for natural speech
|
|
318
330
|
preRollMs: config?.preRollMs ?? 250,
|
|
319
331
|
// Generous pre-roll
|
|
@@ -323,9 +335,9 @@ var VADStateMachine = class {
|
|
|
323
335
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
324
336
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
325
337
|
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
|
|
326
|
-
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ??
|
|
327
|
-
minSNR: config?.energyVad?.minSNR ??
|
|
328
|
-
snrRange: config?.energyVad?.snrRange ??
|
|
338
|
+
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
339
|
+
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
340
|
+
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
329
341
|
minEnergy: config?.energyVad?.minEnergy ?? 5e-4
|
|
330
342
|
}
|
|
331
343
|
};
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createAudioPipeline
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-2UPI6VWY.mjs";
|
|
4
|
+
import "../chunk-XHMNP7NC.mjs";
|
|
5
5
|
import "../chunk-OZ7KMC4S.mjs";
|
|
6
|
-
import "../chunk-
|
|
6
|
+
import "../chunk-FOGC2MFA.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-3A2CTC4K.mjs";
|
|
9
9
|
export {
|
|
10
10
|
createAudioPipeline
|
|
11
11
|
};
|
package/dist/types.d.mts
CHANGED
|
@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
|
|
|
43
43
|
* When VAD probability rises above this, audio is unmuted.
|
|
44
44
|
* Lower = more sensitive (catches quiet speech, may include noise)
|
|
45
45
|
* Higher = less sensitive (only confident speech, may clip quiet parts)
|
|
46
|
-
* Default: 0.
|
|
46
|
+
* Default: 0.8 (aggressive noise rejection)
|
|
47
47
|
*/
|
|
48
48
|
startThreshold?: number;
|
|
49
49
|
/**
|
|
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
|
|
|
51
51
|
* When VAD probability drops below this (after hangover), audio is muted.
|
|
52
52
|
* Lower = keeps audio on longer (less aggressive gating)
|
|
53
53
|
* Higher = mutes faster (more aggressive noise suppression)
|
|
54
|
-
* Default: 0.
|
|
54
|
+
* Default: 0.3 (wide hysteresis for stability)
|
|
55
55
|
*/
|
|
56
56
|
stopThreshold?: number;
|
|
57
57
|
/**
|
|
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
|
|
|
59
59
|
* Prevents rapid on/off toggling during pauses.
|
|
60
60
|
* Lower = more aggressive gating, may clip between words
|
|
61
61
|
* Higher = smoother but may let trailing noise through
|
|
62
|
-
* Default:
|
|
62
|
+
* Default: 300ms
|
|
63
63
|
*/
|
|
64
64
|
hangoverMs?: number;
|
|
65
65
|
/**
|
|
@@ -102,17 +102,17 @@ interface AudioProcessingConfig {
|
|
|
102
102
|
noiseFloorAdaptRateQuiet?: number;
|
|
103
103
|
/**
|
|
104
104
|
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Default: 0.
|
|
105
|
+
* Default: 0.01 (faster tracking of rising noise)
|
|
106
106
|
*/
|
|
107
107
|
noiseFloorAdaptRateLoud?: number;
|
|
108
108
|
/**
|
|
109
109
|
* Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
|
|
110
|
-
* Default:
|
|
110
|
+
* Default: 10.0 (more aggressive noise rejection)
|
|
111
111
|
*/
|
|
112
112
|
minSNR?: number;
|
|
113
113
|
/**
|
|
114
114
|
* SNR range in dB for probability scaling.
|
|
115
|
-
* Default:
|
|
115
|
+
* Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
|
|
116
116
|
*/
|
|
117
117
|
snrRange?: number;
|
|
118
118
|
/**
|
package/dist/types.d.ts
CHANGED
|
@@ -43,7 +43,7 @@ interface AudioProcessingConfig {
|
|
|
43
43
|
* When VAD probability rises above this, audio is unmuted.
|
|
44
44
|
* Lower = more sensitive (catches quiet speech, may include noise)
|
|
45
45
|
* Higher = less sensitive (only confident speech, may clip quiet parts)
|
|
46
|
-
* Default: 0.
|
|
46
|
+
* Default: 0.8 (aggressive noise rejection)
|
|
47
47
|
*/
|
|
48
48
|
startThreshold?: number;
|
|
49
49
|
/**
|
|
@@ -51,7 +51,7 @@ interface AudioProcessingConfig {
|
|
|
51
51
|
* When VAD probability drops below this (after hangover), audio is muted.
|
|
52
52
|
* Lower = keeps audio on longer (less aggressive gating)
|
|
53
53
|
* Higher = mutes faster (more aggressive noise suppression)
|
|
54
|
-
* Default: 0.
|
|
54
|
+
* Default: 0.3 (wide hysteresis for stability)
|
|
55
55
|
*/
|
|
56
56
|
stopThreshold?: number;
|
|
57
57
|
/**
|
|
@@ -59,7 +59,7 @@ interface AudioProcessingConfig {
|
|
|
59
59
|
* Prevents rapid on/off toggling during pauses.
|
|
60
60
|
* Lower = more aggressive gating, may clip between words
|
|
61
61
|
* Higher = smoother but may let trailing noise through
|
|
62
|
-
* Default:
|
|
62
|
+
* Default: 300ms
|
|
63
63
|
*/
|
|
64
64
|
hangoverMs?: number;
|
|
65
65
|
/**
|
|
@@ -102,17 +102,17 @@ interface AudioProcessingConfig {
|
|
|
102
102
|
noiseFloorAdaptRateQuiet?: number;
|
|
103
103
|
/**
|
|
104
104
|
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Default: 0.
|
|
105
|
+
* Default: 0.01 (faster tracking of rising noise)
|
|
106
106
|
*/
|
|
107
107
|
noiseFloorAdaptRateLoud?: number;
|
|
108
108
|
/**
|
|
109
109
|
* Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
|
|
110
|
-
* Default:
|
|
110
|
+
* Default: 10.0 (more aggressive noise rejection)
|
|
111
111
|
*/
|
|
112
112
|
minSNR?: number;
|
|
113
113
|
/**
|
|
114
114
|
* SNR range in dB for probability scaling.
|
|
115
|
-
* Default:
|
|
115
|
+
* Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
|
|
116
116
|
*/
|
|
117
117
|
snrRange?: number;
|
|
118
118
|
/**
|
package/dist/vad/vad-node.js
CHANGED
|
@@ -28,9 +28,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
28
28
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
29
29
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
30
30
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.05;
|
|
31
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ??
|
|
32
|
-
const minSNR = energyParams.minSNR ??
|
|
33
|
-
const snrRange = energyParams.snrRange ??
|
|
31
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.01;
|
|
32
|
+
const minSNR = energyParams.minSNR ?? 10;
|
|
33
|
+
const snrRange = energyParams.snrRange ?? 10;
|
|
34
34
|
const minEnergy = energyParams.minEnergy ?? 5e-4;
|
|
35
35
|
return `
|
|
36
36
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
@@ -76,9 +76,21 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
76
76
|
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + instantRms * this.noiseFloorAdaptRateQuiet;
|
|
77
77
|
} else {
|
|
78
78
|
// If signal is louder, adapt upwards
|
|
79
|
-
//
|
|
80
|
-
// If we are
|
|
81
|
-
|
|
79
|
+
// We use a multi-stage adaptation rate:
|
|
80
|
+
// 1. If we are officially speaking, adapt EXTREMELY slowly (0.01x)
|
|
81
|
+
// 2. If SNR is very high (> 20dB), assume it's speech and adapt very slowly (0.1x)
|
|
82
|
+
// 3. Otherwise, adapt at the normal loud rate
|
|
83
|
+
const snr = instantRms / (this.noiseFloor + 1e-6);
|
|
84
|
+
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
85
|
+
|
|
86
|
+
let multiplier = 1.0;
|
|
87
|
+
if (this.isSpeaking) {
|
|
88
|
+
multiplier = 0.01;
|
|
89
|
+
} else if (snrDb > 20) {
|
|
90
|
+
multiplier = 0.1;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const adaptRate = this.noiseFloorAdaptRateLoud * multiplier;
|
|
82
94
|
this.noiseFloor = this.noiseFloor * (1 - adaptRate) + instantRms * adaptRate;
|
|
83
95
|
}
|
|
84
96
|
|
package/dist/vad/vad-node.mjs
CHANGED
package/dist/vad/vad-state.js
CHANGED
|
@@ -36,11 +36,11 @@ var VADStateMachine = class {
|
|
|
36
36
|
enabled: config?.enabled ?? true,
|
|
37
37
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
38
38
|
// Voice-optimized defaults
|
|
39
|
-
startThreshold: config?.startThreshold ?? 0.
|
|
39
|
+
startThreshold: config?.startThreshold ?? 0.8,
|
|
40
40
|
// Higher threshold to avoid noise
|
|
41
|
-
stopThreshold: config?.stopThreshold ?? 0.
|
|
41
|
+
stopThreshold: config?.stopThreshold ?? 0.3,
|
|
42
42
|
// Balanced for voice
|
|
43
|
-
hangoverMs: config?.hangoverMs ??
|
|
43
|
+
hangoverMs: config?.hangoverMs ?? 300,
|
|
44
44
|
// Smooth for natural speech
|
|
45
45
|
preRollMs: config?.preRollMs ?? 250,
|
|
46
46
|
// Generous pre-roll
|
|
@@ -50,9 +50,9 @@ var VADStateMachine = class {
|
|
|
50
50
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
51
51
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
52
52
|
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.05,
|
|
53
|
-
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ??
|
|
54
|
-
minSNR: config?.energyVad?.minSNR ??
|
|
55
|
-
snrRange: config?.energyVad?.snrRange ??
|
|
53
|
+
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.01,
|
|
54
|
+
minSNR: config?.energyVad?.minSNR ?? 10,
|
|
55
|
+
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
56
56
|
minEnergy: config?.energyVad?.minEnergy ?? 5e-4
|
|
57
57
|
}
|
|
58
58
|
};
|
package/dist/vad/vad-state.mjs
CHANGED