@tensamin/audio 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -6
- package/dist/{chunk-2G2JFHJY.mjs → chunk-GLKAWCEW.mjs} +11 -33
- package/dist/{chunk-K4YLH73B.mjs → chunk-KLBA2CPE.mjs} +3 -5
- package/dist/{chunk-6F2HZUYO.mjs → chunk-QQFKHTCQ.mjs} +1 -1
- package/dist/{chunk-UFKIAMG3.mjs → chunk-U26F3GJN.mjs} +1 -1
- package/dist/{chunk-R5M2DGAQ.mjs → chunk-WQVMSR7V.mjs} +5 -6
- package/dist/extensibility/plugins.js +11 -33
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +17 -42
- package/dist/index.mjs +5 -5
- package/dist/livekit/integration.js +17 -42
- package/dist/livekit/integration.mjs +5 -5
- package/dist/pipeline/audio-pipeline.js +17 -42
- package/dist/pipeline/audio-pipeline.mjs +4 -4
- package/dist/types.d.mts +4 -15
- package/dist/types.d.ts +4 -15
- package/dist/vad/vad-node.js +11 -33
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.js +3 -5
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -101,11 +101,9 @@ vad: {
|
|
|
101
101
|
energyVad?: {
|
|
102
102
|
smoothing: number; // Default: 0.95
|
|
103
103
|
initialNoiseFloor: number; // Default: 0.001
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
snrRange: number; // Default: 10.0 (dB)
|
|
108
|
-
minEnergy: number; // Default: 0.003
|
|
104
|
+
minSNR: number; // Default: 8.0 (dB)
|
|
105
|
+
snrRange: number; // Default: 12.0 (dB)
|
|
106
|
+
minEnergy: number; // Default: 0.01
|
|
109
107
|
};
|
|
110
108
|
}
|
|
111
109
|
```
|
|
@@ -124,7 +122,7 @@ vad: {
|
|
|
124
122
|
- `smoothing`: Energy calculation smoothing factor (0-1)
|
|
125
123
|
- `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
|
|
126
124
|
- `snrRange`: Range in dB for probability scaling from minSNR
|
|
127
|
-
- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.003)
|
|
125
|
+
- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.01, ~-40dB)
|
|
128
126
|
|
|
129
127
|
### Output Control
|
|
130
128
|
|
|
@@ -3,11 +3,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
3
3
|
const energyParams = vadConfig?.energyVad || {};
|
|
4
4
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
5
5
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
6
|
-
const
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
10
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
6
|
+
const minSNR = energyParams.minSNR ?? 8;
|
|
7
|
+
const snrRange = energyParams.snrRange ?? 12;
|
|
8
|
+
const minEnergy = energyParams.minEnergy ?? 0.01;
|
|
11
9
|
return `
|
|
12
10
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
13
11
|
constructor() {
|
|
@@ -15,8 +13,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
15
13
|
this.smoothing = ${smoothing};
|
|
16
14
|
this.energy = 0;
|
|
17
15
|
this.noiseFloor = ${initialNoiseFloor};
|
|
18
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
19
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
20
16
|
this.minSNR = ${minSNR};
|
|
21
17
|
this.snrRange = ${snrRange};
|
|
22
18
|
this.minEnergy = ${minEnergy};
|
|
@@ -54,33 +50,15 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
54
50
|
const crestFactor = peak / (instantRms + 1e-10);
|
|
55
51
|
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
56
52
|
|
|
57
|
-
//
|
|
58
|
-
//
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
65
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
66
|
-
|
|
67
|
-
// Only adapt upwards if:
|
|
68
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
69
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
70
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
71
|
-
// This is persistent background noise, adapt upwards
|
|
72
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
73
|
-
} else {
|
|
74
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
75
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
76
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// Ensure noise floor doesn't drop to absolute zero
|
|
81
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
|
|
53
|
+
// FIXED noise floor with minimal adaptation
|
|
54
|
+
// Only adapt within strict bounds to prevent drift
|
|
55
|
+
const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
|
|
56
|
+
this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
|
|
57
|
+
|
|
58
|
+
// Hard clamp to prevent any drift outside acceptable range
|
|
59
|
+
this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
|
|
82
60
|
|
|
83
|
-
//
|
|
61
|
+
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
84
62
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
85
63
|
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
86
64
|
|
|
@@ -26,11 +26,9 @@ var VADStateMachine = class {
|
|
|
26
26
|
energyVad: {
|
|
27
27
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
28
28
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
33
|
-
minEnergy: config?.energyVad?.minEnergy ?? 3e-3
|
|
29
|
+
minSNR: config?.energyVad?.minSNR ?? 8,
|
|
30
|
+
snrRange: config?.energyVad?.snrRange ?? 12,
|
|
31
|
+
minEnergy: config?.energyVad?.minEnergy ?? 0.01
|
|
34
32
|
}
|
|
35
33
|
};
|
|
36
34
|
this.lastSilenceTime = Date.now();
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
VADStateMachine
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-KLBA2CPE.mjs";
|
|
4
4
|
import {
|
|
5
5
|
getAudioContext,
|
|
6
6
|
registerPipeline,
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
import {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-U26F3GJN.mjs";
|
|
13
13
|
|
|
14
14
|
// src/pipeline/audio-pipeline.ts
|
|
15
15
|
import mitt from "mitt";
|
|
@@ -37,10 +37,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
37
37
|
energyVad: {
|
|
38
38
|
smoothing: 0.95,
|
|
39
39
|
initialNoiseFloor: 1e-3,
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
snrRange: 8
|
|
40
|
+
minSNR: 8,
|
|
41
|
+
snrRange: 12,
|
|
42
|
+
minEnergy: 0.01
|
|
44
43
|
},
|
|
45
44
|
...config.vad
|
|
46
45
|
},
|
|
@@ -106,11 +106,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
106
106
|
const energyParams = vadConfig?.energyVad || {};
|
|
107
107
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
108
108
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
109
|
-
const
|
|
110
|
-
const
|
|
111
|
-
const
|
|
112
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
113
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
109
|
+
const minSNR = energyParams.minSNR ?? 8;
|
|
110
|
+
const snrRange = energyParams.snrRange ?? 12;
|
|
111
|
+
const minEnergy = energyParams.minEnergy ?? 0.01;
|
|
114
112
|
return `
|
|
115
113
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
116
114
|
constructor() {
|
|
@@ -118,8 +116,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
118
116
|
this.smoothing = ${smoothing};
|
|
119
117
|
this.energy = 0;
|
|
120
118
|
this.noiseFloor = ${initialNoiseFloor};
|
|
121
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
122
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
123
119
|
this.minSNR = ${minSNR};
|
|
124
120
|
this.snrRange = ${snrRange};
|
|
125
121
|
this.minEnergy = ${minEnergy};
|
|
@@ -157,33 +153,15 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
157
153
|
const crestFactor = peak / (instantRms + 1e-10);
|
|
158
154
|
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
159
155
|
|
|
160
|
-
//
|
|
161
|
-
//
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
168
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
169
|
-
|
|
170
|
-
// Only adapt upwards if:
|
|
171
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
172
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
173
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
174
|
-
// This is persistent background noise, adapt upwards
|
|
175
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
176
|
-
} else {
|
|
177
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
178
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
179
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
// Ensure noise floor doesn't drop to absolute zero
|
|
184
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
|
|
156
|
+
// FIXED noise floor with minimal adaptation
|
|
157
|
+
// Only adapt within strict bounds to prevent drift
|
|
158
|
+
const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
|
|
159
|
+
this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
|
|
160
|
+
|
|
161
|
+
// Hard clamp to prevent any drift outside acceptable range
|
|
162
|
+
this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
|
|
185
163
|
|
|
186
|
-
//
|
|
164
|
+
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
187
165
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
188
166
|
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
189
167
|
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-
|
|
6
|
+
} from "../chunk-U26F3GJN.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-GLKAWCEW.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|
package/dist/index.js
CHANGED
|
@@ -158,11 +158,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
158
158
|
const energyParams = vadConfig?.energyVad || {};
|
|
159
159
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
160
160
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
161
|
-
const
|
|
162
|
-
const
|
|
163
|
-
const
|
|
164
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
165
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
161
|
+
const minSNR = energyParams.minSNR ?? 8;
|
|
162
|
+
const snrRange = energyParams.snrRange ?? 12;
|
|
163
|
+
const minEnergy = energyParams.minEnergy ?? 0.01;
|
|
166
164
|
return `
|
|
167
165
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
168
166
|
constructor() {
|
|
@@ -170,8 +168,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
170
168
|
this.smoothing = ${smoothing};
|
|
171
169
|
this.energy = 0;
|
|
172
170
|
this.noiseFloor = ${initialNoiseFloor};
|
|
173
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
174
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
175
171
|
this.minSNR = ${minSNR};
|
|
176
172
|
this.snrRange = ${snrRange};
|
|
177
173
|
this.minEnergy = ${minEnergy};
|
|
@@ -209,33 +205,15 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
209
205
|
const crestFactor = peak / (instantRms + 1e-10);
|
|
210
206
|
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
211
207
|
|
|
212
|
-
//
|
|
213
|
-
//
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
220
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
221
|
-
|
|
222
|
-
// Only adapt upwards if:
|
|
223
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
224
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
225
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
226
|
-
// This is persistent background noise, adapt upwards
|
|
227
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
228
|
-
} else {
|
|
229
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
230
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
231
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
232
|
-
}
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
// Ensure noise floor doesn't drop to absolute zero
|
|
236
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
|
|
208
|
+
// FIXED noise floor with minimal adaptation
|
|
209
|
+
// Only adapt within strict bounds to prevent drift
|
|
210
|
+
const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
|
|
211
|
+
this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
|
|
212
|
+
|
|
213
|
+
// Hard clamp to prevent any drift outside acceptable range
|
|
214
|
+
this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
|
|
237
215
|
|
|
238
|
-
//
|
|
216
|
+
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
239
217
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
240
218
|
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
241
219
|
|
|
@@ -392,11 +370,9 @@ var VADStateMachine = class {
|
|
|
392
370
|
energyVad: {
|
|
393
371
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
394
372
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
399
|
-
minEnergy: config?.energyVad?.minEnergy ?? 3e-3
|
|
373
|
+
minSNR: config?.energyVad?.minSNR ?? 8,
|
|
374
|
+
snrRange: config?.energyVad?.snrRange ?? 12,
|
|
375
|
+
minEnergy: config?.energyVad?.minEnergy ?? 0.01
|
|
400
376
|
}
|
|
401
377
|
};
|
|
402
378
|
this.lastSilenceTime = Date.now();
|
|
@@ -489,10 +465,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
489
465
|
energyVad: {
|
|
490
466
|
smoothing: 0.95,
|
|
491
467
|
initialNoiseFloor: 1e-3,
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
snrRange: 8
|
|
468
|
+
minSNR: 8,
|
|
469
|
+
snrRange: 12,
|
|
470
|
+
minEnergy: 0.01
|
|
496
471
|
},
|
|
497
472
|
...config.vad
|
|
498
473
|
},
|
package/dist/index.mjs
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import "./chunk-WBQAMGXK.mjs";
|
|
2
2
|
import {
|
|
3
3
|
attachProcessingToTrack
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-QQFKHTCQ.mjs";
|
|
5
5
|
import {
|
|
6
6
|
createAudioPipeline
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-WQVMSR7V.mjs";
|
|
8
8
|
import {
|
|
9
9
|
VADStateMachine
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-KLBA2CPE.mjs";
|
|
11
11
|
import {
|
|
12
12
|
closeAudioContext,
|
|
13
13
|
getAudioContext,
|
|
@@ -21,13 +21,13 @@ import {
|
|
|
21
21
|
getVADPlugin,
|
|
22
22
|
registerNoiseSuppressionPlugin,
|
|
23
23
|
registerVADPlugin
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-U26F3GJN.mjs";
|
|
25
25
|
import {
|
|
26
26
|
RNNoisePlugin
|
|
27
27
|
} from "./chunk-XO6B3D4A.mjs";
|
|
28
28
|
import {
|
|
29
29
|
EnergyVADPlugin
|
|
30
|
-
} from "./chunk-
|
|
30
|
+
} from "./chunk-GLKAWCEW.mjs";
|
|
31
31
|
export {
|
|
32
32
|
EnergyVADPlugin,
|
|
33
33
|
RNNoisePlugin,
|
|
@@ -127,11 +127,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
127
127
|
const energyParams = vadConfig?.energyVad || {};
|
|
128
128
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
129
129
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
130
|
-
const
|
|
131
|
-
const
|
|
132
|
-
const
|
|
133
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
134
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
130
|
+
const minSNR = energyParams.minSNR ?? 8;
|
|
131
|
+
const snrRange = energyParams.snrRange ?? 12;
|
|
132
|
+
const minEnergy = energyParams.minEnergy ?? 0.01;
|
|
135
133
|
return `
|
|
136
134
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
137
135
|
constructor() {
|
|
@@ -139,8 +137,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
139
137
|
this.smoothing = ${smoothing};
|
|
140
138
|
this.energy = 0;
|
|
141
139
|
this.noiseFloor = ${initialNoiseFloor};
|
|
142
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
143
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
144
140
|
this.minSNR = ${minSNR};
|
|
145
141
|
this.snrRange = ${snrRange};
|
|
146
142
|
this.minEnergy = ${minEnergy};
|
|
@@ -178,33 +174,15 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
178
174
|
const crestFactor = peak / (instantRms + 1e-10);
|
|
179
175
|
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
180
176
|
|
|
181
|
-
//
|
|
182
|
-
//
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
189
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
190
|
-
|
|
191
|
-
// Only adapt upwards if:
|
|
192
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
193
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
194
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
195
|
-
// This is persistent background noise, adapt upwards
|
|
196
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
197
|
-
} else {
|
|
198
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
199
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
200
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
// Ensure noise floor doesn't drop to absolute zero
|
|
205
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
|
|
177
|
+
// FIXED noise floor with minimal adaptation
|
|
178
|
+
// Only adapt within strict bounds to prevent drift
|
|
179
|
+
const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
|
|
180
|
+
this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
|
|
181
|
+
|
|
182
|
+
// Hard clamp to prevent any drift outside acceptable range
|
|
183
|
+
this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
|
|
206
184
|
|
|
207
|
-
//
|
|
185
|
+
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
208
186
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
209
187
|
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
210
188
|
|
|
@@ -355,11 +333,9 @@ var VADStateMachine = class {
|
|
|
355
333
|
energyVad: {
|
|
356
334
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
357
335
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
362
|
-
minEnergy: config?.energyVad?.minEnergy ?? 3e-3
|
|
336
|
+
minSNR: config?.energyVad?.minSNR ?? 8,
|
|
337
|
+
snrRange: config?.energyVad?.snrRange ?? 12,
|
|
338
|
+
minEnergy: config?.energyVad?.minEnergy ?? 0.01
|
|
363
339
|
}
|
|
364
340
|
};
|
|
365
341
|
this.lastSilenceTime = Date.now();
|
|
@@ -452,10 +428,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
452
428
|
energyVad: {
|
|
453
429
|
smoothing: 0.95,
|
|
454
430
|
initialNoiseFloor: 1e-3,
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
snrRange: 8
|
|
431
|
+
minSNR: 8,
|
|
432
|
+
snrRange: 12,
|
|
433
|
+
minEnergy: 0.01
|
|
459
434
|
},
|
|
460
435
|
...config.vad
|
|
461
436
|
},
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
attachProcessingToTrack
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
5
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-QQFKHTCQ.mjs";
|
|
4
|
+
import "../chunk-WQVMSR7V.mjs";
|
|
5
|
+
import "../chunk-KLBA2CPE.mjs";
|
|
6
6
|
import "../chunk-OZ7KMC4S.mjs";
|
|
7
|
-
import "../chunk-
|
|
7
|
+
import "../chunk-U26F3GJN.mjs";
|
|
8
8
|
import "../chunk-XO6B3D4A.mjs";
|
|
9
|
-
import "../chunk-
|
|
9
|
+
import "../chunk-GLKAWCEW.mjs";
|
|
10
10
|
export {
|
|
11
11
|
attachProcessingToTrack
|
|
12
12
|
};
|
|
@@ -125,11 +125,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
125
125
|
const energyParams = vadConfig?.energyVad || {};
|
|
126
126
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
127
127
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
128
|
-
const
|
|
129
|
-
const
|
|
130
|
-
const
|
|
131
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
132
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
128
|
+
const minSNR = energyParams.minSNR ?? 8;
|
|
129
|
+
const snrRange = energyParams.snrRange ?? 12;
|
|
130
|
+
const minEnergy = energyParams.minEnergy ?? 0.01;
|
|
133
131
|
return `
|
|
134
132
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
135
133
|
constructor() {
|
|
@@ -137,8 +135,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
137
135
|
this.smoothing = ${smoothing};
|
|
138
136
|
this.energy = 0;
|
|
139
137
|
this.noiseFloor = ${initialNoiseFloor};
|
|
140
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
141
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
142
138
|
this.minSNR = ${minSNR};
|
|
143
139
|
this.snrRange = ${snrRange};
|
|
144
140
|
this.minEnergy = ${minEnergy};
|
|
@@ -176,33 +172,15 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
176
172
|
const crestFactor = peak / (instantRms + 1e-10);
|
|
177
173
|
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
178
174
|
|
|
179
|
-
//
|
|
180
|
-
//
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
187
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
188
|
-
|
|
189
|
-
// Only adapt upwards if:
|
|
190
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
191
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
192
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
193
|
-
// This is persistent background noise, adapt upwards
|
|
194
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
195
|
-
} else {
|
|
196
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
197
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
198
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
199
|
-
}
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
// Ensure noise floor doesn't drop to absolute zero
|
|
203
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
|
|
175
|
+
// FIXED noise floor with minimal adaptation
|
|
176
|
+
// Only adapt within strict bounds to prevent drift
|
|
177
|
+
const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
|
|
178
|
+
this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
|
|
179
|
+
|
|
180
|
+
// Hard clamp to prevent any drift outside acceptable range
|
|
181
|
+
this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
|
|
204
182
|
|
|
205
|
-
//
|
|
183
|
+
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
206
184
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
207
185
|
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
208
186
|
|
|
@@ -353,11 +331,9 @@ var VADStateMachine = class {
|
|
|
353
331
|
energyVad: {
|
|
354
332
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
355
333
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
360
|
-
minEnergy: config?.energyVad?.minEnergy ?? 3e-3
|
|
334
|
+
minSNR: config?.energyVad?.minSNR ?? 8,
|
|
335
|
+
snrRange: config?.energyVad?.snrRange ?? 12,
|
|
336
|
+
minEnergy: config?.energyVad?.minEnergy ?? 0.01
|
|
361
337
|
}
|
|
362
338
|
};
|
|
363
339
|
this.lastSilenceTime = Date.now();
|
|
@@ -450,10 +426,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
450
426
|
energyVad: {
|
|
451
427
|
smoothing: 0.95,
|
|
452
428
|
initialNoiseFloor: 1e-3,
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
snrRange: 8
|
|
429
|
+
minSNR: 8,
|
|
430
|
+
snrRange: 12,
|
|
431
|
+
minEnergy: 0.01
|
|
457
432
|
},
|
|
458
433
|
...config.vad
|
|
459
434
|
},
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createAudioPipeline
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-
|
|
3
|
+
} from "../chunk-WQVMSR7V.mjs";
|
|
4
|
+
import "../chunk-KLBA2CPE.mjs";
|
|
5
5
|
import "../chunk-OZ7KMC4S.mjs";
|
|
6
|
-
import "../chunk-
|
|
6
|
+
import "../chunk-U26F3GJN.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-GLKAWCEW.mjs";
|
|
9
9
|
export {
|
|
10
10
|
createAudioPipeline
|
|
11
11
|
};
|
package/dist/types.d.mts
CHANGED
|
@@ -95,31 +95,20 @@ interface AudioProcessingConfig {
|
|
|
95
95
|
* Default: 0.001
|
|
96
96
|
*/
|
|
97
97
|
initialNoiseFloor?: number;
|
|
98
|
-
/**
|
|
99
|
-
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.002 (very slow downward drift)
|
|
101
|
-
*/
|
|
102
|
-
noiseFloorAdaptRateQuiet?: number;
|
|
103
|
-
/**
|
|
104
|
-
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Applied to low-energy, low-crest-factor signals (background noise).
|
|
106
|
-
* Default: 0.02
|
|
107
|
-
*/
|
|
108
|
-
noiseFloorAdaptRateLoud?: number;
|
|
109
98
|
/**
|
|
110
99
|
* Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
|
|
111
|
-
* Default:
|
|
100
|
+
* Default: 8.0
|
|
112
101
|
*/
|
|
113
102
|
minSNR?: number;
|
|
114
103
|
/**
|
|
115
104
|
* SNR range in dB for probability scaling.
|
|
116
|
-
* Default:
|
|
105
|
+
* Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
|
|
117
106
|
*/
|
|
118
107
|
snrRange?: number;
|
|
119
108
|
/**
|
|
120
109
|
* Minimum absolute RMS energy to consider as speech.
|
|
121
|
-
* Prevents triggering on
|
|
122
|
-
* Default: 0.003
|
|
110
|
+
* Prevents triggering on quiet background noise.
|
|
111
|
+
* Default: 0.01 (approx -40dB, typical voice level)
|
|
123
112
|
*/
|
|
124
113
|
minEnergy?: number;
|
|
125
114
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -95,31 +95,20 @@ interface AudioProcessingConfig {
|
|
|
95
95
|
* Default: 0.001
|
|
96
96
|
*/
|
|
97
97
|
initialNoiseFloor?: number;
|
|
98
|
-
/**
|
|
99
|
-
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.002 (very slow downward drift)
|
|
101
|
-
*/
|
|
102
|
-
noiseFloorAdaptRateQuiet?: number;
|
|
103
|
-
/**
|
|
104
|
-
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Applied to low-energy, low-crest-factor signals (background noise).
|
|
106
|
-
* Default: 0.02
|
|
107
|
-
*/
|
|
108
|
-
noiseFloorAdaptRateLoud?: number;
|
|
109
98
|
/**
|
|
110
99
|
* Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
|
|
111
|
-
* Default:
|
|
100
|
+
* Default: 8.0
|
|
112
101
|
*/
|
|
113
102
|
minSNR?: number;
|
|
114
103
|
/**
|
|
115
104
|
* SNR range in dB for probability scaling.
|
|
116
|
-
* Default:
|
|
105
|
+
* Default: 12.0 (probability scales from minSNR to minSNR+snrRange)
|
|
117
106
|
*/
|
|
118
107
|
snrRange?: number;
|
|
119
108
|
/**
|
|
120
109
|
* Minimum absolute RMS energy to consider as speech.
|
|
121
|
-
* Prevents triggering on
|
|
122
|
-
* Default: 0.003
|
|
110
|
+
* Prevents triggering on quiet background noise.
|
|
111
|
+
* Default: 0.01 (approx -40dB, typical voice level)
|
|
123
112
|
*/
|
|
124
113
|
minEnergy?: number;
|
|
125
114
|
};
|
package/dist/vad/vad-node.js
CHANGED
|
@@ -27,11 +27,9 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
27
27
|
const energyParams = vadConfig?.energyVad || {};
|
|
28
28
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
29
29
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
30
|
-
const
|
|
31
|
-
const
|
|
32
|
-
const
|
|
33
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
34
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
30
|
+
const minSNR = energyParams.minSNR ?? 8;
|
|
31
|
+
const snrRange = energyParams.snrRange ?? 12;
|
|
32
|
+
const minEnergy = energyParams.minEnergy ?? 0.01;
|
|
35
33
|
return `
|
|
36
34
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
37
35
|
constructor() {
|
|
@@ -39,8 +37,6 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
39
37
|
this.smoothing = ${smoothing};
|
|
40
38
|
this.energy = 0;
|
|
41
39
|
this.noiseFloor = ${initialNoiseFloor};
|
|
42
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
43
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
44
40
|
this.minSNR = ${minSNR};
|
|
45
41
|
this.snrRange = ${snrRange};
|
|
46
42
|
this.minEnergy = ${minEnergy};
|
|
@@ -78,33 +74,15 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
78
74
|
const crestFactor = peak / (instantRms + 1e-10);
|
|
79
75
|
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
80
76
|
|
|
81
|
-
//
|
|
82
|
-
//
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
89
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
90
|
-
|
|
91
|
-
// Only adapt upwards if:
|
|
92
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
93
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
94
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
95
|
-
// This is persistent background noise, adapt upwards
|
|
96
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
97
|
-
} else {
|
|
98
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
99
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
100
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
// Ensure noise floor doesn't drop to absolute zero
|
|
105
|
-
this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
|
|
77
|
+
// FIXED noise floor with minimal adaptation
|
|
78
|
+
// Only adapt within strict bounds to prevent drift
|
|
79
|
+
const targetFloor = Math.max(0.0003, Math.min(0.003, instantRms));
|
|
80
|
+
this.noiseFloor = this.noiseFloor * 0.995 + targetFloor * 0.005;
|
|
81
|
+
|
|
82
|
+
// Hard clamp to prevent any drift outside acceptable range
|
|
83
|
+
this.noiseFloor = Math.max(0.0003, Math.min(0.003, this.noiseFloor));
|
|
106
84
|
|
|
107
|
-
//
|
|
85
|
+
// Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
108
86
|
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
109
87
|
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
110
88
|
|
package/dist/vad/vad-node.mjs
CHANGED
package/dist/vad/vad-state.js
CHANGED
|
@@ -50,11 +50,9 @@ var VADStateMachine = class {
|
|
|
50
50
|
energyVad: {
|
|
51
51
|
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
52
52
|
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
57
|
-
minEnergy: config?.energyVad?.minEnergy ?? 3e-3
|
|
53
|
+
minSNR: config?.energyVad?.minSNR ?? 8,
|
|
54
|
+
snrRange: config?.energyVad?.snrRange ?? 12,
|
|
55
|
+
minEnergy: config?.energyVad?.minEnergy ?? 0.01
|
|
58
56
|
}
|
|
59
57
|
};
|
|
60
58
|
this.lastSilenceTime = Date.now();
|
package/dist/vad/vad-state.mjs
CHANGED