@tensamin/audio 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-AHBRT4RD.mjs → chunk-DF4AYGHJ.mjs} +6 -2
- package/dist/{chunk-NMHKX64G.mjs → chunk-TLPO52HV.mjs} +34 -8
- package/dist/{chunk-ERJVV5JR.mjs → chunk-TWQJGBBU.mjs} +1 -1
- package/dist/{chunk-YOSTLLCS.mjs → chunk-ZCC7ID7L.mjs} +1 -1
- package/dist/extensibility/plugins.js +34 -8
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +39 -9
- package/dist/index.mjs +4 -4
- package/dist/livekit/integration.js +39 -9
- package/dist/livekit/integration.mjs +4 -4
- package/dist/pipeline/audio-pipeline.js +39 -9
- package/dist/pipeline/audio-pipeline.mjs +3 -3
- package/dist/vad/vad-node.d.mts +2 -0
- package/dist/vad/vad-node.d.ts +2 -0
- package/dist/vad/vad-node.js +34 -8
- package/dist/vad/vad-node.mjs +1 -1
- package/package.json +1 -1
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
import {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin
|
|
12
|
-
} from "./chunk-YOSTLLCS.mjs";
|
|
12
|
+
} from "./chunk-ZCC7ID7L.mjs";
|
|
13
13
|
|
|
14
14
|
// src/pipeline/audio-pipeline.ts
|
|
15
15
|
import mitt from "mitt";
|
|
@@ -93,12 +93,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
93
93
|
throw err;
|
|
94
94
|
}
|
|
95
95
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
96
|
+
let vadPlugin;
|
|
96
97
|
try {
|
|
97
|
-
|
|
98
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
98
99
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
99
100
|
try {
|
|
100
101
|
const timestamp = context.currentTime * 1e3;
|
|
101
102
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
103
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
104
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
105
|
+
}
|
|
102
106
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
103
107
|
emitter.emit("vadChange", newState);
|
|
104
108
|
lastVadState = newState;
|
|
@@ -4,7 +4,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
4
4
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
5
5
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
6
6
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
7
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-
|
|
7
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
|
|
8
8
|
const minSNR = energyParams.minSNR ?? 2;
|
|
9
9
|
const snrRange = energyParams.snrRange ?? 8;
|
|
10
10
|
return `
|
|
@@ -18,6 +18,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
18
18
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
19
19
|
this.minSNR = ${minSNR};
|
|
20
20
|
this.snrRange = ${snrRange};
|
|
21
|
+
this.isSpeaking = false;
|
|
22
|
+
|
|
23
|
+
this.port.onmessage = (event) => {
|
|
24
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
25
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
26
|
+
}
|
|
27
|
+
};
|
|
21
28
|
}
|
|
22
29
|
|
|
23
30
|
process(inputs, outputs, parameters) {
|
|
@@ -32,14 +39,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
32
39
|
}
|
|
33
40
|
const rms = Math.sqrt(sum / channel.length);
|
|
34
41
|
|
|
35
|
-
// Adaptive noise floor estimation
|
|
36
|
-
//
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
43
|
+
// This prevents the noise floor from rising during speech
|
|
44
|
+
if (!this.isSpeaking) {
|
|
45
|
+
if (rms < this.noiseFloor) {
|
|
46
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
47
|
+
} else {
|
|
48
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
49
|
+
// This could be brief noise we haven't classified as speech yet
|
|
50
|
+
|
|
51
|
+
// SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
|
|
52
|
+
// assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
|
|
53
|
+
// This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
|
|
54
|
+
const instantSnr = rms / (this.noiseFloor + 1e-6);
|
|
55
|
+
|
|
56
|
+
if (instantSnr < 3.0) {
|
|
57
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
42
60
|
}
|
|
61
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
43
62
|
|
|
44
63
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
45
64
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -60,6 +79,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
60
79
|
};
|
|
61
80
|
var EnergyVADPlugin = class {
|
|
62
81
|
name = "energy-vad";
|
|
82
|
+
workletNode = null;
|
|
63
83
|
async createNode(context, config, onDecision) {
|
|
64
84
|
if (!config?.enabled) {
|
|
65
85
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -86,6 +106,7 @@ var EnergyVADPlugin = class {
|
|
|
86
106
|
let node;
|
|
87
107
|
try {
|
|
88
108
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
109
|
+
this.workletNode = node;
|
|
89
110
|
console.log("Energy VAD node created successfully");
|
|
90
111
|
} catch (e) {
|
|
91
112
|
const error = new Error(
|
|
@@ -111,6 +132,11 @@ var EnergyVADPlugin = class {
|
|
|
111
132
|
};
|
|
112
133
|
return node;
|
|
113
134
|
}
|
|
135
|
+
updateSpeakingState(isSpeaking) {
|
|
136
|
+
if (this.workletNode) {
|
|
137
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
138
|
+
}
|
|
139
|
+
}
|
|
114
140
|
};
|
|
115
141
|
|
|
116
142
|
export {
|
|
@@ -107,7 +107,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
107
107
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
108
108
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
109
109
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
110
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-
|
|
110
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
|
|
111
111
|
const minSNR = energyParams.minSNR ?? 2;
|
|
112
112
|
const snrRange = energyParams.snrRange ?? 8;
|
|
113
113
|
return `
|
|
@@ -121,6 +121,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
121
121
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
122
122
|
this.minSNR = ${minSNR};
|
|
123
123
|
this.snrRange = ${snrRange};
|
|
124
|
+
this.isSpeaking = false;
|
|
125
|
+
|
|
126
|
+
this.port.onmessage = (event) => {
|
|
127
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
128
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
129
|
+
}
|
|
130
|
+
};
|
|
124
131
|
}
|
|
125
132
|
|
|
126
133
|
process(inputs, outputs, parameters) {
|
|
@@ -135,14 +142,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
135
142
|
}
|
|
136
143
|
const rms = Math.sqrt(sum / channel.length);
|
|
137
144
|
|
|
138
|
-
// Adaptive noise floor estimation
|
|
139
|
-
//
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
146
|
+
// This prevents the noise floor from rising during speech
|
|
147
|
+
if (!this.isSpeaking) {
|
|
148
|
+
if (rms < this.noiseFloor) {
|
|
149
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
150
|
+
} else {
|
|
151
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
152
|
+
// This could be brief noise we haven't classified as speech yet
|
|
153
|
+
|
|
154
|
+
// SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
|
|
155
|
+
// assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
|
|
156
|
+
// This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
|
|
157
|
+
const instantSnr = rms / (this.noiseFloor + 1e-6);
|
|
158
|
+
|
|
159
|
+
if (instantSnr < 3.0) {
|
|
160
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
145
163
|
}
|
|
164
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
146
165
|
|
|
147
166
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
148
167
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -163,6 +182,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
163
182
|
};
|
|
164
183
|
var EnergyVADPlugin = class {
|
|
165
184
|
name = "energy-vad";
|
|
185
|
+
workletNode = null;
|
|
166
186
|
async createNode(context, config, onDecision) {
|
|
167
187
|
if (!config?.enabled) {
|
|
168
188
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -189,6 +209,7 @@ var EnergyVADPlugin = class {
|
|
|
189
209
|
let node;
|
|
190
210
|
try {
|
|
191
211
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
212
|
+
this.workletNode = node;
|
|
192
213
|
console.log("Energy VAD node created successfully");
|
|
193
214
|
} catch (e) {
|
|
194
215
|
const error = new Error(
|
|
@@ -214,6 +235,11 @@ var EnergyVADPlugin = class {
|
|
|
214
235
|
};
|
|
215
236
|
return node;
|
|
216
237
|
}
|
|
238
|
+
updateSpeakingState(isSpeaking) {
|
|
239
|
+
if (this.workletNode) {
|
|
240
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
241
|
+
}
|
|
242
|
+
}
|
|
217
243
|
};
|
|
218
244
|
|
|
219
245
|
// src/extensibility/plugins.ts
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-YOSTLLCS.mjs";
|
|
6
|
+
} from "../chunk-ZCC7ID7L.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-NMHKX64G.mjs";
|
|
8
|
+
import "../chunk-TLPO52HV.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|
package/dist/index.js
CHANGED
|
@@ -159,7 +159,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
159
159
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
160
160
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
161
161
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
162
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-
|
|
162
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
|
|
163
163
|
const minSNR = energyParams.minSNR ?? 2;
|
|
164
164
|
const snrRange = energyParams.snrRange ?? 8;
|
|
165
165
|
return `
|
|
@@ -173,6 +173,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
173
173
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
174
174
|
this.minSNR = ${minSNR};
|
|
175
175
|
this.snrRange = ${snrRange};
|
|
176
|
+
this.isSpeaking = false;
|
|
177
|
+
|
|
178
|
+
this.port.onmessage = (event) => {
|
|
179
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
180
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
181
|
+
}
|
|
182
|
+
};
|
|
176
183
|
}
|
|
177
184
|
|
|
178
185
|
process(inputs, outputs, parameters) {
|
|
@@ -187,14 +194,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
187
194
|
}
|
|
188
195
|
const rms = Math.sqrt(sum / channel.length);
|
|
189
196
|
|
|
190
|
-
// Adaptive noise floor estimation
|
|
191
|
-
//
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
198
|
+
// This prevents the noise floor from rising during speech
|
|
199
|
+
if (!this.isSpeaking) {
|
|
200
|
+
if (rms < this.noiseFloor) {
|
|
201
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
202
|
+
} else {
|
|
203
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
204
|
+
// This could be brief noise we haven't classified as speech yet
|
|
205
|
+
|
|
206
|
+
// SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
|
|
207
|
+
// assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
|
|
208
|
+
// This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
|
|
209
|
+
const instantSnr = rms / (this.noiseFloor + 1e-6);
|
|
210
|
+
|
|
211
|
+
if (instantSnr < 3.0) {
|
|
212
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
197
215
|
}
|
|
216
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
198
217
|
|
|
199
218
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
200
219
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -215,6 +234,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
215
234
|
};
|
|
216
235
|
var EnergyVADPlugin = class {
|
|
217
236
|
name = "energy-vad";
|
|
237
|
+
workletNode = null;
|
|
218
238
|
async createNode(context, config, onDecision) {
|
|
219
239
|
if (!config?.enabled) {
|
|
220
240
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -241,6 +261,7 @@ var EnergyVADPlugin = class {
|
|
|
241
261
|
let node;
|
|
242
262
|
try {
|
|
243
263
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
264
|
+
this.workletNode = node;
|
|
244
265
|
console.log("Energy VAD node created successfully");
|
|
245
266
|
} catch (e) {
|
|
246
267
|
const error = new Error(
|
|
@@ -266,6 +287,11 @@ var EnergyVADPlugin = class {
|
|
|
266
287
|
};
|
|
267
288
|
return node;
|
|
268
289
|
}
|
|
290
|
+
updateSpeakingState(isSpeaking) {
|
|
291
|
+
if (this.workletNode) {
|
|
292
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
293
|
+
}
|
|
294
|
+
}
|
|
269
295
|
};
|
|
270
296
|
|
|
271
297
|
// src/extensibility/plugins.ts
|
|
@@ -473,12 +499,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
473
499
|
throw err;
|
|
474
500
|
}
|
|
475
501
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
502
|
+
let vadPlugin;
|
|
476
503
|
try {
|
|
477
|
-
|
|
504
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
478
505
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
479
506
|
try {
|
|
480
507
|
const timestamp = context.currentTime * 1e3;
|
|
481
508
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
509
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
510
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
511
|
+
}
|
|
482
512
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
483
513
|
emitter.emit("vadChange", newState);
|
|
484
514
|
lastVadState = newState;
|
package/dist/index.mjs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import "./chunk-WBQAMGXK.mjs";
|
|
2
2
|
import {
|
|
3
3
|
attachProcessingToTrack
|
|
4
|
-
} from "./chunk-ERJVV5JR.mjs";
|
|
4
|
+
} from "./chunk-TWQJGBBU.mjs";
|
|
5
5
|
import {
|
|
6
6
|
createAudioPipeline
|
|
7
|
-
} from "./chunk-AHBRT4RD.mjs";
|
|
7
|
+
} from "./chunk-DF4AYGHJ.mjs";
|
|
8
8
|
import {
|
|
9
9
|
VADStateMachine
|
|
10
10
|
} from "./chunk-N553RHTI.mjs";
|
|
@@ -21,13 +21,13 @@ import {
|
|
|
21
21
|
getVADPlugin,
|
|
22
22
|
registerNoiseSuppressionPlugin,
|
|
23
23
|
registerVADPlugin
|
|
24
|
-
} from "./chunk-YOSTLLCS.mjs";
|
|
24
|
+
} from "./chunk-ZCC7ID7L.mjs";
|
|
25
25
|
import {
|
|
26
26
|
RNNoisePlugin
|
|
27
27
|
} from "./chunk-XO6B3D4A.mjs";
|
|
28
28
|
import {
|
|
29
29
|
EnergyVADPlugin
|
|
30
|
-
} from "./chunk-NMHKX64G.mjs";
|
|
30
|
+
} from "./chunk-TLPO52HV.mjs";
|
|
31
31
|
export {
|
|
32
32
|
EnergyVADPlugin,
|
|
33
33
|
RNNoisePlugin,
|
|
@@ -128,7 +128,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
128
128
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
129
129
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
130
130
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
131
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-
|
|
131
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
|
|
132
132
|
const minSNR = energyParams.minSNR ?? 2;
|
|
133
133
|
const snrRange = energyParams.snrRange ?? 8;
|
|
134
134
|
return `
|
|
@@ -142,6 +142,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
142
142
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
143
143
|
this.minSNR = ${minSNR};
|
|
144
144
|
this.snrRange = ${snrRange};
|
|
145
|
+
this.isSpeaking = false;
|
|
146
|
+
|
|
147
|
+
this.port.onmessage = (event) => {
|
|
148
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
149
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
150
|
+
}
|
|
151
|
+
};
|
|
145
152
|
}
|
|
146
153
|
|
|
147
154
|
process(inputs, outputs, parameters) {
|
|
@@ -156,14 +163,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
156
163
|
}
|
|
157
164
|
const rms = Math.sqrt(sum / channel.length);
|
|
158
165
|
|
|
159
|
-
// Adaptive noise floor estimation
|
|
160
|
-
//
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
167
|
+
// This prevents the noise floor from rising during speech
|
|
168
|
+
if (!this.isSpeaking) {
|
|
169
|
+
if (rms < this.noiseFloor) {
|
|
170
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
171
|
+
} else {
|
|
172
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
173
|
+
// This could be brief noise we haven't classified as speech yet
|
|
174
|
+
|
|
175
|
+
// SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
|
|
176
|
+
// assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
|
|
177
|
+
// This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
|
|
178
|
+
const instantSnr = rms / (this.noiseFloor + 1e-6);
|
|
179
|
+
|
|
180
|
+
if (instantSnr < 3.0) {
|
|
181
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
166
184
|
}
|
|
185
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
167
186
|
|
|
168
187
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
169
188
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -184,6 +203,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
184
203
|
};
|
|
185
204
|
var EnergyVADPlugin = class {
|
|
186
205
|
name = "energy-vad";
|
|
206
|
+
workletNode = null;
|
|
187
207
|
async createNode(context, config, onDecision) {
|
|
188
208
|
if (!config?.enabled) {
|
|
189
209
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -210,6 +230,7 @@ var EnergyVADPlugin = class {
|
|
|
210
230
|
let node;
|
|
211
231
|
try {
|
|
212
232
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
233
|
+
this.workletNode = node;
|
|
213
234
|
console.log("Energy VAD node created successfully");
|
|
214
235
|
} catch (e) {
|
|
215
236
|
const error = new Error(
|
|
@@ -235,6 +256,11 @@ var EnergyVADPlugin = class {
|
|
|
235
256
|
};
|
|
236
257
|
return node;
|
|
237
258
|
}
|
|
259
|
+
updateSpeakingState(isSpeaking) {
|
|
260
|
+
if (this.workletNode) {
|
|
261
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
262
|
+
}
|
|
263
|
+
}
|
|
238
264
|
};
|
|
239
265
|
|
|
240
266
|
// src/extensibility/plugins.ts
|
|
@@ -436,12 +462,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
436
462
|
throw err;
|
|
437
463
|
}
|
|
438
464
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
465
|
+
let vadPlugin;
|
|
439
466
|
try {
|
|
440
|
-
|
|
467
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
441
468
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
442
469
|
try {
|
|
443
470
|
const timestamp = context.currentTime * 1e3;
|
|
444
471
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
472
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
473
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
474
|
+
}
|
|
445
475
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
446
476
|
emitter.emit("vadChange", newState);
|
|
447
477
|
lastVadState = newState;
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
attachProcessingToTrack
|
|
3
|
-
} from "../chunk-ERJVV5JR.mjs";
|
|
4
|
-
import "../chunk-AHBRT4RD.mjs";
|
|
3
|
+
} from "../chunk-TWQJGBBU.mjs";
|
|
4
|
+
import "../chunk-DF4AYGHJ.mjs";
|
|
5
5
|
import "../chunk-N553RHTI.mjs";
|
|
6
6
|
import "../chunk-OZ7KMC4S.mjs";
|
|
7
|
-
import "../chunk-YOSTLLCS.mjs";
|
|
7
|
+
import "../chunk-ZCC7ID7L.mjs";
|
|
8
8
|
import "../chunk-XO6B3D4A.mjs";
|
|
9
|
-
import "../chunk-NMHKX64G.mjs";
|
|
9
|
+
import "../chunk-TLPO52HV.mjs";
|
|
10
10
|
export {
|
|
11
11
|
attachProcessingToTrack
|
|
12
12
|
};
|
|
@@ -126,7 +126,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
126
126
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
127
127
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
128
128
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
129
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-
|
|
129
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
|
|
130
130
|
const minSNR = energyParams.minSNR ?? 2;
|
|
131
131
|
const snrRange = energyParams.snrRange ?? 8;
|
|
132
132
|
return `
|
|
@@ -140,6 +140,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
140
140
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
141
141
|
this.minSNR = ${minSNR};
|
|
142
142
|
this.snrRange = ${snrRange};
|
|
143
|
+
this.isSpeaking = false;
|
|
144
|
+
|
|
145
|
+
this.port.onmessage = (event) => {
|
|
146
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
147
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
148
|
+
}
|
|
149
|
+
};
|
|
143
150
|
}
|
|
144
151
|
|
|
145
152
|
process(inputs, outputs, parameters) {
|
|
@@ -154,14 +161,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
154
161
|
}
|
|
155
162
|
const rms = Math.sqrt(sum / channel.length);
|
|
156
163
|
|
|
157
|
-
// Adaptive noise floor estimation
|
|
158
|
-
//
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
165
|
+
// This prevents the noise floor from rising during speech
|
|
166
|
+
if (!this.isSpeaking) {
|
|
167
|
+
if (rms < this.noiseFloor) {
|
|
168
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
169
|
+
} else {
|
|
170
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
171
|
+
// This could be brief noise we haven't classified as speech yet
|
|
172
|
+
|
|
173
|
+
// SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
|
|
174
|
+
// assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
|
|
175
|
+
// This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
|
|
176
|
+
const instantSnr = rms / (this.noiseFloor + 1e-6);
|
|
177
|
+
|
|
178
|
+
if (instantSnr < 3.0) {
|
|
179
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
164
182
|
}
|
|
183
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
165
184
|
|
|
166
185
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
167
186
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -182,6 +201,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
182
201
|
};
|
|
183
202
|
var EnergyVADPlugin = class {
|
|
184
203
|
name = "energy-vad";
|
|
204
|
+
workletNode = null;
|
|
185
205
|
async createNode(context, config, onDecision) {
|
|
186
206
|
if (!config?.enabled) {
|
|
187
207
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -208,6 +228,7 @@ var EnergyVADPlugin = class {
|
|
|
208
228
|
let node;
|
|
209
229
|
try {
|
|
210
230
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
231
|
+
this.workletNode = node;
|
|
211
232
|
console.log("Energy VAD node created successfully");
|
|
212
233
|
} catch (e) {
|
|
213
234
|
const error = new Error(
|
|
@@ -233,6 +254,11 @@ var EnergyVADPlugin = class {
|
|
|
233
254
|
};
|
|
234
255
|
return node;
|
|
235
256
|
}
|
|
257
|
+
updateSpeakingState(isSpeaking) {
|
|
258
|
+
if (this.workletNode) {
|
|
259
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
260
|
+
}
|
|
261
|
+
}
|
|
236
262
|
};
|
|
237
263
|
|
|
238
264
|
// src/extensibility/plugins.ts
|
|
@@ -434,12 +460,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
434
460
|
throw err;
|
|
435
461
|
}
|
|
436
462
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
463
|
+
let vadPlugin;
|
|
437
464
|
try {
|
|
438
|
-
|
|
465
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
439
466
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
440
467
|
try {
|
|
441
468
|
const timestamp = context.currentTime * 1e3;
|
|
442
469
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
470
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
471
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
472
|
+
}
|
|
443
473
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
444
474
|
emitter.emit("vadChange", newState);
|
|
445
475
|
lastVadState = newState;
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createAudioPipeline
|
|
3
|
-
} from "../chunk-AHBRT4RD.mjs";
|
|
3
|
+
} from "../chunk-DF4AYGHJ.mjs";
|
|
4
4
|
import "../chunk-N553RHTI.mjs";
|
|
5
5
|
import "../chunk-OZ7KMC4S.mjs";
|
|
6
|
-
import "../chunk-YOSTLLCS.mjs";
|
|
6
|
+
import "../chunk-ZCC7ID7L.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-NMHKX64G.mjs";
|
|
8
|
+
import "../chunk-TLPO52HV.mjs";
|
|
9
9
|
export {
|
|
10
10
|
createAudioPipeline
|
|
11
11
|
};
|
package/dist/vad/vad-node.d.mts
CHANGED
|
@@ -3,7 +3,9 @@ import 'mitt';
|
|
|
3
3
|
|
|
4
4
|
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
5
|
name: string;
|
|
6
|
+
private workletNode;
|
|
6
7
|
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
8
|
+
updateSpeakingState(isSpeaking: boolean): void;
|
|
7
9
|
}
|
|
8
10
|
|
|
9
11
|
export { EnergyVADPlugin };
|
package/dist/vad/vad-node.d.ts
CHANGED
|
@@ -3,7 +3,9 @@ import 'mitt';
|
|
|
3
3
|
|
|
4
4
|
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
5
|
name: string;
|
|
6
|
+
private workletNode;
|
|
6
7
|
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
8
|
+
updateSpeakingState(isSpeaking: boolean): void;
|
|
7
9
|
}
|
|
8
10
|
|
|
9
11
|
export { EnergyVADPlugin };
|
package/dist/vad/vad-node.js
CHANGED
|
@@ -28,7 +28,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
|
|
|
28
28
|
const smoothing = energyParams.smoothing ?? 0.95;
|
|
29
29
|
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
30
30
|
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
31
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-
|
|
31
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
|
|
32
32
|
const minSNR = energyParams.minSNR ?? 2;
|
|
33
33
|
const snrRange = energyParams.snrRange ?? 8;
|
|
34
34
|
return `
|
|
@@ -42,6 +42,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
42
42
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
43
43
|
this.minSNR = ${minSNR};
|
|
44
44
|
this.snrRange = ${snrRange};
|
|
45
|
+
this.isSpeaking = false;
|
|
46
|
+
|
|
47
|
+
this.port.onmessage = (event) => {
|
|
48
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
49
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
50
|
+
}
|
|
51
|
+
};
|
|
45
52
|
}
|
|
46
53
|
|
|
47
54
|
process(inputs, outputs, parameters) {
|
|
@@ -56,14 +63,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
56
63
|
}
|
|
57
64
|
const rms = Math.sqrt(sum / channel.length);
|
|
58
65
|
|
|
59
|
-
// Adaptive noise floor estimation
|
|
60
|
-
//
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
67
|
+
// This prevents the noise floor from rising during speech
|
|
68
|
+
if (!this.isSpeaking) {
|
|
69
|
+
if (rms < this.noiseFloor) {
|
|
70
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
71
|
+
} else {
|
|
72
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
73
|
+
// This could be brief noise we haven't classified as speech yet
|
|
74
|
+
|
|
75
|
+
// SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
|
|
76
|
+
// assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
|
|
77
|
+
// This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
|
|
78
|
+
const instantSnr = rms / (this.noiseFloor + 1e-6);
|
|
79
|
+
|
|
80
|
+
if (instantSnr < 3.0) {
|
|
81
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
66
84
|
}
|
|
85
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
67
86
|
|
|
68
87
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
69
88
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -84,6 +103,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
84
103
|
};
|
|
85
104
|
var EnergyVADPlugin = class {
|
|
86
105
|
name = "energy-vad";
|
|
106
|
+
workletNode = null;
|
|
87
107
|
async createNode(context, config, onDecision) {
|
|
88
108
|
if (!config?.enabled) {
|
|
89
109
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -110,6 +130,7 @@ var EnergyVADPlugin = class {
|
|
|
110
130
|
let node;
|
|
111
131
|
try {
|
|
112
132
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
133
|
+
this.workletNode = node;
|
|
113
134
|
console.log("Energy VAD node created successfully");
|
|
114
135
|
} catch (e) {
|
|
115
136
|
const error = new Error(
|
|
@@ -135,6 +156,11 @@ var EnergyVADPlugin = class {
|
|
|
135
156
|
};
|
|
136
157
|
return node;
|
|
137
158
|
}
|
|
159
|
+
updateSpeakingState(isSpeaking) {
|
|
160
|
+
if (this.workletNode) {
|
|
161
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
162
|
+
}
|
|
163
|
+
}
|
|
138
164
|
};
|
|
139
165
|
// Annotate the CommonJS export names for ESM import in node:
|
|
140
166
|
0 && (module.exports = {
|
package/dist/vad/vad-node.mjs
CHANGED