@tensamin/audio 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-ERJVV5JR.mjs → chunk-GVKCBKW6.mjs} +1 -1
- package/dist/{chunk-YOSTLLCS.mjs → chunk-H5UKZU2Y.mjs} +1 -1
- package/dist/{chunk-NMHKX64G.mjs → chunk-VEJXAEMM.mjs} +25 -7
- package/dist/{chunk-AHBRT4RD.mjs → chunk-XXTNAUYX.mjs} +6 -2
- package/dist/extensibility/plugins.js +25 -7
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +30 -8
- package/dist/index.mjs +4 -4
- package/dist/livekit/integration.js +30 -8
- package/dist/livekit/integration.mjs +4 -4
- package/dist/pipeline/audio-pipeline.js +30 -8
- package/dist/pipeline/audio-pipeline.mjs +3 -3
- package/dist/vad/vad-node.d.mts +2 -0
- package/dist/vad/vad-node.d.ts +2 -0
- package/dist/vad/vad-node.js +25 -7
- package/dist/vad/vad-node.mjs +1 -1
- package/package.json +1 -1
|
@@ -18,6 +18,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
18
18
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
19
19
|
this.minSNR = ${minSNR};
|
|
20
20
|
this.snrRange = ${snrRange};
|
|
21
|
+
this.isSpeaking = false;
|
|
22
|
+
|
|
23
|
+
this.port.onmessage = (event) => {
|
|
24
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
25
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
26
|
+
}
|
|
27
|
+
};
|
|
21
28
|
}
|
|
22
29
|
|
|
23
30
|
process(inputs, outputs, parameters) {
|
|
@@ -32,14 +39,18 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
32
39
|
}
|
|
33
40
|
const rms = Math.sqrt(sum / channel.length);
|
|
34
41
|
|
|
35
|
-
// Adaptive noise floor estimation
|
|
36
|
-
//
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
43
|
+
// This prevents the noise floor from rising during speech
|
|
44
|
+
if (!this.isSpeaking) {
|
|
45
|
+
if (rms < this.noiseFloor) {
|
|
46
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
47
|
+
} else {
|
|
48
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
49
|
+
// This could be brief noise we haven't classified as speech yet
|
|
50
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
51
|
+
}
|
|
42
52
|
}
|
|
53
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
43
54
|
|
|
44
55
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
45
56
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -60,6 +71,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
60
71
|
};
|
|
61
72
|
var EnergyVADPlugin = class {
|
|
62
73
|
name = "energy-vad";
|
|
74
|
+
workletNode = null;
|
|
63
75
|
async createNode(context, config, onDecision) {
|
|
64
76
|
if (!config?.enabled) {
|
|
65
77
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -86,6 +98,7 @@ var EnergyVADPlugin = class {
|
|
|
86
98
|
let node;
|
|
87
99
|
try {
|
|
88
100
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
101
|
+
this.workletNode = node;
|
|
89
102
|
console.log("Energy VAD node created successfully");
|
|
90
103
|
} catch (e) {
|
|
91
104
|
const error = new Error(
|
|
@@ -111,6 +124,11 @@ var EnergyVADPlugin = class {
|
|
|
111
124
|
};
|
|
112
125
|
return node;
|
|
113
126
|
}
|
|
127
|
+
updateSpeakingState(isSpeaking) {
|
|
128
|
+
if (this.workletNode) {
|
|
129
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
130
|
+
}
|
|
131
|
+
}
|
|
114
132
|
};
|
|
115
133
|
|
|
116
134
|
export {
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
import {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin
|
|
12
|
-
} from "./chunk-YOSTLLCS.mjs";
|
|
12
|
+
} from "./chunk-H5UKZU2Y.mjs";
|
|
13
13
|
|
|
14
14
|
// src/pipeline/audio-pipeline.ts
|
|
15
15
|
import mitt from "mitt";
|
|
@@ -93,12 +93,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
93
93
|
throw err;
|
|
94
94
|
}
|
|
95
95
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
96
|
+
let vadPlugin;
|
|
96
97
|
try {
|
|
97
|
-
|
|
98
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
98
99
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
99
100
|
try {
|
|
100
101
|
const timestamp = context.currentTime * 1e3;
|
|
101
102
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
103
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
104
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
105
|
+
}
|
|
102
106
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
103
107
|
emitter.emit("vadChange", newState);
|
|
104
108
|
lastVadState = newState;
|
|
@@ -121,6 +121,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
121
121
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
122
122
|
this.minSNR = ${minSNR};
|
|
123
123
|
this.snrRange = ${snrRange};
|
|
124
|
+
this.isSpeaking = false;
|
|
125
|
+
|
|
126
|
+
this.port.onmessage = (event) => {
|
|
127
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
128
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
129
|
+
}
|
|
130
|
+
};
|
|
124
131
|
}
|
|
125
132
|
|
|
126
133
|
process(inputs, outputs, parameters) {
|
|
@@ -135,14 +142,18 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
135
142
|
}
|
|
136
143
|
const rms = Math.sqrt(sum / channel.length);
|
|
137
144
|
|
|
138
|
-
// Adaptive noise floor estimation
|
|
139
|
-
//
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
146
|
+
// This prevents the noise floor from rising during speech
|
|
147
|
+
if (!this.isSpeaking) {
|
|
148
|
+
if (rms < this.noiseFloor) {
|
|
149
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
150
|
+
} else {
|
|
151
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
152
|
+
// This could be brief noise we haven't classified as speech yet
|
|
153
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
154
|
+
}
|
|
145
155
|
}
|
|
156
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
146
157
|
|
|
147
158
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
148
159
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -163,6 +174,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
163
174
|
};
|
|
164
175
|
var EnergyVADPlugin = class {
|
|
165
176
|
name = "energy-vad";
|
|
177
|
+
workletNode = null;
|
|
166
178
|
async createNode(context, config, onDecision) {
|
|
167
179
|
if (!config?.enabled) {
|
|
168
180
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -189,6 +201,7 @@ var EnergyVADPlugin = class {
|
|
|
189
201
|
let node;
|
|
190
202
|
try {
|
|
191
203
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
204
|
+
this.workletNode = node;
|
|
192
205
|
console.log("Energy VAD node created successfully");
|
|
193
206
|
} catch (e) {
|
|
194
207
|
const error = new Error(
|
|
@@ -214,6 +227,11 @@ var EnergyVADPlugin = class {
|
|
|
214
227
|
};
|
|
215
228
|
return node;
|
|
216
229
|
}
|
|
230
|
+
updateSpeakingState(isSpeaking) {
|
|
231
|
+
if (this.workletNode) {
|
|
232
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
233
|
+
}
|
|
234
|
+
}
|
|
217
235
|
};
|
|
218
236
|
|
|
219
237
|
// src/extensibility/plugins.ts
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-YOSTLLCS.mjs";
|
|
6
|
+
} from "../chunk-H5UKZU2Y.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-NMHKX64G.mjs";
|
|
8
|
+
import "../chunk-VEJXAEMM.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|
package/dist/index.js
CHANGED
|
@@ -173,6 +173,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
173
173
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
174
174
|
this.minSNR = ${minSNR};
|
|
175
175
|
this.snrRange = ${snrRange};
|
|
176
|
+
this.isSpeaking = false;
|
|
177
|
+
|
|
178
|
+
this.port.onmessage = (event) => {
|
|
179
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
180
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
181
|
+
}
|
|
182
|
+
};
|
|
176
183
|
}
|
|
177
184
|
|
|
178
185
|
process(inputs, outputs, parameters) {
|
|
@@ -187,14 +194,18 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
187
194
|
}
|
|
188
195
|
const rms = Math.sqrt(sum / channel.length);
|
|
189
196
|
|
|
190
|
-
// Adaptive noise floor estimation
|
|
191
|
-
//
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
198
|
+
// This prevents the noise floor from rising during speech
|
|
199
|
+
if (!this.isSpeaking) {
|
|
200
|
+
if (rms < this.noiseFloor) {
|
|
201
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
202
|
+
} else {
|
|
203
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
204
|
+
// This could be brief noise we haven't classified as speech yet
|
|
205
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
206
|
+
}
|
|
197
207
|
}
|
|
208
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
198
209
|
|
|
199
210
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
200
211
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -215,6 +226,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
215
226
|
};
|
|
216
227
|
var EnergyVADPlugin = class {
|
|
217
228
|
name = "energy-vad";
|
|
229
|
+
workletNode = null;
|
|
218
230
|
async createNode(context, config, onDecision) {
|
|
219
231
|
if (!config?.enabled) {
|
|
220
232
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -241,6 +253,7 @@ var EnergyVADPlugin = class {
|
|
|
241
253
|
let node;
|
|
242
254
|
try {
|
|
243
255
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
256
|
+
this.workletNode = node;
|
|
244
257
|
console.log("Energy VAD node created successfully");
|
|
245
258
|
} catch (e) {
|
|
246
259
|
const error = new Error(
|
|
@@ -266,6 +279,11 @@ var EnergyVADPlugin = class {
|
|
|
266
279
|
};
|
|
267
280
|
return node;
|
|
268
281
|
}
|
|
282
|
+
updateSpeakingState(isSpeaking) {
|
|
283
|
+
if (this.workletNode) {
|
|
284
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
285
|
+
}
|
|
286
|
+
}
|
|
269
287
|
};
|
|
270
288
|
|
|
271
289
|
// src/extensibility/plugins.ts
|
|
@@ -473,12 +491,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
473
491
|
throw err;
|
|
474
492
|
}
|
|
475
493
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
494
|
+
let vadPlugin;
|
|
476
495
|
try {
|
|
477
|
-
|
|
496
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
478
497
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
479
498
|
try {
|
|
480
499
|
const timestamp = context.currentTime * 1e3;
|
|
481
500
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
501
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
502
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
503
|
+
}
|
|
482
504
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
483
505
|
emitter.emit("vadChange", newState);
|
|
484
506
|
lastVadState = newState;
|
package/dist/index.mjs
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import "./chunk-WBQAMGXK.mjs";
|
|
2
2
|
import {
|
|
3
3
|
attachProcessingToTrack
|
|
4
|
-
} from "./chunk-ERJVV5JR.mjs";
|
|
4
|
+
} from "./chunk-GVKCBKW6.mjs";
|
|
5
5
|
import {
|
|
6
6
|
createAudioPipeline
|
|
7
|
-
} from "./chunk-AHBRT4RD.mjs";
|
|
7
|
+
} from "./chunk-XXTNAUYX.mjs";
|
|
8
8
|
import {
|
|
9
9
|
VADStateMachine
|
|
10
10
|
} from "./chunk-N553RHTI.mjs";
|
|
@@ -21,13 +21,13 @@ import {
|
|
|
21
21
|
getVADPlugin,
|
|
22
22
|
registerNoiseSuppressionPlugin,
|
|
23
23
|
registerVADPlugin
|
|
24
|
-
} from "./chunk-YOSTLLCS.mjs";
|
|
24
|
+
} from "./chunk-H5UKZU2Y.mjs";
|
|
25
25
|
import {
|
|
26
26
|
RNNoisePlugin
|
|
27
27
|
} from "./chunk-XO6B3D4A.mjs";
|
|
28
28
|
import {
|
|
29
29
|
EnergyVADPlugin
|
|
30
|
-
} from "./chunk-NMHKX64G.mjs";
|
|
30
|
+
} from "./chunk-VEJXAEMM.mjs";
|
|
31
31
|
export {
|
|
32
32
|
EnergyVADPlugin,
|
|
33
33
|
RNNoisePlugin,
|
|
@@ -142,6 +142,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
142
142
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
143
143
|
this.minSNR = ${minSNR};
|
|
144
144
|
this.snrRange = ${snrRange};
|
|
145
|
+
this.isSpeaking = false;
|
|
146
|
+
|
|
147
|
+
this.port.onmessage = (event) => {
|
|
148
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
149
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
150
|
+
}
|
|
151
|
+
};
|
|
145
152
|
}
|
|
146
153
|
|
|
147
154
|
process(inputs, outputs, parameters) {
|
|
@@ -156,14 +163,18 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
156
163
|
}
|
|
157
164
|
const rms = Math.sqrt(sum / channel.length);
|
|
158
165
|
|
|
159
|
-
// Adaptive noise floor estimation
|
|
160
|
-
//
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
167
|
+
// This prevents the noise floor from rising during speech
|
|
168
|
+
if (!this.isSpeaking) {
|
|
169
|
+
if (rms < this.noiseFloor) {
|
|
170
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
171
|
+
} else {
|
|
172
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
173
|
+
// This could be brief noise we haven't classified as speech yet
|
|
174
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
175
|
+
}
|
|
166
176
|
}
|
|
177
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
167
178
|
|
|
168
179
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
169
180
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -184,6 +195,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
184
195
|
};
|
|
185
196
|
var EnergyVADPlugin = class {
|
|
186
197
|
name = "energy-vad";
|
|
198
|
+
workletNode = null;
|
|
187
199
|
async createNode(context, config, onDecision) {
|
|
188
200
|
if (!config?.enabled) {
|
|
189
201
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -210,6 +222,7 @@ var EnergyVADPlugin = class {
|
|
|
210
222
|
let node;
|
|
211
223
|
try {
|
|
212
224
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
225
|
+
this.workletNode = node;
|
|
213
226
|
console.log("Energy VAD node created successfully");
|
|
214
227
|
} catch (e) {
|
|
215
228
|
const error = new Error(
|
|
@@ -235,6 +248,11 @@ var EnergyVADPlugin = class {
|
|
|
235
248
|
};
|
|
236
249
|
return node;
|
|
237
250
|
}
|
|
251
|
+
updateSpeakingState(isSpeaking) {
|
|
252
|
+
if (this.workletNode) {
|
|
253
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
254
|
+
}
|
|
255
|
+
}
|
|
238
256
|
};
|
|
239
257
|
|
|
240
258
|
// src/extensibility/plugins.ts
|
|
@@ -436,12 +454,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
436
454
|
throw err;
|
|
437
455
|
}
|
|
438
456
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
457
|
+
let vadPlugin;
|
|
439
458
|
try {
|
|
440
|
-
|
|
459
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
441
460
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
442
461
|
try {
|
|
443
462
|
const timestamp = context.currentTime * 1e3;
|
|
444
463
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
464
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
465
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
466
|
+
}
|
|
445
467
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
446
468
|
emitter.emit("vadChange", newState);
|
|
447
469
|
lastVadState = newState;
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import {
|
|
2
2
|
attachProcessingToTrack
|
|
3
|
-
} from "../chunk-ERJVV5JR.mjs";
|
|
4
|
-
import "../chunk-AHBRT4RD.mjs";
|
|
3
|
+
} from "../chunk-GVKCBKW6.mjs";
|
|
4
|
+
import "../chunk-XXTNAUYX.mjs";
|
|
5
5
|
import "../chunk-N553RHTI.mjs";
|
|
6
6
|
import "../chunk-OZ7KMC4S.mjs";
|
|
7
|
-
import "../chunk-YOSTLLCS.mjs";
|
|
7
|
+
import "../chunk-H5UKZU2Y.mjs";
|
|
8
8
|
import "../chunk-XO6B3D4A.mjs";
|
|
9
|
-
import "../chunk-NMHKX64G.mjs";
|
|
9
|
+
import "../chunk-VEJXAEMM.mjs";
|
|
10
10
|
export {
|
|
11
11
|
attachProcessingToTrack
|
|
12
12
|
};
|
|
@@ -140,6 +140,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
140
140
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
141
141
|
this.minSNR = ${minSNR};
|
|
142
142
|
this.snrRange = ${snrRange};
|
|
143
|
+
this.isSpeaking = false;
|
|
144
|
+
|
|
145
|
+
this.port.onmessage = (event) => {
|
|
146
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
147
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
148
|
+
}
|
|
149
|
+
};
|
|
143
150
|
}
|
|
144
151
|
|
|
145
152
|
process(inputs, outputs, parameters) {
|
|
@@ -154,14 +161,18 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
154
161
|
}
|
|
155
162
|
const rms = Math.sqrt(sum / channel.length);
|
|
156
163
|
|
|
157
|
-
// Adaptive noise floor estimation
|
|
158
|
-
//
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
165
|
+
// This prevents the noise floor from rising during speech
|
|
166
|
+
if (!this.isSpeaking) {
|
|
167
|
+
if (rms < this.noiseFloor) {
|
|
168
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
169
|
+
} else {
|
|
170
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
171
|
+
// This could be brief noise we haven't classified as speech yet
|
|
172
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
173
|
+
}
|
|
164
174
|
}
|
|
175
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
165
176
|
|
|
166
177
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
167
178
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -182,6 +193,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
182
193
|
};
|
|
183
194
|
var EnergyVADPlugin = class {
|
|
184
195
|
name = "energy-vad";
|
|
196
|
+
workletNode = null;
|
|
185
197
|
async createNode(context, config, onDecision) {
|
|
186
198
|
if (!config?.enabled) {
|
|
187
199
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -208,6 +220,7 @@ var EnergyVADPlugin = class {
|
|
|
208
220
|
let node;
|
|
209
221
|
try {
|
|
210
222
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
223
|
+
this.workletNode = node;
|
|
211
224
|
console.log("Energy VAD node created successfully");
|
|
212
225
|
} catch (e) {
|
|
213
226
|
const error = new Error(
|
|
@@ -233,6 +246,11 @@ var EnergyVADPlugin = class {
|
|
|
233
246
|
};
|
|
234
247
|
return node;
|
|
235
248
|
}
|
|
249
|
+
updateSpeakingState(isSpeaking) {
|
|
250
|
+
if (this.workletNode) {
|
|
251
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
252
|
+
}
|
|
253
|
+
}
|
|
236
254
|
};
|
|
237
255
|
|
|
238
256
|
// src/extensibility/plugins.ts
|
|
@@ -434,12 +452,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
434
452
|
throw err;
|
|
435
453
|
}
|
|
436
454
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
455
|
+
let vadPlugin;
|
|
437
456
|
try {
|
|
438
|
-
|
|
457
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
439
458
|
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
440
459
|
try {
|
|
441
460
|
const timestamp = context.currentTime * 1e3;
|
|
442
461
|
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
462
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
463
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
464
|
+
}
|
|
443
465
|
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
444
466
|
emitter.emit("vadChange", newState);
|
|
445
467
|
lastVadState = newState;
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createAudioPipeline
|
|
3
|
-
} from "../chunk-AHBRT4RD.mjs";
|
|
3
|
+
} from "../chunk-XXTNAUYX.mjs";
|
|
4
4
|
import "../chunk-N553RHTI.mjs";
|
|
5
5
|
import "../chunk-OZ7KMC4S.mjs";
|
|
6
|
-
import "../chunk-YOSTLLCS.mjs";
|
|
6
|
+
import "../chunk-H5UKZU2Y.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-NMHKX64G.mjs";
|
|
8
|
+
import "../chunk-VEJXAEMM.mjs";
|
|
9
9
|
export {
|
|
10
10
|
createAudioPipeline
|
|
11
11
|
};
|
package/dist/vad/vad-node.d.mts
CHANGED
|
@@ -3,7 +3,9 @@ import 'mitt';
|
|
|
3
3
|
|
|
4
4
|
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
5
|
name: string;
|
|
6
|
+
private workletNode;
|
|
6
7
|
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
8
|
+
updateSpeakingState(isSpeaking: boolean): void;
|
|
7
9
|
}
|
|
8
10
|
|
|
9
11
|
export { EnergyVADPlugin };
|
package/dist/vad/vad-node.d.ts
CHANGED
|
@@ -3,7 +3,9 @@ import 'mitt';
|
|
|
3
3
|
|
|
4
4
|
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
5
|
name: string;
|
|
6
|
+
private workletNode;
|
|
6
7
|
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
8
|
+
updateSpeakingState(isSpeaking: boolean): void;
|
|
7
9
|
}
|
|
8
10
|
|
|
9
11
|
export { EnergyVADPlugin };
|
package/dist/vad/vad-node.js
CHANGED
|
@@ -42,6 +42,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
42
42
|
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
43
43
|
this.minSNR = ${minSNR};
|
|
44
44
|
this.snrRange = ${snrRange};
|
|
45
|
+
this.isSpeaking = false;
|
|
46
|
+
|
|
47
|
+
this.port.onmessage = (event) => {
|
|
48
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
49
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
50
|
+
}
|
|
51
|
+
};
|
|
45
52
|
}
|
|
46
53
|
|
|
47
54
|
process(inputs, outputs, parameters) {
|
|
@@ -56,14 +63,18 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
56
63
|
}
|
|
57
64
|
const rms = Math.sqrt(sum / channel.length);
|
|
58
65
|
|
|
59
|
-
// Adaptive noise floor estimation
|
|
60
|
-
//
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
67
|
+
// This prevents the noise floor from rising during speech
|
|
68
|
+
if (!this.isSpeaking) {
|
|
69
|
+
if (rms < this.noiseFloor) {
|
|
70
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
71
|
+
} else {
|
|
72
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
73
|
+
// This could be brief noise we haven't classified as speech yet
|
|
74
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
75
|
+
}
|
|
66
76
|
}
|
|
77
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
67
78
|
|
|
68
79
|
// Calculate Signal-to-Noise Ratio (SNR)
|
|
69
80
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
@@ -84,6 +95,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
|
84
95
|
};
|
|
85
96
|
var EnergyVADPlugin = class {
|
|
86
97
|
name = "energy-vad";
|
|
98
|
+
workletNode = null;
|
|
87
99
|
async createNode(context, config, onDecision) {
|
|
88
100
|
if (!config?.enabled) {
|
|
89
101
|
console.log("VAD disabled, using passthrough node");
|
|
@@ -110,6 +122,7 @@ var EnergyVADPlugin = class {
|
|
|
110
122
|
let node;
|
|
111
123
|
try {
|
|
112
124
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
125
|
+
this.workletNode = node;
|
|
113
126
|
console.log("Energy VAD node created successfully");
|
|
114
127
|
} catch (e) {
|
|
115
128
|
const error = new Error(
|
|
@@ -135,6 +148,11 @@ var EnergyVADPlugin = class {
|
|
|
135
148
|
};
|
|
136
149
|
return node;
|
|
137
150
|
}
|
|
151
|
+
updateSpeakingState(isSpeaking) {
|
|
152
|
+
if (this.workletNode) {
|
|
153
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
154
|
+
}
|
|
155
|
+
}
|
|
138
156
|
};
|
|
139
157
|
// Annotate the CommonJS export names for ESM import in node:
|
|
140
158
|
0 && (module.exports = {
|
package/dist/vad/vad-node.mjs
CHANGED