@tensamin/audio 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -54
- package/dist/{chunk-EXH2PNUE.mjs → chunk-AHBRT4RD.mjs} +128 -33
- package/dist/{chunk-XMTQPMQ6.mjs → chunk-ERJVV5JR.mjs} +1 -1
- package/dist/chunk-N553RHTI.mjs +93 -0
- package/dist/{chunk-R5JVHKWA.mjs → chunk-NMHKX64G.mjs} +32 -12
- package/dist/{chunk-6P2RDBW5.mjs → chunk-YOSTLLCS.mjs} +1 -1
- package/dist/extensibility/plugins.js +32 -12
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +200 -51
- package/dist/index.mjs +5 -5
- package/dist/livekit/integration.js +200 -51
- package/dist/livekit/integration.mjs +5 -5
- package/dist/pipeline/audio-pipeline.js +200 -51
- package/dist/pipeline/audio-pipeline.mjs +4 -4
- package/dist/types.d.mts +118 -10
- package/dist/types.d.ts +118 -10
- package/dist/vad/vad-node.js +32 -12
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.d.mts +1 -0
- package/dist/vad/vad-state.d.ts +1 -0
- package/dist/vad/vad-state.js +42 -8
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
- package/dist/chunk-JJASCVEW.mjs +0 -59
|
@@ -1,11 +1,23 @@
|
|
|
1
1
|
// src/vad/vad-node.ts
|
|
2
|
-
var
|
|
2
|
+
var createEnergyVadWorkletCode = (vadConfig) => {
|
|
3
|
+
const energyParams = vadConfig?.energyVad || {};
|
|
4
|
+
const smoothing = energyParams.smoothing ?? 0.95;
|
|
5
|
+
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
6
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
7
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
|
|
8
|
+
const minSNR = energyParams.minSNR ?? 2;
|
|
9
|
+
const snrRange = energyParams.snrRange ?? 8;
|
|
10
|
+
return `
|
|
3
11
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
4
12
|
constructor() {
|
|
5
13
|
super();
|
|
6
|
-
this.smoothing =
|
|
14
|
+
this.smoothing = ${smoothing};
|
|
7
15
|
this.energy = 0;
|
|
8
|
-
this.noiseFloor =
|
|
16
|
+
this.noiseFloor = ${initialNoiseFloor};
|
|
17
|
+
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
18
|
+
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
19
|
+
this.minSNR = ${minSNR};
|
|
20
|
+
this.snrRange = ${snrRange};
|
|
9
21
|
}
|
|
10
22
|
|
|
11
23
|
process(inputs, outputs, parameters) {
|
|
@@ -13,32 +25,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
13
25
|
if (!input || !input.length) return true;
|
|
14
26
|
const channel = input[0];
|
|
15
27
|
|
|
16
|
-
// Calculate RMS
|
|
28
|
+
// Calculate RMS (Root Mean Square) energy
|
|
17
29
|
let sum = 0;
|
|
18
30
|
for (let i = 0; i < channel.length; i++) {
|
|
19
31
|
sum += channel[i] * channel[i];
|
|
20
32
|
}
|
|
21
33
|
const rms = Math.sqrt(sum / channel.length);
|
|
22
34
|
|
|
23
|
-
//
|
|
35
|
+
// Adaptive noise floor estimation
|
|
36
|
+
// When signal is quiet, adapt quickly to find new noise floor
|
|
37
|
+
// When signal is loud (speech), adapt slowly to avoid raising noise floor
|
|
24
38
|
if (rms < this.noiseFloor) {
|
|
25
|
-
this.noiseFloor = this.noiseFloor *
|
|
39
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
26
40
|
} else {
|
|
27
|
-
this.noiseFloor = this.noiseFloor *
|
|
41
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
28
42
|
}
|
|
29
43
|
|
|
30
|
-
// Calculate
|
|
31
|
-
// This is a heuristic mapping from energy to 0-1
|
|
44
|
+
// Calculate Signal-to-Noise Ratio (SNR)
|
|
32
45
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
33
|
-
|
|
46
|
+
|
|
47
|
+
// Map SNR to probability (0-1)
|
|
48
|
+
// Probability is 0 when SNR <= minSNR
|
|
49
|
+
// Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
|
|
50
|
+
// Probability is 1 when SNR >= (minSNR + snrRange)
|
|
51
|
+
const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
|
|
34
52
|
|
|
35
|
-
this.port.postMessage({ probability });
|
|
53
|
+
this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
|
|
36
54
|
|
|
37
55
|
return true;
|
|
38
56
|
}
|
|
39
57
|
}
|
|
40
58
|
registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
41
59
|
`;
|
|
60
|
+
};
|
|
42
61
|
var EnergyVADPlugin = class {
|
|
43
62
|
name = "energy-vad";
|
|
44
63
|
async createNode(context, config, onDecision) {
|
|
@@ -47,7 +66,8 @@ var EnergyVADPlugin = class {
|
|
|
47
66
|
const pass = context.createGain();
|
|
48
67
|
return pass;
|
|
49
68
|
}
|
|
50
|
-
const
|
|
69
|
+
const workletCode = createEnergyVadWorkletCode(config);
|
|
70
|
+
const blob = new Blob([workletCode], {
|
|
51
71
|
type: "application/javascript"
|
|
52
72
|
});
|
|
53
73
|
const url = URL.createObjectURL(blob);
|
|
@@ -102,13 +102,25 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
|
|
|
102
102
|
};
|
|
103
103
|
|
|
104
104
|
// src/vad/vad-node.ts
|
|
105
|
-
var
|
|
105
|
+
var createEnergyVadWorkletCode = (vadConfig) => {
|
|
106
|
+
const energyParams = vadConfig?.energyVad || {};
|
|
107
|
+
const smoothing = energyParams.smoothing ?? 0.95;
|
|
108
|
+
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
109
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
110
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
|
|
111
|
+
const minSNR = energyParams.minSNR ?? 2;
|
|
112
|
+
const snrRange = energyParams.snrRange ?? 8;
|
|
113
|
+
return `
|
|
106
114
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
107
115
|
constructor() {
|
|
108
116
|
super();
|
|
109
|
-
this.smoothing =
|
|
117
|
+
this.smoothing = ${smoothing};
|
|
110
118
|
this.energy = 0;
|
|
111
|
-
this.noiseFloor =
|
|
119
|
+
this.noiseFloor = ${initialNoiseFloor};
|
|
120
|
+
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
121
|
+
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
122
|
+
this.minSNR = ${minSNR};
|
|
123
|
+
this.snrRange = ${snrRange};
|
|
112
124
|
}
|
|
113
125
|
|
|
114
126
|
process(inputs, outputs, parameters) {
|
|
@@ -116,32 +128,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
116
128
|
if (!input || !input.length) return true;
|
|
117
129
|
const channel = input[0];
|
|
118
130
|
|
|
119
|
-
// Calculate RMS
|
|
131
|
+
// Calculate RMS (Root Mean Square) energy
|
|
120
132
|
let sum = 0;
|
|
121
133
|
for (let i = 0; i < channel.length; i++) {
|
|
122
134
|
sum += channel[i] * channel[i];
|
|
123
135
|
}
|
|
124
136
|
const rms = Math.sqrt(sum / channel.length);
|
|
125
137
|
|
|
126
|
-
//
|
|
138
|
+
// Adaptive noise floor estimation
|
|
139
|
+
// When signal is quiet, adapt quickly to find new noise floor
|
|
140
|
+
// When signal is loud (speech), adapt slowly to avoid raising noise floor
|
|
127
141
|
if (rms < this.noiseFloor) {
|
|
128
|
-
this.noiseFloor = this.noiseFloor *
|
|
142
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
129
143
|
} else {
|
|
130
|
-
this.noiseFloor = this.noiseFloor *
|
|
144
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
131
145
|
}
|
|
132
146
|
|
|
133
|
-
// Calculate
|
|
134
|
-
// This is a heuristic mapping from energy to 0-1
|
|
147
|
+
// Calculate Signal-to-Noise Ratio (SNR)
|
|
135
148
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
136
|
-
|
|
149
|
+
|
|
150
|
+
// Map SNR to probability (0-1)
|
|
151
|
+
// Probability is 0 when SNR <= minSNR
|
|
152
|
+
// Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
|
|
153
|
+
// Probability is 1 when SNR >= (minSNR + snrRange)
|
|
154
|
+
const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
|
|
137
155
|
|
|
138
|
-
this.port.postMessage({ probability });
|
|
156
|
+
this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
|
|
139
157
|
|
|
140
158
|
return true;
|
|
141
159
|
}
|
|
142
160
|
}
|
|
143
161
|
registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
144
162
|
`;
|
|
163
|
+
};
|
|
145
164
|
var EnergyVADPlugin = class {
|
|
146
165
|
name = "energy-vad";
|
|
147
166
|
async createNode(context, config, onDecision) {
|
|
@@ -150,7 +169,8 @@ var EnergyVADPlugin = class {
|
|
|
150
169
|
const pass = context.createGain();
|
|
151
170
|
return pass;
|
|
152
171
|
}
|
|
153
|
-
const
|
|
172
|
+
const workletCode = createEnergyVadWorkletCode(config);
|
|
173
|
+
const blob = new Blob([workletCode], {
|
|
154
174
|
type: "application/javascript"
|
|
155
175
|
});
|
|
156
176
|
const url = URL.createObjectURL(blob);
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-
|
|
6
|
+
} from "../chunk-YOSTLLCS.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-NMHKX64G.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|
package/dist/index.js
CHANGED
|
@@ -154,13 +154,25 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
|
|
|
154
154
|
};
|
|
155
155
|
|
|
156
156
|
// src/vad/vad-node.ts
|
|
157
|
-
var
|
|
157
|
+
var createEnergyVadWorkletCode = (vadConfig) => {
|
|
158
|
+
const energyParams = vadConfig?.energyVad || {};
|
|
159
|
+
const smoothing = energyParams.smoothing ?? 0.95;
|
|
160
|
+
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
161
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
162
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
|
|
163
|
+
const minSNR = energyParams.minSNR ?? 2;
|
|
164
|
+
const snrRange = energyParams.snrRange ?? 8;
|
|
165
|
+
return `
|
|
158
166
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
159
167
|
constructor() {
|
|
160
168
|
super();
|
|
161
|
-
this.smoothing =
|
|
169
|
+
this.smoothing = ${smoothing};
|
|
162
170
|
this.energy = 0;
|
|
163
|
-
this.noiseFloor =
|
|
171
|
+
this.noiseFloor = ${initialNoiseFloor};
|
|
172
|
+
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
173
|
+
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
174
|
+
this.minSNR = ${minSNR};
|
|
175
|
+
this.snrRange = ${snrRange};
|
|
164
176
|
}
|
|
165
177
|
|
|
166
178
|
process(inputs, outputs, parameters) {
|
|
@@ -168,32 +180,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
168
180
|
if (!input || !input.length) return true;
|
|
169
181
|
const channel = input[0];
|
|
170
182
|
|
|
171
|
-
// Calculate RMS
|
|
183
|
+
// Calculate RMS (Root Mean Square) energy
|
|
172
184
|
let sum = 0;
|
|
173
185
|
for (let i = 0; i < channel.length; i++) {
|
|
174
186
|
sum += channel[i] * channel[i];
|
|
175
187
|
}
|
|
176
188
|
const rms = Math.sqrt(sum / channel.length);
|
|
177
189
|
|
|
178
|
-
//
|
|
190
|
+
// Adaptive noise floor estimation
|
|
191
|
+
// When signal is quiet, adapt quickly to find new noise floor
|
|
192
|
+
// When signal is loud (speech), adapt slowly to avoid raising noise floor
|
|
179
193
|
if (rms < this.noiseFloor) {
|
|
180
|
-
this.noiseFloor = this.noiseFloor *
|
|
194
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
181
195
|
} else {
|
|
182
|
-
this.noiseFloor = this.noiseFloor *
|
|
196
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
183
197
|
}
|
|
184
198
|
|
|
185
|
-
// Calculate
|
|
186
|
-
// This is a heuristic mapping from energy to 0-1
|
|
199
|
+
// Calculate Signal-to-Noise Ratio (SNR)
|
|
187
200
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
188
|
-
|
|
201
|
+
|
|
202
|
+
// Map SNR to probability (0-1)
|
|
203
|
+
// Probability is 0 when SNR <= minSNR
|
|
204
|
+
// Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
|
|
205
|
+
// Probability is 1 when SNR >= (minSNR + snrRange)
|
|
206
|
+
const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
|
|
189
207
|
|
|
190
|
-
this.port.postMessage({ probability });
|
|
208
|
+
this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
|
|
191
209
|
|
|
192
210
|
return true;
|
|
193
211
|
}
|
|
194
212
|
}
|
|
195
213
|
registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
196
214
|
`;
|
|
215
|
+
};
|
|
197
216
|
var EnergyVADPlugin = class {
|
|
198
217
|
name = "energy-vad";
|
|
199
218
|
async createNode(context, config, onDecision) {
|
|
@@ -202,7 +221,8 @@ var EnergyVADPlugin = class {
|
|
|
202
221
|
const pass = context.createGain();
|
|
203
222
|
return pass;
|
|
204
223
|
}
|
|
205
|
-
const
|
|
224
|
+
const workletCode = createEnergyVadWorkletCode(config);
|
|
225
|
+
const blob = new Blob([workletCode], {
|
|
206
226
|
type: "application/javascript"
|
|
207
227
|
});
|
|
208
228
|
const url = URL.createObjectURL(blob);
|
|
@@ -288,31 +308,60 @@ var VADStateMachine = class {
|
|
|
288
308
|
currentState = "silent";
|
|
289
309
|
lastSpeechTime = 0;
|
|
290
310
|
speechStartTime = 0;
|
|
311
|
+
lastSilenceTime = 0;
|
|
291
312
|
frameDurationMs = 20;
|
|
292
313
|
// Assumed frame duration, updated by calls
|
|
293
314
|
constructor(config) {
|
|
294
315
|
this.config = {
|
|
295
316
|
enabled: config?.enabled ?? true,
|
|
296
317
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
318
|
+
// Voice-optimized defaults
|
|
319
|
+
startThreshold: config?.startThreshold ?? 0.6,
|
|
320
|
+
// Higher threshold to avoid noise
|
|
321
|
+
stopThreshold: config?.stopThreshold ?? 0.45,
|
|
322
|
+
// Balanced for voice
|
|
323
|
+
hangoverMs: config?.hangoverMs ?? 400,
|
|
324
|
+
// Smooth for natural speech
|
|
325
|
+
preRollMs: config?.preRollMs ?? 250,
|
|
326
|
+
// Generous pre-roll
|
|
327
|
+
minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
|
|
328
|
+
minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
|
|
329
|
+
energyVad: {
|
|
330
|
+
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
331
|
+
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
332
|
+
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
|
|
333
|
+
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
|
|
334
|
+
minSNR: config?.energyVad?.minSNR ?? 2,
|
|
335
|
+
snrRange: config?.energyVad?.snrRange ?? 8
|
|
336
|
+
}
|
|
301
337
|
};
|
|
338
|
+
this.lastSilenceTime = Date.now();
|
|
302
339
|
}
|
|
303
340
|
updateConfig(config) {
|
|
304
341
|
this.config = { ...this.config, ...config };
|
|
305
342
|
}
|
|
306
343
|
processFrame(probability, timestamp) {
|
|
307
|
-
const {
|
|
344
|
+
const {
|
|
345
|
+
startThreshold,
|
|
346
|
+
stopThreshold,
|
|
347
|
+
hangoverMs,
|
|
348
|
+
minSpeechDurationMs,
|
|
349
|
+
minSilenceDurationMs
|
|
350
|
+
} = this.config;
|
|
308
351
|
let newState = this.currentState;
|
|
309
352
|
if (this.currentState === "silent" || this.currentState === "speech_ending") {
|
|
310
353
|
if (probability >= startThreshold) {
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
354
|
+
const silenceDuration = timestamp - this.lastSilenceTime;
|
|
355
|
+
if (silenceDuration >= minSilenceDurationMs) {
|
|
356
|
+
newState = "speech_starting";
|
|
357
|
+
this.speechStartTime = timestamp;
|
|
358
|
+
this.lastSpeechTime = timestamp;
|
|
359
|
+
} else {
|
|
360
|
+
newState = "silent";
|
|
361
|
+
}
|
|
314
362
|
} else {
|
|
315
363
|
newState = "silent";
|
|
364
|
+
this.lastSilenceTime = timestamp;
|
|
316
365
|
}
|
|
317
366
|
} else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
|
|
318
367
|
if (probability >= stopThreshold) {
|
|
@@ -320,10 +369,15 @@ var VADStateMachine = class {
|
|
|
320
369
|
this.lastSpeechTime = timestamp;
|
|
321
370
|
} else {
|
|
322
371
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
372
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
323
373
|
if (timeSinceSpeech < hangoverMs) {
|
|
324
374
|
newState = "speaking";
|
|
375
|
+
} else if (speechDuration < minSpeechDurationMs) {
|
|
376
|
+
newState = "silent";
|
|
377
|
+
this.lastSilenceTime = timestamp;
|
|
325
378
|
} else {
|
|
326
379
|
newState = "speech_ending";
|
|
380
|
+
this.lastSilenceTime = timestamp;
|
|
327
381
|
}
|
|
328
382
|
}
|
|
329
383
|
}
|
|
@@ -342,7 +396,9 @@ var VADStateMachine = class {
|
|
|
342
396
|
async function createAudioPipeline(sourceTrack, config = {}) {
|
|
343
397
|
const context = getAudioContext();
|
|
344
398
|
registerPipeline();
|
|
345
|
-
const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
|
|
399
|
+
const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
|
|
400
|
+
config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
|
|
401
|
+
);
|
|
346
402
|
const vadEnabled = config.vad?.enabled !== false;
|
|
347
403
|
const fullConfig = {
|
|
348
404
|
noiseSuppression: {
|
|
@@ -351,13 +407,38 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
351
407
|
},
|
|
352
408
|
vad: {
|
|
353
409
|
enabled: vadEnabled,
|
|
410
|
+
// Voice-optimized defaults (will be overridden by config)
|
|
411
|
+
startThreshold: 0.6,
|
|
412
|
+
stopThreshold: 0.45,
|
|
413
|
+
hangoverMs: 400,
|
|
414
|
+
preRollMs: 250,
|
|
415
|
+
minSpeechDurationMs: 100,
|
|
416
|
+
minSilenceDurationMs: 150,
|
|
417
|
+
energyVad: {
|
|
418
|
+
smoothing: 0.95,
|
|
419
|
+
initialNoiseFloor: 1e-3,
|
|
420
|
+
noiseFloorAdaptRateQuiet: 0.01,
|
|
421
|
+
noiseFloorAdaptRateLoud: 1e-3,
|
|
422
|
+
minSNR: 2,
|
|
423
|
+
snrRange: 8
|
|
424
|
+
},
|
|
354
425
|
...config.vad
|
|
355
426
|
},
|
|
356
427
|
output: {
|
|
357
428
|
speechGain: 1,
|
|
358
|
-
silenceGain:
|
|
359
|
-
//
|
|
360
|
-
gainRampTime: 0.
|
|
429
|
+
silenceGain: 0,
|
|
430
|
+
// Full mute for voice-only
|
|
431
|
+
gainRampTime: 0.015,
|
|
432
|
+
// Fast but smooth transitions
|
|
433
|
+
smoothTransitions: true,
|
|
434
|
+
maxGainDb: 6,
|
|
435
|
+
enableCompression: false,
|
|
436
|
+
compression: {
|
|
437
|
+
threshold: -24,
|
|
438
|
+
ratio: 3,
|
|
439
|
+
attack: 3e-3,
|
|
440
|
+
release: 0.05
|
|
441
|
+
},
|
|
361
442
|
...config.output
|
|
362
443
|
},
|
|
363
444
|
livekit: { manageTrackMute: false, ...config.livekit }
|
|
@@ -368,7 +449,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
368
449
|
output: fullConfig.output
|
|
369
450
|
});
|
|
370
451
|
if (!sourceTrack || sourceTrack.kind !== "audio") {
|
|
371
|
-
throw new Error(
|
|
452
|
+
throw new Error(
|
|
453
|
+
"createAudioPipeline requires a valid audio MediaStreamTrack"
|
|
454
|
+
);
|
|
372
455
|
}
|
|
373
456
|
if (sourceTrack.readyState === "ended") {
|
|
374
457
|
throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
|
|
@@ -382,10 +465,7 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
382
465
|
const nsPlugin = getNoiseSuppressionPlugin(
|
|
383
466
|
fullConfig.noiseSuppression?.pluginName
|
|
384
467
|
);
|
|
385
|
-
nsNode = await nsPlugin.createNode(
|
|
386
|
-
context,
|
|
387
|
-
fullConfig.noiseSuppression
|
|
388
|
-
);
|
|
468
|
+
nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
|
|
389
469
|
} catch (error) {
|
|
390
470
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
391
471
|
console.error("Failed to create noise suppression node:", err);
|
|
@@ -395,25 +475,21 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
395
475
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
396
476
|
try {
|
|
397
477
|
const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
398
|
-
vadNode = await vadPlugin.createNode(
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
emitter.emit("vadChange", newState);
|
|
407
|
-
lastVadState = newState;
|
|
408
|
-
updateGain(newState);
|
|
409
|
-
}
|
|
410
|
-
} catch (vadError) {
|
|
411
|
-
const err = vadError instanceof Error ? vadError : new Error(String(vadError));
|
|
412
|
-
console.error("Error in VAD callback:", err);
|
|
413
|
-
emitter.emit("error", err);
|
|
478
|
+
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
479
|
+
try {
|
|
480
|
+
const timestamp = context.currentTime * 1e3;
|
|
481
|
+
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
482
|
+
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
483
|
+
emitter.emit("vadChange", newState);
|
|
484
|
+
lastVadState = newState;
|
|
485
|
+
updateGain(newState);
|
|
414
486
|
}
|
|
487
|
+
} catch (vadError) {
|
|
488
|
+
const err = vadError instanceof Error ? vadError : new Error(String(vadError));
|
|
489
|
+
console.error("Error in VAD callback:", err);
|
|
490
|
+
emitter.emit("error", err);
|
|
415
491
|
}
|
|
416
|
-
);
|
|
492
|
+
});
|
|
417
493
|
} catch (error) {
|
|
418
494
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
419
495
|
console.error("Failed to create VAD node:", err);
|
|
@@ -430,15 +506,31 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
430
506
|
nsNode.connect(splitter);
|
|
431
507
|
splitter.connect(vadNode);
|
|
432
508
|
const delayNode = context.createDelay(1);
|
|
433
|
-
const preRollSeconds = (fullConfig.vad?.preRollMs ??
|
|
509
|
+
const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
|
|
434
510
|
delayNode.delayTime.value = preRollSeconds;
|
|
435
511
|
const gainNode = context.createGain();
|
|
436
512
|
gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
|
|
513
|
+
let compressor = null;
|
|
514
|
+
if (fullConfig.output?.enableCompression) {
|
|
515
|
+
compressor = context.createDynamicsCompressor();
|
|
516
|
+
const comp = fullConfig.output.compression;
|
|
517
|
+
compressor.threshold.value = comp.threshold ?? -24;
|
|
518
|
+
compressor.ratio.value = comp.ratio ?? 3;
|
|
519
|
+
compressor.attack.value = comp.attack ?? 3e-3;
|
|
520
|
+
compressor.release.value = comp.release ?? 0.05;
|
|
521
|
+
compressor.knee.value = 10;
|
|
522
|
+
}
|
|
437
523
|
const destination = context.createMediaStreamDestination();
|
|
438
524
|
try {
|
|
439
525
|
splitter.connect(delayNode);
|
|
440
526
|
delayNode.connect(gainNode);
|
|
441
|
-
|
|
527
|
+
if (compressor) {
|
|
528
|
+
gainNode.connect(compressor);
|
|
529
|
+
compressor.connect(destination);
|
|
530
|
+
console.log("Compression enabled:", fullConfig.output?.compression);
|
|
531
|
+
} else {
|
|
532
|
+
gainNode.connect(destination);
|
|
533
|
+
}
|
|
442
534
|
} catch (error) {
|
|
443
535
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
444
536
|
console.error("Failed to wire audio pipeline:", err);
|
|
@@ -447,10 +539,24 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
447
539
|
}
|
|
448
540
|
function updateGain(state) {
|
|
449
541
|
try {
|
|
450
|
-
const {
|
|
451
|
-
|
|
542
|
+
const {
|
|
543
|
+
speechGain = 1,
|
|
544
|
+
silenceGain = 0,
|
|
545
|
+
gainRampTime = 0.015,
|
|
546
|
+
smoothTransitions = true,
|
|
547
|
+
maxGainDb = 6
|
|
548
|
+
} = fullConfig.output;
|
|
549
|
+
const maxGainLinear = Math.pow(10, maxGainDb / 20);
|
|
550
|
+
const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
|
|
551
|
+
const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
|
|
452
552
|
const now = context.currentTime;
|
|
453
|
-
|
|
553
|
+
if (smoothTransitions) {
|
|
554
|
+
gainNode.gain.cancelScheduledValues(now);
|
|
555
|
+
gainNode.gain.setValueAtTime(gainNode.gain.value, now);
|
|
556
|
+
gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
|
|
557
|
+
} else {
|
|
558
|
+
gainNode.gain.setValueAtTime(targetGain, now);
|
|
559
|
+
}
|
|
454
560
|
} catch (error) {
|
|
455
561
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
456
562
|
console.error("Failed to update gain:", err);
|
|
@@ -506,6 +612,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
506
612
|
vadNode.disconnect();
|
|
507
613
|
delayNode.disconnect();
|
|
508
614
|
gainNode.disconnect();
|
|
615
|
+
if (compressor) {
|
|
616
|
+
compressor.disconnect();
|
|
617
|
+
}
|
|
509
618
|
destination.stream.getTracks().forEach((t) => t.stop());
|
|
510
619
|
unregisterPipeline();
|
|
511
620
|
} catch (error) {
|
|
@@ -522,7 +631,47 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
522
631
|
try {
|
|
523
632
|
if (newConfig.vad) {
|
|
524
633
|
vadStateMachine.updateConfig(newConfig.vad);
|
|
634
|
+
Object.assign(fullConfig.vad, newConfig.vad);
|
|
635
|
+
if (newConfig.vad.preRollMs !== void 0) {
|
|
636
|
+
const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
|
|
637
|
+
delayNode.delayTime.setValueAtTime(
|
|
638
|
+
preRollSeconds2,
|
|
639
|
+
context.currentTime
|
|
640
|
+
);
|
|
641
|
+
}
|
|
642
|
+
}
|
|
643
|
+
if (newConfig.output) {
|
|
644
|
+
Object.assign(fullConfig.output, newConfig.output);
|
|
645
|
+
updateGain(lastVadState);
|
|
646
|
+
if (compressor && newConfig.output.compression) {
|
|
647
|
+
const comp = newConfig.output.compression;
|
|
648
|
+
if (comp.threshold !== void 0) {
|
|
649
|
+
compressor.threshold.setValueAtTime(
|
|
650
|
+
comp.threshold,
|
|
651
|
+
context.currentTime
|
|
652
|
+
);
|
|
653
|
+
}
|
|
654
|
+
if (comp.ratio !== void 0) {
|
|
655
|
+
compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
|
|
656
|
+
}
|
|
657
|
+
if (comp.attack !== void 0) {
|
|
658
|
+
compressor.attack.setValueAtTime(
|
|
659
|
+
comp.attack,
|
|
660
|
+
context.currentTime
|
|
661
|
+
);
|
|
662
|
+
}
|
|
663
|
+
if (comp.release !== void 0) {
|
|
664
|
+
compressor.release.setValueAtTime(
|
|
665
|
+
comp.release,
|
|
666
|
+
context.currentTime
|
|
667
|
+
);
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
if (newConfig.livekit) {
|
|
672
|
+
Object.assign(fullConfig.livekit, newConfig.livekit);
|
|
525
673
|
}
|
|
674
|
+
console.log("Pipeline config updated:", newConfig);
|
|
526
675
|
} catch (error) {
|
|
527
676
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
528
677
|
console.error("Failed to update config:", err);
|
package/dist/index.mjs
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import "./chunk-WBQAMGXK.mjs";
|
|
2
2
|
import {
|
|
3
3
|
attachProcessingToTrack
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-ERJVV5JR.mjs";
|
|
5
5
|
import {
|
|
6
6
|
createAudioPipeline
|
|
7
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-AHBRT4RD.mjs";
|
|
8
8
|
import {
|
|
9
9
|
VADStateMachine
|
|
10
|
-
} from "./chunk-
|
|
10
|
+
} from "./chunk-N553RHTI.mjs";
|
|
11
11
|
import {
|
|
12
12
|
closeAudioContext,
|
|
13
13
|
getAudioContext,
|
|
@@ -21,13 +21,13 @@ import {
|
|
|
21
21
|
getVADPlugin,
|
|
22
22
|
registerNoiseSuppressionPlugin,
|
|
23
23
|
registerVADPlugin
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-YOSTLLCS.mjs";
|
|
25
25
|
import {
|
|
26
26
|
RNNoisePlugin
|
|
27
27
|
} from "./chunk-XO6B3D4A.mjs";
|
|
28
28
|
import {
|
|
29
29
|
EnergyVADPlugin
|
|
30
|
-
} from "./chunk-
|
|
30
|
+
} from "./chunk-NMHKX64G.mjs";
|
|
31
31
|
export {
|
|
32
32
|
EnergyVADPlugin,
|
|
33
33
|
RNNoisePlugin,
|