@tensamin/audio 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -30
- package/dist/chunk-AHBRT4RD.mjs +307 -0
- package/dist/chunk-ERJVV5JR.mjs +91 -0
- package/dist/chunk-N553RHTI.mjs +93 -0
- package/dist/chunk-NMHKX64G.mjs +118 -0
- package/dist/chunk-XO6B3D4A.mjs +67 -0
- package/dist/{chunk-FS635GMR.mjs → chunk-YOSTLLCS.mjs} +2 -2
- package/dist/extensibility/plugins.js +110 -32
- package/dist/extensibility/plugins.mjs +3 -3
- package/dist/index.js +463 -97
- package/dist/index.mjs +6 -6
- package/dist/livekit/integration.js +463 -97
- package/dist/livekit/integration.mjs +6 -6
- package/dist/noise-suppression/rnnoise-node.js +42 -14
- package/dist/noise-suppression/rnnoise-node.mjs +1 -1
- package/dist/pipeline/audio-pipeline.js +396 -83
- package/dist/pipeline/audio-pipeline.mjs +5 -5
- package/dist/types.d.mts +118 -10
- package/dist/types.d.ts +118 -10
- package/dist/vad/vad-node.js +68 -18
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.d.mts +1 -0
- package/dist/vad/vad-state.d.ts +1 -0
- package/dist/vad/vad-state.js +42 -8
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
- package/dist/chunk-HFSKQ33X.mjs +0 -38
- package/dist/chunk-JJASCVEW.mjs +0 -59
- package/dist/chunk-QU7E5HBA.mjs +0 -106
- package/dist/chunk-SDTOKWM2.mjs +0 -39
- package/dist/chunk-UMU2KIB6.mjs +0 -68
@@ -63,43 +63,83 @@ var RNNoisePlugin = class {
   async createNode(context, config) {
     const { loadRnnoise, RnnoiseWorkletNode } = await import("@sapphi-red/web-noise-suppressor");
     if (!config?.enabled) {
+      console.log("Noise suppression disabled, using passthrough node");
       const pass = context.createGain();
       return pass;
     }
     if (!config?.wasmUrl || !config?.simdUrl || !config?.workletUrl) {
-
-
+      const error = new Error(
+        `RNNoisePlugin requires 'wasmUrl', 'simdUrl', and 'workletUrl' to be configured. Please download the assets from @sapphi-red/web-noise-suppressor and provide the URLs in the config. Current config: wasmUrl=${config?.wasmUrl}, simdUrl=${config?.simdUrl}, workletUrl=${config?.workletUrl}
+To disable noise suppression, set noiseSuppression.enabled to false.`
       );
+      console.error(error.message);
+      throw error;
     }
-
-    this.wasmBuffer
-
-
-
+    try {
+      if (!this.wasmBuffer) {
+        console.log("Loading RNNoise WASM binary...");
+        this.wasmBuffer = await loadRnnoise({
+          url: config.wasmUrl,
+          simdUrl: config.simdUrl
+        });
+        console.log("RNNoise WASM loaded successfully");
+      }
+    } catch (error) {
+      const err = new Error(
+        `Failed to load RNNoise WASM binary: ${error instanceof Error ? error.message : String(error)}`
+      );
+      console.error(err);
+      throw err;
     }
     const workletUrl = config.workletUrl;
     try {
       await context.audioWorklet.addModule(workletUrl);
+      console.log("RNNoise worklet loaded successfully");
     } catch (e) {
-
+      const error = new Error(
+        `Failed to load RNNoise worklet from ${workletUrl}: ${e instanceof Error ? e.message : String(e)}. Ensure the workletUrl points to a valid RNNoise worklet script.`
+      );
+      console.error(error.message);
+      throw error;
+    }
+    try {
+      const node = new RnnoiseWorkletNode(context, {
+        wasmBinary: this.wasmBuffer,
+        maxChannels: 1
+        // Mono for now
+      });
+      console.log("RNNoise worklet node created successfully");
+      return node;
+    } catch (error) {
+      const err = new Error(
+        `Failed to create RNNoise worklet node: ${error instanceof Error ? error.message : String(error)}`
+      );
+      console.error(err);
+      throw err;
     }
-    const node = new RnnoiseWorkletNode(context, {
-      wasmBinary: this.wasmBuffer,
-      maxChannels: 1
-      // Mono for now
-    });
-    return node;
   }
 };

 // src/vad/vad-node.ts
-var
+var createEnergyVadWorkletCode = (vadConfig) => {
+  const energyParams = vadConfig?.energyVad || {};
+  const smoothing = energyParams.smoothing ?? 0.95;
+  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 2;
+  const snrRange = energyParams.snrRange ?? 8;
+  return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
     super();
-    this.smoothing =
+    this.smoothing = ${smoothing};
     this.energy = 0;
-    this.noiseFloor =
+    this.noiseFloor = ${initialNoiseFloor};
+    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
+    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
+    this.minSNR = ${minSNR};
+    this.snrRange = ${snrRange};
   }

   process(inputs, outputs, parameters) {
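The RNNoise plugin now fails fast when its assets are missing: `wasmUrl`, `simdUrl`, and `workletUrl` must all be provided, otherwise a descriptive error is thrown, while `enabled: false` falls back to a passthrough gain node. A minimal consumer-side sketch, assuming `createAudioPipeline` is re-exported from the package root and that the `@sapphi-red/web-noise-suppressor` assets are served from hypothetical static paths (everything outside the config keys below is illustrative, not part of this diff):

```ts
import { createAudioPipeline } from "@tensamin/audio";

// Standard browser capture, shown only to make the sketch self-contained.
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const [micTrack] = stream.getAudioTracks();

const pipeline = await createAudioPipeline(micTrack, {
  noiseSuppression: {
    enabled: true,
    // Hypothetical asset locations -- serve the files shipped by
    // @sapphi-red/web-noise-suppressor yourself and point at them here.
    wasmUrl: "/assets/rnnoise.wasm",
    simdUrl: "/assets/rnnoise_simd.wasm",
    workletUrl: "/assets/noise-suppressor-worklet.js",
  },
});

// Or opt out, which now logs and routes audio through a passthrough GainNode:
// await createAudioPipeline(micTrack, { noiseSuppression: { enabled: false } });
```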
@@ -107,51 +147,89 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];

-    // Calculate RMS
+    // Calculate RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
     const rms = Math.sqrt(sum / channel.length);

-    //
+    // Adaptive noise floor estimation
+    // When signal is quiet, adapt quickly to find new noise floor
+    // When signal is loud (speech), adapt slowly to avoid raising noise floor
     if (rms < this.noiseFloor) {
-      this.noiseFloor = this.noiseFloor *
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
     } else {
-      this.noiseFloor = this.noiseFloor *
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
     }

-    // Calculate
-    // This is a heuristic mapping from energy to 0-1
+    // Calculate Signal-to-Noise Ratio (SNR)
     const snr = rms / (this.noiseFloor + 1e-6);
-
+
+    // Map SNR to probability (0-1)
+    // Probability is 0 when SNR <= minSNR
+    // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
+    // Probability is 1 when SNR >= (minSNR + snrRange)
+    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));

-    this.port.postMessage({ probability });
+    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });

     return true;
   }
 }
 registerProcessor('energy-vad-processor', EnergyVadProcessor);
 `;
+};
 var EnergyVADPlugin = class {
   name = "energy-vad";
   async createNode(context, config, onDecision) {
-
+    if (!config?.enabled) {
+      console.log("VAD disabled, using passthrough node");
+      const pass = context.createGain();
+      return pass;
+    }
+    const workletCode = createEnergyVadWorkletCode(config);
+    const blob = new Blob([workletCode], {
       type: "application/javascript"
     });
     const url = URL.createObjectURL(blob);
     try {
       await context.audioWorklet.addModule(url);
+      console.log("Energy VAD worklet loaded successfully");
     } catch (e) {
-
-
-
+      const error = new Error(
+        `Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
+      );
+      console.error(error.message);
       URL.revokeObjectURL(url);
+      throw error;
+    }
+    URL.revokeObjectURL(url);
+    let node;
+    try {
+      node = new AudioWorkletNode(context, "energy-vad-processor");
+      console.log("Energy VAD node created successfully");
+    } catch (e) {
+      const error = new Error(
+        `Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`
+      );
+      console.error(error.message);
+      throw error;
     }
-    const node = new AudioWorkletNode(context, "energy-vad-processor");
     node.port.onmessage = (event) => {
-
-
+      try {
+        const { probability } = event.data;
+        if (typeof probability === "number" && !isNaN(probability)) {
+          onDecision(probability);
+        } else {
+          console.warn("Invalid VAD probability received:", event.data);
+        }
+      } catch (error) {
+        console.error("Error in VAD message handler:", error);
+      }
+    };
+    node.port.onmessageerror = (event) => {
+      console.error("VAD port message error:", event);
     };
     return node;
   }
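The probability the worklet posts is now a pure function of the frame's RMS, the adaptive noise floor, and the `minSNR`/`snrRange` tunables. Restated outside the worklet string purely for illustration (this helper is not exported by the package):

```ts
// Probability rises linearly from 0 at snr === minSNR to 1 at snr === minSNR + snrRange.
function snrToProbability(rms: number, noiseFloor: number, minSNR = 2, snrRange = 8): number {
  const snr = rms / (noiseFloor + 1e-6);
  return Math.min(1, Math.max(0, (snr - minSNR) / snrRange));
}

snrToProbability(0.002, 0.001); // snr ~ 2  -> ~0   (at the default minSNR)
snrToProbability(0.006, 0.001); // snr ~ 6  -> ~0.5 (halfway through the range)
snrToProbability(0.012, 0.001); // snr ~ 12 -> 1    (clamped)
```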
@@ -191,31 +269,60 @@ var VADStateMachine = class {
   currentState = "silent";
   lastSpeechTime = 0;
   speechStartTime = 0;
+  lastSilenceTime = 0;
   frameDurationMs = 20;
   // Assumed frame duration, updated by calls
   constructor(config) {
     this.config = {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
-
-
-
-
+      // Voice-optimized defaults
+      startThreshold: config?.startThreshold ?? 0.6,
+      // Higher threshold to avoid noise
+      stopThreshold: config?.stopThreshold ?? 0.45,
+      // Balanced for voice
+      hangoverMs: config?.hangoverMs ?? 400,
+      // Smooth for natural speech
+      preRollMs: config?.preRollMs ?? 250,
+      // Generous pre-roll
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
+      minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
+      energyVad: {
+        smoothing: config?.energyVad?.smoothing ?? 0.95,
+        initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
+        minSNR: config?.energyVad?.minSNR ?? 2,
+        snrRange: config?.energyVad?.snrRange ?? 8
+      }
     };
+    this.lastSilenceTime = Date.now();
   }
   updateConfig(config) {
     this.config = { ...this.config, ...config };
   }
   processFrame(probability, timestamp) {
-    const {
+    const {
+      startThreshold,
+      stopThreshold,
+      hangoverMs,
+      minSpeechDurationMs,
+      minSilenceDurationMs
+    } = this.config;
     let newState = this.currentState;
     if (this.currentState === "silent" || this.currentState === "speech_ending") {
       if (probability >= startThreshold) {
-
-
-
+        const silenceDuration = timestamp - this.lastSilenceTime;
+        if (silenceDuration >= minSilenceDurationMs) {
+          newState = "speech_starting";
+          this.speechStartTime = timestamp;
+          this.lastSpeechTime = timestamp;
+        } else {
+          newState = "silent";
+        }
       } else {
         newState = "silent";
+        this.lastSilenceTime = timestamp;
       }
     } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
       if (probability >= stopThreshold) {
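The state machine now carries voice-optimized defaults (start 0.6, stop 0.45, 400 ms hangover, 250 ms pre-roll, 100 ms minimum speech, 150 ms minimum silence) plus a nested `energyVad` block mirroring the worklet tunables, and every field remains overridable. A sketch of tightening a few of them through the pipeline config, assuming the shallow merge the constructor above shows (reusing `micTrack` and the import from the earlier sketch):

```ts
const strictPipeline = await createAudioPipeline(micTrack, {
  vad: {
    enabled: true,
    startThreshold: 0.7, // stricter onset for a noisy room
    hangoverMs: 300,     // release a little sooner after speech stops
    energyVad: {
      minSNR: 3,         // demand a stronger signal over the noise floor
      snrRange: 8,
    },
  },
});
```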
@@ -223,10 +330,15 @@ var VADStateMachine = class {
         this.lastSpeechTime = timestamp;
       } else {
         const timeSinceSpeech = timestamp - this.lastSpeechTime;
+        const speechDuration = timestamp - this.speechStartTime;
         if (timeSinceSpeech < hangoverMs) {
           newState = "speaking";
+        } else if (speechDuration < minSpeechDurationMs) {
+          newState = "silent";
+          this.lastSilenceTime = timestamp;
         } else {
           newState = "speech_ending";
+          this.lastSilenceTime = timestamp;
         }
       }
     }
@@ -245,42 +357,106 @@ var VADStateMachine = class {
 async function createAudioPipeline(sourceTrack, config = {}) {
   const context = getAudioContext();
   registerPipeline();
+  const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
+    config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
+  );
+  const vadEnabled = config.vad?.enabled !== false;
   const fullConfig = {
-    noiseSuppression: {
-
+    noiseSuppression: {
+      enabled: nsEnabled,
+      ...config.noiseSuppression
+    },
+    vad: {
+      enabled: vadEnabled,
+      // Voice-optimized defaults (will be overridden by config)
+      startThreshold: 0.6,
+      stopThreshold: 0.45,
+      hangoverMs: 400,
+      preRollMs: 250,
+      minSpeechDurationMs: 100,
+      minSilenceDurationMs: 150,
+      energyVad: {
+        smoothing: 0.95,
+        initialNoiseFloor: 1e-3,
+        noiseFloorAdaptRateQuiet: 0.01,
+        noiseFloorAdaptRateLoud: 1e-3,
+        minSNR: 2,
+        snrRange: 8
+      },
+      ...config.vad
+    },
     output: {
       speechGain: 1,
       silenceGain: 0,
-
+      // Full mute for voice-only
+      gainRampTime: 0.015,
+      // Fast but smooth transitions
+      smoothTransitions: true,
+      maxGainDb: 6,
+      enableCompression: false,
+      compression: {
+        threshold: -24,
+        ratio: 3,
+        attack: 3e-3,
+        release: 0.05
+      },
       ...config.output
     },
     livekit: { manageTrackMute: false, ...config.livekit }
   };
+  console.log("Audio pipeline config:", {
+    noiseSuppression: fullConfig.noiseSuppression?.enabled,
+    vad: fullConfig.vad?.enabled,
+    output: fullConfig.output
+  });
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createAudioPipeline requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
+  }
   const sourceStream = new MediaStream([sourceTrack]);
   const sourceNode = context.createMediaStreamSource(sourceStream);
-
-
-  );
-  const nsNode = await nsPlugin.createNode(
-    context,
-    fullConfig.noiseSuppression
-  );
-  const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
-  const vadStateMachine = new VADStateMachine(fullConfig.vad);
+  let nsNode;
+  let vadNode;
   const emitter = (0, import_mitt.default)();
-
-
-
-
-
-
-
-
-
-
+  try {
+    const nsPlugin = getNoiseSuppressionPlugin(
+      fullConfig.noiseSuppression?.pluginName
+    );
+    nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to create noise suppression node:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  const vadStateMachine = new VADStateMachine(fullConfig.vad);
+  try {
+    const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
+    vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
+      try {
+        const timestamp = context.currentTime * 1e3;
+        const newState = vadStateMachine.processFrame(prob, timestamp);
+        if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
+          emitter.emit("vadChange", newState);
+          lastVadState = newState;
+          updateGain(newState);
+        }
+      } catch (vadError) {
+        const err = vadError instanceof Error ? vadError : new Error(String(vadError));
+        console.error("Error in VAD callback:", err);
+        emitter.emit("error", err);
       }
-    }
-    )
+    });
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to create VAD node:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
   let lastVadState = {
     isSpeaking: false,
     probability: 0,
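A detail worth calling out in the hunk above: noise suppression is now auto-gated on having all three asset URLs, so omitting them no longer breaks pipeline creation. The rule, restated as a standalone predicate purely for illustration (not an exported helper):

```ts
type NsConfig = {
  enabled?: boolean;
  wasmUrl?: string;
  simdUrl?: string;
  workletUrl?: string;
};

// Mirrors the nsEnabled expression above: not explicitly disabled AND all assets present.
const isNoiseSuppressionActive = (ns?: NsConfig): boolean =>
  ns?.enabled !== false && Boolean(ns?.wasmUrl && ns?.simdUrl && ns?.workletUrl);

isNoiseSuppressionActive({ wasmUrl: "/a.wasm", simdUrl: "/b.wasm", workletUrl: "/c.js" }); // true
isNoiseSuppressionActive({ enabled: true });                                               // false: URLs missing
isNoiseSuppressionActive({ enabled: false, wasmUrl: "/a.wasm", simdUrl: "/b.wasm", workletUrl: "/c.js" }); // false
```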
@@ -291,39 +467,176 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   nsNode.connect(splitter);
   splitter.connect(vadNode);
   const delayNode = context.createDelay(1);
-  const preRollSeconds = (fullConfig.vad?.preRollMs ??
+  const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
   delayNode.delayTime.value = preRollSeconds;
   const gainNode = context.createGain();
   gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
+  let compressor = null;
+  if (fullConfig.output?.enableCompression) {
+    compressor = context.createDynamicsCompressor();
+    const comp = fullConfig.output.compression;
+    compressor.threshold.value = comp.threshold ?? -24;
+    compressor.ratio.value = comp.ratio ?? 3;
+    compressor.attack.value = comp.attack ?? 3e-3;
+    compressor.release.value = comp.release ?? 0.05;
+    compressor.knee.value = 10;
+  }
   const destination = context.createMediaStreamDestination();
-
-
-
+  try {
+    splitter.connect(delayNode);
+    delayNode.connect(gainNode);
+    if (compressor) {
+      gainNode.connect(compressor);
+      compressor.connect(destination);
+      console.log("Compression enabled:", fullConfig.output?.compression);
+    } else {
+      gainNode.connect(destination);
+    }
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to wire audio pipeline:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
   function updateGain(state) {
-
-
-
-
+    try {
+      const {
+        speechGain = 1,
+        silenceGain = 0,
+        gainRampTime = 0.015,
+        smoothTransitions = true,
+        maxGainDb = 6
+      } = fullConfig.output;
+      const maxGainLinear = Math.pow(10, maxGainDb / 20);
+      const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
+      const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
+      const now = context.currentTime;
+      if (smoothTransitions) {
+        gainNode.gain.cancelScheduledValues(now);
+        gainNode.gain.setValueAtTime(gainNode.gain.value, now);
+        gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
+      } else {
+        gainNode.gain.setValueAtTime(targetGain, now);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      console.error("Failed to update gain:", err);
+      emitter.emit("error", err);
+    }
   }
+  const audioTracks = destination.stream.getAudioTracks();
+  console.log("Destination stream tracks:", {
+    count: audioTracks.length,
+    tracks: audioTracks.map((t) => ({
+      id: t.id,
+      label: t.label,
+      enabled: t.enabled,
+      readyState: t.readyState
+    }))
+  });
+  if (audioTracks.length === 0) {
+    const err = new Error(
+      "Failed to create processed audio track: destination stream has no audio tracks. This may indicate an issue with the audio graph connection."
+    );
+    console.error(err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  const processedTrack = audioTracks[0];
+  if (!processedTrack || processedTrack.readyState === "ended") {
+    const err = new Error("Processed audio track is invalid or ended");
+    console.error(err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  console.log("Audio pipeline created successfully:", {
+    sourceTrack: {
+      id: sourceTrack.id,
+      label: sourceTrack.label,
+      readyState: sourceTrack.readyState
+    },
+    processedTrack: {
+      id: processedTrack.id,
+      label: processedTrack.label,
+      readyState: processedTrack.readyState
+    },
+    config: {
+      noiseSuppression: fullConfig.noiseSuppression?.enabled,
+      vad: fullConfig.vad?.enabled
+    }
+  });
   function dispose() {
-
-
-
-
-
-
-
-
+    try {
+      sourceNode.disconnect();
+      nsNode.disconnect();
+      splitter.disconnect();
+      vadNode.disconnect();
+      delayNode.disconnect();
+      gainNode.disconnect();
+      if (compressor) {
+        compressor.disconnect();
+      }
+      destination.stream.getTracks().forEach((t) => t.stop());
+      unregisterPipeline();
+    } catch (error) {
+      console.error("Error during pipeline disposal:", error);
+    }
   }
   return {
-    processedTrack
+    processedTrack,
     events: emitter,
     get state() {
       return lastVadState;
     },
     setConfig: (newConfig) => {
-
-
+      try {
+        if (newConfig.vad) {
+          vadStateMachine.updateConfig(newConfig.vad);
+          Object.assign(fullConfig.vad, newConfig.vad);
+          if (newConfig.vad.preRollMs !== void 0) {
+            const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
+            delayNode.delayTime.setValueAtTime(
+              preRollSeconds2,
+              context.currentTime
+            );
+          }
+        }
+        if (newConfig.output) {
+          Object.assign(fullConfig.output, newConfig.output);
+          updateGain(lastVadState);
+          if (compressor && newConfig.output.compression) {
+            const comp = newConfig.output.compression;
+            if (comp.threshold !== void 0) {
+              compressor.threshold.setValueAtTime(
+                comp.threshold,
+                context.currentTime
+              );
+            }
+            if (comp.ratio !== void 0) {
+              compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
+            }
+            if (comp.attack !== void 0) {
+              compressor.attack.setValueAtTime(
+                comp.attack,
+                context.currentTime
+              );
+            }
+            if (comp.release !== void 0) {
+              compressor.release.setValueAtTime(
+                comp.release,
+                context.currentTime
+              );
+            }
+          }
+        }
+        if (newConfig.livekit) {
+          Object.assign(fullConfig.livekit, newConfig.livekit);
+        }
+        console.log("Pipeline config updated:", newConfig);
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        console.error("Failed to update config:", err);
+        emitter.emit("error", err);
       }
     },
     dispose
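Taken together, the handle returned above exposes `processedTrack`, a mitt `events` emitter, a `state` getter, a runtime `setConfig`, and `dispose`. A usage sketch built only from what this diff shows (event names and config keys as they appear above; the root import path is assumed):

```ts
import { createAudioPipeline } from "@tensamin/audio";

const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const [micTrack] = stream.getAudioTracks();

const pipeline = await createAudioPipeline(micTrack, {
  vad: { enabled: true },
  output: { enableCompression: true },
});

// "vadChange" fires on a state change or a probability move greater than 0.1;
// "error" carries any Error the pipeline surfaces internally.
pipeline.events.on("vadChange", (state) => console.log("VAD state:", state));
pipeline.events.on("error", (err) => console.error("Pipeline error:", err));

// Publish pipeline.processedTrack to your SFU instead of the raw mic track,
// e.g. via the livekit/integration entry point (not exercised in this sketch).

// Retune at runtime, then tear down.
pipeline.setConfig({ output: { speechGain: 1.2 }, vad: { preRollMs: 150 } });
pipeline.dispose();
```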
@@ -1,11 +1,11 @@
 import {
   createAudioPipeline
-} from "../chunk-
-import "../chunk-
+} from "../chunk-AHBRT4RD.mjs";
+import "../chunk-N553RHTI.mjs";
 import "../chunk-OZ7KMC4S.mjs";
-import "../chunk-
-import "../chunk-
-import "../chunk-
+import "../chunk-YOSTLLCS.mjs";
+import "../chunk-XO6B3D4A.mjs";
+import "../chunk-NMHKX64G.mjs";
 export {
   createAudioPipeline
 };