@tensamin/audio 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -30
- package/dist/chunk-AHBRT4RD.mjs +307 -0
- package/dist/chunk-ERJVV5JR.mjs +91 -0
- package/dist/chunk-N553RHTI.mjs +93 -0
- package/dist/chunk-NMHKX64G.mjs +118 -0
- package/dist/chunk-XO6B3D4A.mjs +67 -0
- package/dist/{chunk-FS635GMR.mjs → chunk-YOSTLLCS.mjs} +2 -2
- package/dist/extensibility/plugins.js +110 -32
- package/dist/extensibility/plugins.mjs +3 -3
- package/dist/index.js +463 -97
- package/dist/index.mjs +6 -6
- package/dist/livekit/integration.js +463 -97
- package/dist/livekit/integration.mjs +6 -6
- package/dist/noise-suppression/rnnoise-node.js +42 -14
- package/dist/noise-suppression/rnnoise-node.mjs +1 -1
- package/dist/pipeline/audio-pipeline.js +396 -83
- package/dist/pipeline/audio-pipeline.mjs +5 -5
- package/dist/types.d.mts +118 -10
- package/dist/types.d.ts +118 -10
- package/dist/vad/vad-node.js +68 -18
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.d.mts +1 -0
- package/dist/vad/vad-state.d.ts +1 -0
- package/dist/vad/vad-state.js +42 -8
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
- package/dist/chunk-HFSKQ33X.mjs +0 -38
- package/dist/chunk-JJASCVEW.mjs +0 -59
- package/dist/chunk-QU7E5HBA.mjs +0 -106
- package/dist/chunk-SDTOKWM2.mjs +0 -39
- package/dist/chunk-UMU2KIB6.mjs +0 -68
package/dist/index.js
CHANGED

```diff
@@ -96,43 +96,83 @@ var RNNoisePlugin = class {
   async createNode(context, config) {
     const { loadRnnoise, RnnoiseWorkletNode } = await import("@sapphi-red/web-noise-suppressor");
     if (!config?.enabled) {
+      console.log("Noise suppression disabled, using passthrough node");
       const pass = context.createGain();
       return pass;
     }
     if (!config?.wasmUrl || !config?.simdUrl || !config?.workletUrl) {
-
-
+      const error = new Error(
+        `RNNoisePlugin requires 'wasmUrl', 'simdUrl', and 'workletUrl' to be configured. Please download the assets from @sapphi-red/web-noise-suppressor and provide the URLs in the config. Current config: wasmUrl=${config?.wasmUrl}, simdUrl=${config?.simdUrl}, workletUrl=${config?.workletUrl}
+To disable noise suppression, set noiseSuppression.enabled to false.`
       );
+      console.error(error.message);
+      throw error;
     }
-
-    this.wasmBuffer
-
-
-
+    try {
+      if (!this.wasmBuffer) {
+        console.log("Loading RNNoise WASM binary...");
+        this.wasmBuffer = await loadRnnoise({
+          url: config.wasmUrl,
+          simdUrl: config.simdUrl
+        });
+        console.log("RNNoise WASM loaded successfully");
+      }
+    } catch (error) {
+      const err = new Error(
+        `Failed to load RNNoise WASM binary: ${error instanceof Error ? error.message : String(error)}`
+      );
+      console.error(err);
+      throw err;
     }
     const workletUrl = config.workletUrl;
     try {
       await context.audioWorklet.addModule(workletUrl);
+      console.log("RNNoise worklet loaded successfully");
     } catch (e) {
-
+      const error = new Error(
+        `Failed to load RNNoise worklet from ${workletUrl}: ${e instanceof Error ? e.message : String(e)}. Ensure the workletUrl points to a valid RNNoise worklet script.`
+      );
+      console.error(error.message);
+      throw error;
+    }
+    try {
+      const node = new RnnoiseWorkletNode(context, {
+        wasmBinary: this.wasmBuffer,
+        maxChannels: 1
+        // Mono for now
+      });
+      console.log("RNNoise worklet node created successfully");
+      return node;
+    } catch (error) {
+      const err = new Error(
+        `Failed to create RNNoise worklet node: ${error instanceof Error ? error.message : String(error)}`
+      );
+      console.error(err);
+      throw err;
     }
-    const node = new RnnoiseWorkletNode(context, {
-      wasmBinary: this.wasmBuffer,
-      maxChannels: 1
-      // Mono for now
-    });
-    return node;
   }
 };
 
 // src/vad/vad-node.ts
-var
+var createEnergyVadWorkletCode = (vadConfig) => {
+  const energyParams = vadConfig?.energyVad || {};
+  const smoothing = energyParams.smoothing ?? 0.95;
+  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
+  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
+  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
+  const minSNR = energyParams.minSNR ?? 2;
+  const snrRange = energyParams.snrRange ?? 8;
+  return `
 class EnergyVadProcessor extends AudioWorkletProcessor {
   constructor() {
     super();
-    this.smoothing =
+    this.smoothing = ${smoothing};
     this.energy = 0;
-    this.noiseFloor =
+    this.noiseFloor = ${initialNoiseFloor};
+    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
+    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
+    this.minSNR = ${minSNR};
+    this.snrRange = ${snrRange};
   }
 
   process(inputs, outputs, parameters) {
@@ -140,51 +180,89 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
     if (!input || !input.length) return true;
     const channel = input[0];
 
-    // Calculate RMS
+    // Calculate RMS (Root Mean Square) energy
     let sum = 0;
     for (let i = 0; i < channel.length; i++) {
       sum += channel[i] * channel[i];
     }
     const rms = Math.sqrt(sum / channel.length);
 
-    //
+    // Adaptive noise floor estimation
+    // When signal is quiet, adapt quickly to find new noise floor
+    // When signal is loud (speech), adapt slowly to avoid raising noise floor
     if (rms < this.noiseFloor) {
-      this.noiseFloor = this.noiseFloor *
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
     } else {
-      this.noiseFloor = this.noiseFloor *
+      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
     }
 
-    // Calculate
-    // This is a heuristic mapping from energy to 0-1
+    // Calculate Signal-to-Noise Ratio (SNR)
     const snr = rms / (this.noiseFloor + 1e-6);
-
+
+    // Map SNR to probability (0-1)
+    // Probability is 0 when SNR <= minSNR
+    // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
+    // Probability is 1 when SNR >= (minSNR + snrRange)
+    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
 
-    this.port.postMessage({ probability });
+    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
 
     return true;
   }
 }
 registerProcessor('energy-vad-processor', EnergyVadProcessor);
 `;
+};
 var EnergyVADPlugin = class {
   name = "energy-vad";
   async createNode(context, config, onDecision) {
-
+    if (!config?.enabled) {
+      console.log("VAD disabled, using passthrough node");
+      const pass = context.createGain();
+      return pass;
+    }
+    const workletCode = createEnergyVadWorkletCode(config);
+    const blob = new Blob([workletCode], {
       type: "application/javascript"
     });
     const url = URL.createObjectURL(blob);
     try {
       await context.audioWorklet.addModule(url);
+      console.log("Energy VAD worklet loaded successfully");
     } catch (e) {
-
-
-
+      const error = new Error(
+        `Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
+      );
+      console.error(error.message);
       URL.revokeObjectURL(url);
+      throw error;
+    }
+    URL.revokeObjectURL(url);
+    let node;
+    try {
+      node = new AudioWorkletNode(context, "energy-vad-processor");
+      console.log("Energy VAD node created successfully");
+    } catch (e) {
+      const error = new Error(
+        `Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`
+      );
+      console.error(error.message);
+      throw error;
     }
-    const node = new AudioWorkletNode(context, "energy-vad-processor");
     node.port.onmessage = (event) => {
-
-
+      try {
+        const { probability } = event.data;
+        if (typeof probability === "number" && !isNaN(probability)) {
+          onDecision(probability);
+        } else {
+          console.warn("Invalid VAD probability received:", event.data);
+        }
+      } catch (error) {
+        console.error("Error in VAD message handler:", error);
+      }
+    };
+    node.port.onmessageerror = (event) => {
+      console.error("VAD port message error:", event);
     };
     return node;
   }
@@ -230,31 +308,60 @@ var VADStateMachine = class {
   currentState = "silent";
   lastSpeechTime = 0;
   speechStartTime = 0;
+  lastSilenceTime = 0;
   frameDurationMs = 20;
   // Assumed frame duration, updated by calls
   constructor(config) {
     this.config = {
       enabled: config?.enabled ?? true,
       pluginName: config?.pluginName ?? "energy-vad",
-
-
-
-
+      // Voice-optimized defaults
+      startThreshold: config?.startThreshold ?? 0.6,
+      // Higher threshold to avoid noise
+      stopThreshold: config?.stopThreshold ?? 0.45,
+      // Balanced for voice
+      hangoverMs: config?.hangoverMs ?? 400,
+      // Smooth for natural speech
+      preRollMs: config?.preRollMs ?? 250,
+      // Generous pre-roll
+      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
+      minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
+      energyVad: {
+        smoothing: config?.energyVad?.smoothing ?? 0.95,
+        initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
+        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
+        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
+        minSNR: config?.energyVad?.minSNR ?? 2,
+        snrRange: config?.energyVad?.snrRange ?? 8
+      }
     };
+    this.lastSilenceTime = Date.now();
   }
   updateConfig(config) {
     this.config = { ...this.config, ...config };
   }
   processFrame(probability, timestamp) {
-    const {
+    const {
+      startThreshold,
+      stopThreshold,
+      hangoverMs,
+      minSpeechDurationMs,
+      minSilenceDurationMs
+    } = this.config;
     let newState = this.currentState;
     if (this.currentState === "silent" || this.currentState === "speech_ending") {
       if (probability >= startThreshold) {
-
-
-
+        const silenceDuration = timestamp - this.lastSilenceTime;
+        if (silenceDuration >= minSilenceDurationMs) {
+          newState = "speech_starting";
+          this.speechStartTime = timestamp;
+          this.lastSpeechTime = timestamp;
+        } else {
+          newState = "silent";
+        }
       } else {
         newState = "silent";
+        this.lastSilenceTime = timestamp;
       }
     } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
       if (probability >= stopThreshold) {
@@ -262,10 +369,15 @@ var VADStateMachine = class {
         this.lastSpeechTime = timestamp;
       } else {
         const timeSinceSpeech = timestamp - this.lastSpeechTime;
+        const speechDuration = timestamp - this.speechStartTime;
         if (timeSinceSpeech < hangoverMs) {
           newState = "speaking";
+        } else if (speechDuration < minSpeechDurationMs) {
+          newState = "silent";
+          this.lastSilenceTime = timestamp;
         } else {
           newState = "speech_ending";
+          this.lastSilenceTime = timestamp;
         }
       }
     }
@@ -284,42 +396,106 @@ var VADStateMachine = class {
 async function createAudioPipeline(sourceTrack, config = {}) {
   const context = getAudioContext();
   registerPipeline();
+  const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
+    config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
+  );
+  const vadEnabled = config.vad?.enabled !== false;
   const fullConfig = {
-    noiseSuppression: {
-
+    noiseSuppression: {
+      enabled: nsEnabled,
+      ...config.noiseSuppression
+    },
+    vad: {
+      enabled: vadEnabled,
+      // Voice-optimized defaults (will be overridden by config)
+      startThreshold: 0.6,
+      stopThreshold: 0.45,
+      hangoverMs: 400,
+      preRollMs: 250,
+      minSpeechDurationMs: 100,
+      minSilenceDurationMs: 150,
+      energyVad: {
+        smoothing: 0.95,
+        initialNoiseFloor: 1e-3,
+        noiseFloorAdaptRateQuiet: 0.01,
+        noiseFloorAdaptRateLoud: 1e-3,
+        minSNR: 2,
+        snrRange: 8
+      },
+      ...config.vad
+    },
     output: {
       speechGain: 1,
       silenceGain: 0,
-
+      // Full mute for voice-only
+      gainRampTime: 0.015,
+      // Fast but smooth transitions
+      smoothTransitions: true,
+      maxGainDb: 6,
+      enableCompression: false,
+      compression: {
+        threshold: -24,
+        ratio: 3,
+        attack: 3e-3,
+        release: 0.05
+      },
       ...config.output
     },
     livekit: { manageTrackMute: false, ...config.livekit }
   };
+  console.log("Audio pipeline config:", {
+    noiseSuppression: fullConfig.noiseSuppression?.enabled,
+    vad: fullConfig.vad?.enabled,
+    output: fullConfig.output
+  });
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createAudioPipeline requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
+  }
   const sourceStream = new MediaStream([sourceTrack]);
   const sourceNode = context.createMediaStreamSource(sourceStream);
-
-
-  );
-  const nsNode = await nsPlugin.createNode(
-    context,
-    fullConfig.noiseSuppression
-  );
-  const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
-  const vadStateMachine = new VADStateMachine(fullConfig.vad);
+  let nsNode;
+  let vadNode;
   const emitter = (0, import_mitt.default)();
-
-
-
-
-
-
-
-
-
-
+  try {
+    const nsPlugin = getNoiseSuppressionPlugin(
+      fullConfig.noiseSuppression?.pluginName
+    );
+    nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to create noise suppression node:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  const vadStateMachine = new VADStateMachine(fullConfig.vad);
+  try {
+    const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
+    vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
+      try {
+        const timestamp = context.currentTime * 1e3;
+        const newState = vadStateMachine.processFrame(prob, timestamp);
+        if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
+          emitter.emit("vadChange", newState);
+          lastVadState = newState;
+          updateGain(newState);
+        }
+      } catch (vadError) {
+        const err = vadError instanceof Error ? vadError : new Error(String(vadError));
+        console.error("Error in VAD callback:", err);
+        emitter.emit("error", err);
       }
-    }
-  )
+    });
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to create VAD node:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
   let lastVadState = {
     isSpeaking: false,
     probability: 0,
@@ -330,39 +506,176 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   nsNode.connect(splitter);
   splitter.connect(vadNode);
   const delayNode = context.createDelay(1);
-  const preRollSeconds = (fullConfig.vad?.preRollMs ??
+  const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
   delayNode.delayTime.value = preRollSeconds;
   const gainNode = context.createGain();
   gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
+  let compressor = null;
+  if (fullConfig.output?.enableCompression) {
+    compressor = context.createDynamicsCompressor();
+    const comp = fullConfig.output.compression;
+    compressor.threshold.value = comp.threshold ?? -24;
+    compressor.ratio.value = comp.ratio ?? 3;
+    compressor.attack.value = comp.attack ?? 3e-3;
+    compressor.release.value = comp.release ?? 0.05;
+    compressor.knee.value = 10;
+  }
   const destination = context.createMediaStreamDestination();
-
-
-
+  try {
+    splitter.connect(delayNode);
+    delayNode.connect(gainNode);
+    if (compressor) {
+      gainNode.connect(compressor);
+      compressor.connect(destination);
+      console.log("Compression enabled:", fullConfig.output?.compression);
+    } else {
+      gainNode.connect(destination);
+    }
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to wire audio pipeline:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
   function updateGain(state) {
-
-
-
-
+    try {
+      const {
+        speechGain = 1,
+        silenceGain = 0,
+        gainRampTime = 0.015,
+        smoothTransitions = true,
+        maxGainDb = 6
+      } = fullConfig.output;
+      const maxGainLinear = Math.pow(10, maxGainDb / 20);
+      const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
+      const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
+      const now = context.currentTime;
+      if (smoothTransitions) {
+        gainNode.gain.cancelScheduledValues(now);
+        gainNode.gain.setValueAtTime(gainNode.gain.value, now);
+        gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
+      } else {
+        gainNode.gain.setValueAtTime(targetGain, now);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      console.error("Failed to update gain:", err);
+      emitter.emit("error", err);
+    }
+  }
+  const audioTracks = destination.stream.getAudioTracks();
+  console.log("Destination stream tracks:", {
+    count: audioTracks.length,
+    tracks: audioTracks.map((t) => ({
+      id: t.id,
+      label: t.label,
+      enabled: t.enabled,
+      readyState: t.readyState
+    }))
+  });
+  if (audioTracks.length === 0) {
+    const err = new Error(
+      "Failed to create processed audio track: destination stream has no audio tracks. This may indicate an issue with the audio graph connection."
+    );
+    console.error(err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  const processedTrack = audioTracks[0];
+  if (!processedTrack || processedTrack.readyState === "ended") {
+    const err = new Error("Processed audio track is invalid or ended");
+    console.error(err);
+    emitter.emit("error", err);
+    throw err;
   }
+  console.log("Audio pipeline created successfully:", {
+    sourceTrack: {
+      id: sourceTrack.id,
+      label: sourceTrack.label,
+      readyState: sourceTrack.readyState
+    },
+    processedTrack: {
+      id: processedTrack.id,
+      label: processedTrack.label,
+      readyState: processedTrack.readyState
+    },
+    config: {
+      noiseSuppression: fullConfig.noiseSuppression?.enabled,
+      vad: fullConfig.vad?.enabled
+    }
+  });
   function dispose() {
-
-
-
-
-
-
-
-
+    try {
+      sourceNode.disconnect();
+      nsNode.disconnect();
+      splitter.disconnect();
+      vadNode.disconnect();
+      delayNode.disconnect();
+      gainNode.disconnect();
+      if (compressor) {
+        compressor.disconnect();
+      }
+      destination.stream.getTracks().forEach((t) => t.stop());
+      unregisterPipeline();
+    } catch (error) {
+      console.error("Error during pipeline disposal:", error);
+    }
   }
   return {
-    processedTrack
+    processedTrack,
    events: emitter,
    get state() {
      return lastVadState;
    },
    setConfig: (newConfig) => {
-
-
+      try {
+        if (newConfig.vad) {
+          vadStateMachine.updateConfig(newConfig.vad);
+          Object.assign(fullConfig.vad, newConfig.vad);
+          if (newConfig.vad.preRollMs !== void 0) {
+            const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
+            delayNode.delayTime.setValueAtTime(
+              preRollSeconds2,
+              context.currentTime
+            );
+          }
+        }
+        if (newConfig.output) {
+          Object.assign(fullConfig.output, newConfig.output);
+          updateGain(lastVadState);
+          if (compressor && newConfig.output.compression) {
+            const comp = newConfig.output.compression;
+            if (comp.threshold !== void 0) {
+              compressor.threshold.setValueAtTime(
+                comp.threshold,
+                context.currentTime
+              );
+            }
+            if (comp.ratio !== void 0) {
+              compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
+            }
+            if (comp.attack !== void 0) {
+              compressor.attack.setValueAtTime(
+                comp.attack,
+                context.currentTime
+              );
+            }
+            if (comp.release !== void 0) {
+              compressor.release.setValueAtTime(
+                comp.release,
+                context.currentTime
+              );
+            }
+          }
+        }
+        if (newConfig.livekit) {
+          Object.assign(fullConfig.livekit, newConfig.livekit);
+        }
+        console.log("Pipeline config updated:", newConfig);
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        console.error("Failed to update config:", err);
+        emitter.emit("error", err);
      }
    },
    dispose
@@ -371,31 +684,84 @@ async function createAudioPipeline(sourceTrack, config = {}) {
 
 // src/livekit/integration.ts
 async function attachProcessingToTrack(track, config = {}) {
+  if (!track) {
+    throw new Error("attachProcessingToTrack requires a valid LocalAudioTrack");
+  }
   const originalTrack = track.mediaStreamTrack;
-
-
+  if (!originalTrack) {
+    throw new Error("LocalAudioTrack has no underlying MediaStreamTrack");
+  }
+  if (originalTrack.readyState === "ended") {
+    throw new Error("Cannot attach processing to an ended MediaStreamTrack");
+  }
+  let pipeline;
+  try {
+    console.log("Creating audio processing pipeline...");
+    pipeline = await createAudioPipeline(originalTrack, config);
+    console.log("Audio processing pipeline created successfully");
+  } catch (error) {
+    const err = new Error(
+      `Failed to create audio pipeline: ${error instanceof Error ? error.message : String(error)}`
+    );
+    console.error(err);
+    throw err;
+  }
+  if (!pipeline.processedTrack) {
+    throw new Error("Pipeline did not return a processed track");
+  }
+  try {
+    console.log("Replacing LiveKit track with processed track...");
+    await track.replaceTrack(pipeline.processedTrack);
+    console.log("LiveKit track replaced successfully");
+  } catch (error) {
+    pipeline.dispose();
+    const err = new Error(
+      `Failed to replace LiveKit track: ${error instanceof Error ? error.message : String(error)}`
+    );
+    console.error(err);
+    throw err;
+  }
   if (config.livekit?.manageTrackMute) {
     let isVadMuted = false;
     pipeline.events.on("vadChange", async (state) => {
-
-      if (
-
-
-
-
-
-
+      try {
+        if (state.isSpeaking) {
+          if (isVadMuted) {
+            await track.unmute();
+            isVadMuted = false;
+          }
+        } else {
+          if (!track.isMuted) {
+            await track.mute();
+            isVadMuted = true;
+          }
        }
+      } catch (error) {
+        console.error("Error handling VAD-based track muting:", error);
      }
    });
  }
+  pipeline.events.on("error", (error) => {
+    console.error("Audio pipeline error:", error);
+  });
  const originalDispose = pipeline.dispose;
  pipeline.dispose = () => {
-
-
+    try {
+      if (originalTrack.readyState === "live") {
+        console.log("Restoring original track...");
+        track.replaceTrack(originalTrack).catch((error) => {
+          console.error("Failed to restore original track:", error);
+        });
+      }
+      originalDispose();
+    } catch (error) {
+      console.error("Error during pipeline disposal:", error);
+      try {
+        originalDispose();
+      } catch (disposeError) {
+        console.error("Error calling original dispose:", disposeError);
+      }
    }
-    originalDispose();
  };
  return pipeline;
 }
```