@tensamin/audio 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -30
- package/dist/chunk-AHBRT4RD.mjs +307 -0
- package/dist/chunk-ERJVV5JR.mjs +91 -0
- package/dist/chunk-N553RHTI.mjs +93 -0
- package/dist/chunk-NMHKX64G.mjs +118 -0
- package/dist/chunk-XO6B3D4A.mjs +67 -0
- package/dist/{chunk-FS635GMR.mjs → chunk-YOSTLLCS.mjs} +2 -2
- package/dist/extensibility/plugins.js +110 -32
- package/dist/extensibility/plugins.mjs +3 -3
- package/dist/index.js +463 -97
- package/dist/index.mjs +6 -6
- package/dist/livekit/integration.js +463 -97
- package/dist/livekit/integration.mjs +6 -6
- package/dist/noise-suppression/rnnoise-node.js +42 -14
- package/dist/noise-suppression/rnnoise-node.mjs +1 -1
- package/dist/pipeline/audio-pipeline.js +396 -83
- package/dist/pipeline/audio-pipeline.mjs +5 -5
- package/dist/types.d.mts +118 -10
- package/dist/types.d.ts +118 -10
- package/dist/vad/vad-node.js +68 -18
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.d.mts +1 -0
- package/dist/vad/vad-state.d.ts +1 -0
- package/dist/vad/vad-state.js +42 -8
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
- package/dist/chunk-HFSKQ33X.mjs +0 -38
- package/dist/chunk-JJASCVEW.mjs +0 -59
- package/dist/chunk-QU7E5HBA.mjs +0 -106
- package/dist/chunk-SDTOKWM2.mjs +0 -39
- package/dist/chunk-UMU2KIB6.mjs +0 -68
|
@@ -65,43 +65,83 @@ var RNNoisePlugin = class {
|
|
|
65
65
|
async createNode(context, config) {
|
|
66
66
|
const { loadRnnoise, RnnoiseWorkletNode } = await import("@sapphi-red/web-noise-suppressor");
|
|
67
67
|
if (!config?.enabled) {
|
|
68
|
+
console.log("Noise suppression disabled, using passthrough node");
|
|
68
69
|
const pass = context.createGain();
|
|
69
70
|
return pass;
|
|
70
71
|
}
|
|
71
72
|
if (!config?.wasmUrl || !config?.simdUrl || !config?.workletUrl) {
|
|
72
|
-
|
|
73
|
-
|
|
73
|
+
const error = new Error(
|
|
74
|
+
`RNNoisePlugin requires 'wasmUrl', 'simdUrl', and 'workletUrl' to be configured. Please download the assets from @sapphi-red/web-noise-suppressor and provide the URLs in the config. Current config: wasmUrl=${config?.wasmUrl}, simdUrl=${config?.simdUrl}, workletUrl=${config?.workletUrl}
|
|
75
|
+
To disable noise suppression, set noiseSuppression.enabled to false.`
|
|
74
76
|
);
|
|
77
|
+
console.error(error.message);
|
|
78
|
+
throw error;
|
|
75
79
|
}
|
|
76
|
-
|
|
77
|
-
this.wasmBuffer
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
80
|
+
try {
|
|
81
|
+
if (!this.wasmBuffer) {
|
|
82
|
+
console.log("Loading RNNoise WASM binary...");
|
|
83
|
+
this.wasmBuffer = await loadRnnoise({
|
|
84
|
+
url: config.wasmUrl,
|
|
85
|
+
simdUrl: config.simdUrl
|
|
86
|
+
});
|
|
87
|
+
console.log("RNNoise WASM loaded successfully");
|
|
88
|
+
}
|
|
89
|
+
} catch (error) {
|
|
90
|
+
const err = new Error(
|
|
91
|
+
`Failed to load RNNoise WASM binary: ${error instanceof Error ? error.message : String(error)}`
|
|
92
|
+
);
|
|
93
|
+
console.error(err);
|
|
94
|
+
throw err;
|
|
81
95
|
}
|
|
82
96
|
const workletUrl = config.workletUrl;
|
|
83
97
|
try {
|
|
84
98
|
await context.audioWorklet.addModule(workletUrl);
|
|
99
|
+
console.log("RNNoise worklet loaded successfully");
|
|
85
100
|
} catch (e) {
|
|
86
|
-
|
|
101
|
+
const error = new Error(
|
|
102
|
+
`Failed to load RNNoise worklet from ${workletUrl}: ${e instanceof Error ? e.message : String(e)}. Ensure the workletUrl points to a valid RNNoise worklet script.`
|
|
103
|
+
);
|
|
104
|
+
console.error(error.message);
|
|
105
|
+
throw error;
|
|
106
|
+
}
|
|
107
|
+
try {
|
|
108
|
+
const node = new RnnoiseWorkletNode(context, {
|
|
109
|
+
wasmBinary: this.wasmBuffer,
|
|
110
|
+
maxChannels: 1
|
|
111
|
+
// Mono for now
|
|
112
|
+
});
|
|
113
|
+
console.log("RNNoise worklet node created successfully");
|
|
114
|
+
return node;
|
|
115
|
+
} catch (error) {
|
|
116
|
+
const err = new Error(
|
|
117
|
+
`Failed to create RNNoise worklet node: ${error instanceof Error ? error.message : String(error)}`
|
|
118
|
+
);
|
|
119
|
+
console.error(err);
|
|
120
|
+
throw err;
|
|
87
121
|
}
|
|
88
|
-
const node = new RnnoiseWorkletNode(context, {
|
|
89
|
-
wasmBinary: this.wasmBuffer,
|
|
90
|
-
maxChannels: 1
|
|
91
|
-
// Mono for now
|
|
92
|
-
});
|
|
93
|
-
return node;
|
|
94
122
|
}
|
|
95
123
|
};
|
|
96
124
|
|
|
97
125
|
// src/vad/vad-node.ts
|
|
98
|
-
var
|
|
126
|
+
var createEnergyVadWorkletCode = (vadConfig) => {
|
|
127
|
+
const energyParams = vadConfig?.energyVad || {};
|
|
128
|
+
const smoothing = energyParams.smoothing ?? 0.95;
|
|
129
|
+
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
130
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
131
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
|
|
132
|
+
const minSNR = energyParams.minSNR ?? 2;
|
|
133
|
+
const snrRange = energyParams.snrRange ?? 8;
|
|
134
|
+
return `
|
|
99
135
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
100
136
|
constructor() {
|
|
101
137
|
super();
|
|
102
|
-
this.smoothing =
|
|
138
|
+
this.smoothing = ${smoothing};
|
|
103
139
|
this.energy = 0;
|
|
104
|
-
this.noiseFloor =
|
|
140
|
+
this.noiseFloor = ${initialNoiseFloor};
|
|
141
|
+
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
142
|
+
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
143
|
+
this.minSNR = ${minSNR};
|
|
144
|
+
this.snrRange = ${snrRange};
|
|
105
145
|
}
|
|
106
146
|
|
|
107
147
|
process(inputs, outputs, parameters) {
|
|
@@ -109,51 +149,89 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
109
149
|
if (!input || !input.length) return true;
|
|
110
150
|
const channel = input[0];
|
|
111
151
|
|
|
112
|
-
// Calculate RMS
|
|
152
|
+
// Calculate RMS (Root Mean Square) energy
|
|
113
153
|
let sum = 0;
|
|
114
154
|
for (let i = 0; i < channel.length; i++) {
|
|
115
155
|
sum += channel[i] * channel[i];
|
|
116
156
|
}
|
|
117
157
|
const rms = Math.sqrt(sum / channel.length);
|
|
118
158
|
|
|
119
|
-
//
|
|
159
|
+
// Adaptive noise floor estimation
|
|
160
|
+
// When signal is quiet, adapt quickly to find new noise floor
|
|
161
|
+
// When signal is loud (speech), adapt slowly to avoid raising noise floor
|
|
120
162
|
if (rms < this.noiseFloor) {
|
|
121
|
-
this.noiseFloor = this.noiseFloor *
|
|
163
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
122
164
|
} else {
|
|
123
|
-
this.noiseFloor = this.noiseFloor *
|
|
165
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
124
166
|
}
|
|
125
167
|
|
|
126
|
-
// Calculate
|
|
127
|
-
// This is a heuristic mapping from energy to 0-1
|
|
168
|
+
// Calculate Signal-to-Noise Ratio (SNR)
|
|
128
169
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
129
|
-
|
|
170
|
+
|
|
171
|
+
// Map SNR to probability (0-1)
|
|
172
|
+
// Probability is 0 when SNR <= minSNR
|
|
173
|
+
// Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
|
|
174
|
+
// Probability is 1 when SNR >= (minSNR + snrRange)
|
|
175
|
+
const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
|
|
130
176
|
|
|
131
|
-
this.port.postMessage({ probability });
|
|
177
|
+
this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
|
|
132
178
|
|
|
133
179
|
return true;
|
|
134
180
|
}
|
|
135
181
|
}
|
|
136
182
|
registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
137
183
|
`;
|
|
184
|
+
};
|
|
138
185
|
var EnergyVADPlugin = class {
|
|
139
186
|
name = "energy-vad";
|
|
140
187
|
async createNode(context, config, onDecision) {
|
|
141
|
-
|
|
188
|
+
if (!config?.enabled) {
|
|
189
|
+
console.log("VAD disabled, using passthrough node");
|
|
190
|
+
const pass = context.createGain();
|
|
191
|
+
return pass;
|
|
192
|
+
}
|
|
193
|
+
const workletCode = createEnergyVadWorkletCode(config);
|
|
194
|
+
const blob = new Blob([workletCode], {
|
|
142
195
|
type: "application/javascript"
|
|
143
196
|
});
|
|
144
197
|
const url = URL.createObjectURL(blob);
|
|
145
198
|
try {
|
|
146
199
|
await context.audioWorklet.addModule(url);
|
|
200
|
+
console.log("Energy VAD worklet loaded successfully");
|
|
147
201
|
} catch (e) {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
202
|
+
const error = new Error(
|
|
203
|
+
`Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
|
|
204
|
+
);
|
|
205
|
+
console.error(error.message);
|
|
151
206
|
URL.revokeObjectURL(url);
|
|
207
|
+
throw error;
|
|
208
|
+
}
|
|
209
|
+
URL.revokeObjectURL(url);
|
|
210
|
+
let node;
|
|
211
|
+
try {
|
|
212
|
+
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
213
|
+
console.log("Energy VAD node created successfully");
|
|
214
|
+
} catch (e) {
|
|
215
|
+
const error = new Error(
|
|
216
|
+
`Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`
|
|
217
|
+
);
|
|
218
|
+
console.error(error.message);
|
|
219
|
+
throw error;
|
|
152
220
|
}
|
|
153
|
-
const node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
154
221
|
node.port.onmessage = (event) => {
|
|
155
|
-
|
|
156
|
-
|
|
222
|
+
try {
|
|
223
|
+
const { probability } = event.data;
|
|
224
|
+
if (typeof probability === "number" && !isNaN(probability)) {
|
|
225
|
+
onDecision(probability);
|
|
226
|
+
} else {
|
|
227
|
+
console.warn("Invalid VAD probability received:", event.data);
|
|
228
|
+
}
|
|
229
|
+
} catch (error) {
|
|
230
|
+
console.error("Error in VAD message handler:", error);
|
|
231
|
+
}
|
|
232
|
+
};
|
|
233
|
+
node.port.onmessageerror = (event) => {
|
|
234
|
+
console.error("VAD port message error:", event);
|
|
157
235
|
};
|
|
158
236
|
return node;
|
|
159
237
|
}
|
|
@@ -193,31 +271,60 @@ var VADStateMachine = class {
|
|
|
193
271
|
currentState = "silent";
|
|
194
272
|
lastSpeechTime = 0;
|
|
195
273
|
speechStartTime = 0;
|
|
274
|
+
lastSilenceTime = 0;
|
|
196
275
|
frameDurationMs = 20;
|
|
197
276
|
// Assumed frame duration, updated by calls
|
|
198
277
|
constructor(config) {
|
|
199
278
|
this.config = {
|
|
200
279
|
enabled: config?.enabled ?? true,
|
|
201
280
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
281
|
+
// Voice-optimized defaults
|
|
282
|
+
startThreshold: config?.startThreshold ?? 0.6,
|
|
283
|
+
// Higher threshold to avoid noise
|
|
284
|
+
stopThreshold: config?.stopThreshold ?? 0.45,
|
|
285
|
+
// Balanced for voice
|
|
286
|
+
hangoverMs: config?.hangoverMs ?? 400,
|
|
287
|
+
// Smooth for natural speech
|
|
288
|
+
preRollMs: config?.preRollMs ?? 250,
|
|
289
|
+
// Generous pre-roll
|
|
290
|
+
minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
|
|
291
|
+
minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
|
|
292
|
+
energyVad: {
|
|
293
|
+
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
294
|
+
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
295
|
+
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
|
|
296
|
+
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
|
|
297
|
+
minSNR: config?.energyVad?.minSNR ?? 2,
|
|
298
|
+
snrRange: config?.energyVad?.snrRange ?? 8
|
|
299
|
+
}
|
|
206
300
|
};
|
|
301
|
+
this.lastSilenceTime = Date.now();
|
|
207
302
|
}
|
|
208
303
|
updateConfig(config) {
|
|
209
304
|
this.config = { ...this.config, ...config };
|
|
210
305
|
}
|
|
211
306
|
processFrame(probability, timestamp) {
|
|
212
|
-
const {
|
|
307
|
+
const {
|
|
308
|
+
startThreshold,
|
|
309
|
+
stopThreshold,
|
|
310
|
+
hangoverMs,
|
|
311
|
+
minSpeechDurationMs,
|
|
312
|
+
minSilenceDurationMs
|
|
313
|
+
} = this.config;
|
|
213
314
|
let newState = this.currentState;
|
|
214
315
|
if (this.currentState === "silent" || this.currentState === "speech_ending") {
|
|
215
316
|
if (probability >= startThreshold) {
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
317
|
+
const silenceDuration = timestamp - this.lastSilenceTime;
|
|
318
|
+
if (silenceDuration >= minSilenceDurationMs) {
|
|
319
|
+
newState = "speech_starting";
|
|
320
|
+
this.speechStartTime = timestamp;
|
|
321
|
+
this.lastSpeechTime = timestamp;
|
|
322
|
+
} else {
|
|
323
|
+
newState = "silent";
|
|
324
|
+
}
|
|
219
325
|
} else {
|
|
220
326
|
newState = "silent";
|
|
327
|
+
this.lastSilenceTime = timestamp;
|
|
221
328
|
}
|
|
222
329
|
} else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
|
|
223
330
|
if (probability >= stopThreshold) {
|
|
@@ -225,10 +332,15 @@ var VADStateMachine = class {
|
|
|
225
332
|
this.lastSpeechTime = timestamp;
|
|
226
333
|
} else {
|
|
227
334
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
335
|
+
const speechDuration = timestamp - this.speechStartTime;
|
|
228
336
|
if (timeSinceSpeech < hangoverMs) {
|
|
229
337
|
newState = "speaking";
|
|
338
|
+
} else if (speechDuration < minSpeechDurationMs) {
|
|
339
|
+
newState = "silent";
|
|
340
|
+
this.lastSilenceTime = timestamp;
|
|
230
341
|
} else {
|
|
231
342
|
newState = "speech_ending";
|
|
343
|
+
this.lastSilenceTime = timestamp;
|
|
232
344
|
}
|
|
233
345
|
}
|
|
234
346
|
}
|
|
@@ -247,42 +359,106 @@ var VADStateMachine = class {
|
|
|
247
359
|
async function createAudioPipeline(sourceTrack, config = {}) {
|
|
248
360
|
const context = getAudioContext();
|
|
249
361
|
registerPipeline();
|
|
362
|
+
const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
|
|
363
|
+
config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
|
|
364
|
+
);
|
|
365
|
+
const vadEnabled = config.vad?.enabled !== false;
|
|
250
366
|
const fullConfig = {
|
|
251
|
-
noiseSuppression: {
|
|
252
|
-
|
|
367
|
+
noiseSuppression: {
|
|
368
|
+
enabled: nsEnabled,
|
|
369
|
+
...config.noiseSuppression
|
|
370
|
+
},
|
|
371
|
+
vad: {
|
|
372
|
+
enabled: vadEnabled,
|
|
373
|
+
// Voice-optimized defaults (will be overridden by config)
|
|
374
|
+
startThreshold: 0.6,
|
|
375
|
+
stopThreshold: 0.45,
|
|
376
|
+
hangoverMs: 400,
|
|
377
|
+
preRollMs: 250,
|
|
378
|
+
minSpeechDurationMs: 100,
|
|
379
|
+
minSilenceDurationMs: 150,
|
|
380
|
+
energyVad: {
|
|
381
|
+
smoothing: 0.95,
|
|
382
|
+
initialNoiseFloor: 1e-3,
|
|
383
|
+
noiseFloorAdaptRateQuiet: 0.01,
|
|
384
|
+
noiseFloorAdaptRateLoud: 1e-3,
|
|
385
|
+
minSNR: 2,
|
|
386
|
+
snrRange: 8
|
|
387
|
+
},
|
|
388
|
+
...config.vad
|
|
389
|
+
},
|
|
253
390
|
output: {
|
|
254
391
|
speechGain: 1,
|
|
255
392
|
silenceGain: 0,
|
|
256
|
-
|
|
393
|
+
// Full mute for voice-only
|
|
394
|
+
gainRampTime: 0.015,
|
|
395
|
+
// Fast but smooth transitions
|
|
396
|
+
smoothTransitions: true,
|
|
397
|
+
maxGainDb: 6,
|
|
398
|
+
enableCompression: false,
|
|
399
|
+
compression: {
|
|
400
|
+
threshold: -24,
|
|
401
|
+
ratio: 3,
|
|
402
|
+
attack: 3e-3,
|
|
403
|
+
release: 0.05
|
|
404
|
+
},
|
|
257
405
|
...config.output
|
|
258
406
|
},
|
|
259
407
|
livekit: { manageTrackMute: false, ...config.livekit }
|
|
260
408
|
};
|
|
409
|
+
console.log("Audio pipeline config:", {
|
|
410
|
+
noiseSuppression: fullConfig.noiseSuppression?.enabled,
|
|
411
|
+
vad: fullConfig.vad?.enabled,
|
|
412
|
+
output: fullConfig.output
|
|
413
|
+
});
|
|
414
|
+
if (!sourceTrack || sourceTrack.kind !== "audio") {
|
|
415
|
+
throw new Error(
|
|
416
|
+
"createAudioPipeline requires a valid audio MediaStreamTrack"
|
|
417
|
+
);
|
|
418
|
+
}
|
|
419
|
+
if (sourceTrack.readyState === "ended") {
|
|
420
|
+
throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
|
|
421
|
+
}
|
|
261
422
|
const sourceStream = new MediaStream([sourceTrack]);
|
|
262
423
|
const sourceNode = context.createMediaStreamSource(sourceStream);
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
);
|
|
266
|
-
const nsNode = await nsPlugin.createNode(
|
|
267
|
-
context,
|
|
268
|
-
fullConfig.noiseSuppression
|
|
269
|
-
);
|
|
270
|
-
const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
271
|
-
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
424
|
+
let nsNode;
|
|
425
|
+
let vadNode;
|
|
272
426
|
const emitter = (0, import_mitt.default)();
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
427
|
+
try {
|
|
428
|
+
const nsPlugin = getNoiseSuppressionPlugin(
|
|
429
|
+
fullConfig.noiseSuppression?.pluginName
|
|
430
|
+
);
|
|
431
|
+
nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
|
|
432
|
+
} catch (error) {
|
|
433
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
434
|
+
console.error("Failed to create noise suppression node:", err);
|
|
435
|
+
emitter.emit("error", err);
|
|
436
|
+
throw err;
|
|
437
|
+
}
|
|
438
|
+
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
439
|
+
try {
|
|
440
|
+
const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
441
|
+
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
442
|
+
try {
|
|
443
|
+
const timestamp = context.currentTime * 1e3;
|
|
444
|
+
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
445
|
+
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
446
|
+
emitter.emit("vadChange", newState);
|
|
447
|
+
lastVadState = newState;
|
|
448
|
+
updateGain(newState);
|
|
449
|
+
}
|
|
450
|
+
} catch (vadError) {
|
|
451
|
+
const err = vadError instanceof Error ? vadError : new Error(String(vadError));
|
|
452
|
+
console.error("Error in VAD callback:", err);
|
|
453
|
+
emitter.emit("error", err);
|
|
283
454
|
}
|
|
284
|
-
}
|
|
285
|
-
)
|
|
455
|
+
});
|
|
456
|
+
} catch (error) {
|
|
457
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
458
|
+
console.error("Failed to create VAD node:", err);
|
|
459
|
+
emitter.emit("error", err);
|
|
460
|
+
throw err;
|
|
461
|
+
}
|
|
286
462
|
let lastVadState = {
|
|
287
463
|
isSpeaking: false,
|
|
288
464
|
probability: 0,
|
|
@@ -293,39 +469,176 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
293
469
|
nsNode.connect(splitter);
|
|
294
470
|
splitter.connect(vadNode);
|
|
295
471
|
const delayNode = context.createDelay(1);
|
|
296
|
-
const preRollSeconds = (fullConfig.vad?.preRollMs ??
|
|
472
|
+
const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
|
|
297
473
|
delayNode.delayTime.value = preRollSeconds;
|
|
298
474
|
const gainNode = context.createGain();
|
|
299
475
|
gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
|
|
476
|
+
let compressor = null;
|
|
477
|
+
if (fullConfig.output?.enableCompression) {
|
|
478
|
+
compressor = context.createDynamicsCompressor();
|
|
479
|
+
const comp = fullConfig.output.compression;
|
|
480
|
+
compressor.threshold.value = comp.threshold ?? -24;
|
|
481
|
+
compressor.ratio.value = comp.ratio ?? 3;
|
|
482
|
+
compressor.attack.value = comp.attack ?? 3e-3;
|
|
483
|
+
compressor.release.value = comp.release ?? 0.05;
|
|
484
|
+
compressor.knee.value = 10;
|
|
485
|
+
}
|
|
300
486
|
const destination = context.createMediaStreamDestination();
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
487
|
+
try {
|
|
488
|
+
splitter.connect(delayNode);
|
|
489
|
+
delayNode.connect(gainNode);
|
|
490
|
+
if (compressor) {
|
|
491
|
+
gainNode.connect(compressor);
|
|
492
|
+
compressor.connect(destination);
|
|
493
|
+
console.log("Compression enabled:", fullConfig.output?.compression);
|
|
494
|
+
} else {
|
|
495
|
+
gainNode.connect(destination);
|
|
496
|
+
}
|
|
497
|
+
} catch (error) {
|
|
498
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
499
|
+
console.error("Failed to wire audio pipeline:", err);
|
|
500
|
+
emitter.emit("error", err);
|
|
501
|
+
throw err;
|
|
502
|
+
}
|
|
304
503
|
/**
 * Moves the output gain towards the level that matches the VAD state.
 *
 * While speaking the target is `speechGain`, capped at the linear
 * equivalent of `maxGainDb`; otherwise it is `silenceGain`. With
 * `smoothTransitions` the change is scheduled as an exponential approach
 * with a time constant of `gainRampTime / 3`; without it the value is set
 * directly. Failures are logged and emitted as an "error" event.
 *
 * @param state VAD state; only `isSpeaking` is read.
 */
function updateGain(state) {
  try {
    const {
      speechGain: speechLevel = 1,
      silenceGain: silenceLevel = 0,
      gainRampTime: rampSeconds = 0.015,
      smoothTransitions: ramped = true,
      maxGainDb: ceilingDb = 6
    } = fullConfig.output;

    let target = silenceLevel;
    if (state.isSpeaking) {
      // Convert the dB ceiling to a linear factor before clamping.
      target = Math.min(speechLevel, Math.pow(10, ceilingDb / 20));
    }

    const startAt = context.currentTime;
    const gainParam = gainNode.gain;
    if (!ramped) {
      gainParam.setValueAtTime(target, startAt);
      return;
    }
    // Pin the current value first so the ramp starts from where the gain is now.
    gainParam.cancelScheduledValues(startAt);
    gainParam.setValueAtTime(gainParam.value, startAt);
    gainParam.setTargetAtTime(target, startAt, rampSeconds / 3);
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error("Failed to update gain:", err);
    emitter.emit("error", err);
  }
}
|
|
529
|
+
const audioTracks = destination.stream.getAudioTracks();
|
|
530
|
+
console.log("Destination stream tracks:", {
|
|
531
|
+
count: audioTracks.length,
|
|
532
|
+
tracks: audioTracks.map((t) => ({
|
|
533
|
+
id: t.id,
|
|
534
|
+
label: t.label,
|
|
535
|
+
enabled: t.enabled,
|
|
536
|
+
readyState: t.readyState
|
|
537
|
+
}))
|
|
538
|
+
});
|
|
539
|
+
if (audioTracks.length === 0) {
|
|
540
|
+
const err = new Error(
|
|
541
|
+
"Failed to create processed audio track: destination stream has no audio tracks. This may indicate an issue with the audio graph connection."
|
|
542
|
+
);
|
|
543
|
+
console.error(err);
|
|
544
|
+
emitter.emit("error", err);
|
|
545
|
+
throw err;
|
|
546
|
+
}
|
|
547
|
+
const processedTrack = audioTracks[0];
|
|
548
|
+
if (!processedTrack || processedTrack.readyState === "ended") {
|
|
549
|
+
const err = new Error("Processed audio track is invalid or ended");
|
|
550
|
+
console.error(err);
|
|
551
|
+
emitter.emit("error", err);
|
|
552
|
+
throw err;
|
|
309
553
|
}
|
|
554
|
+
console.log("Audio pipeline created successfully:", {
|
|
555
|
+
sourceTrack: {
|
|
556
|
+
id: sourceTrack.id,
|
|
557
|
+
label: sourceTrack.label,
|
|
558
|
+
readyState: sourceTrack.readyState
|
|
559
|
+
},
|
|
560
|
+
processedTrack: {
|
|
561
|
+
id: processedTrack.id,
|
|
562
|
+
label: processedTrack.label,
|
|
563
|
+
readyState: processedTrack.readyState
|
|
564
|
+
},
|
|
565
|
+
config: {
|
|
566
|
+
noiseSuppression: fullConfig.noiseSuppression?.enabled,
|
|
567
|
+
vad: fullConfig.vad?.enabled
|
|
568
|
+
}
|
|
569
|
+
});
|
|
310
570
|
// Set once dispose() has run so that repeated calls are no-ops.
let pipelineDisposed = false;
/**
 * Tears the pipeline down: disconnects every node, stops the destination
 * stream's tracks and unregisters the pipeline.
 *
 * Each step is attempted independently, so a failure in one (for example a
 * node that throws on disconnect) is logged and does not prevent the
 * remaining cleanup. Calling it more than once has no further effect, which
 * keeps `unregisterPipeline()` from running twice.
 */
function dispose() {
  if (pipelineDisposed) {
    return;
  }
  pipelineDisposed = true;

  const steps = [
    () => sourceNode.disconnect(),
    () => nsNode.disconnect(),
    () => splitter.disconnect(),
    () => vadNode.disconnect(),
    () => delayNode.disconnect(),
    () => gainNode.disconnect(),
    // The compressor only exists when compression was enabled.
    () => compressor?.disconnect(),
    () => destination.stream.getTracks().forEach((t) => t.stop()),
    () => unregisterPipeline()
  ];
  for (const step of steps) {
    try {
      step();
    } catch (error) {
      console.error("Error during pipeline disposal:", error);
    }
  }
}
|
|
320
587
|
return {
|
|
321
|
-
processedTrack
|
|
588
|
+
processedTrack,
|
|
322
589
|
events: emitter,
|
|
323
590
|
get state() {
|
|
324
591
|
return lastVadState;
|
|
325
592
|
},
|
|
326
593
|
setConfig: (newConfig) => {
|
|
327
|
-
|
|
328
|
-
|
|
594
|
+
try {
|
|
595
|
+
if (newConfig.vad) {
|
|
596
|
+
vadStateMachine.updateConfig(newConfig.vad);
|
|
597
|
+
Object.assign(fullConfig.vad, newConfig.vad);
|
|
598
|
+
if (newConfig.vad.preRollMs !== void 0) {
|
|
599
|
+
const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
|
|
600
|
+
delayNode.delayTime.setValueAtTime(
|
|
601
|
+
preRollSeconds2,
|
|
602
|
+
context.currentTime
|
|
603
|
+
);
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
if (newConfig.output) {
|
|
607
|
+
Object.assign(fullConfig.output, newConfig.output);
|
|
608
|
+
updateGain(lastVadState);
|
|
609
|
+
if (compressor && newConfig.output.compression) {
|
|
610
|
+
const comp = newConfig.output.compression;
|
|
611
|
+
if (comp.threshold !== void 0) {
|
|
612
|
+
compressor.threshold.setValueAtTime(
|
|
613
|
+
comp.threshold,
|
|
614
|
+
context.currentTime
|
|
615
|
+
);
|
|
616
|
+
}
|
|
617
|
+
if (comp.ratio !== void 0) {
|
|
618
|
+
compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
|
|
619
|
+
}
|
|
620
|
+
if (comp.attack !== void 0) {
|
|
621
|
+
compressor.attack.setValueAtTime(
|
|
622
|
+
comp.attack,
|
|
623
|
+
context.currentTime
|
|
624
|
+
);
|
|
625
|
+
}
|
|
626
|
+
if (comp.release !== void 0) {
|
|
627
|
+
compressor.release.setValueAtTime(
|
|
628
|
+
comp.release,
|
|
629
|
+
context.currentTime
|
|
630
|
+
);
|
|
631
|
+
}
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
if (newConfig.livekit) {
|
|
635
|
+
Object.assign(fullConfig.livekit, newConfig.livekit);
|
|
636
|
+
}
|
|
637
|
+
console.log("Pipeline config updated:", newConfig);
|
|
638
|
+
} catch (error) {
|
|
639
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
640
|
+
console.error("Failed to update config:", err);
|
|
641
|
+
emitter.emit("error", err);
|
|
329
642
|
}
|
|
330
643
|
},
|
|
331
644
|
dispose
|
|
@@ -334,31 +647,84 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
334
647
|
|
|
335
648
|
// src/livekit/integration.ts
|
|
336
649
|
/**
 * Routes a LiveKit local audio track through the audio processing pipeline.
 *
 * Creates a pipeline from the track's underlying MediaStreamTrack, swaps the
 * processed track into `track`, and wraps the pipeline's `dispose()` so that
 * disposing also restores the original track. When
 * `config.livekit.manageTrackMute` is set, the track is muted while the VAD
 * reports silence and unmuted again on speech.
 *
 * @param track Local audio track; must expose `mediaStreamTrack`,
 *   `replaceTrack()`, `mute()`, `unmute()` and `isMuted`.
 * @param config Pipeline configuration forwarded to `createAudioPipeline`.
 * @returns The pipeline handle, with `dispose` replaced by the wrapper.
 * @throws {Error} If the track is missing or ended, the pipeline cannot be
 *   created, or the track cannot be replaced. Wrapped errors carry the
 *   underlying failure in `cause`.
 */
async function attachProcessingToTrack(track, config = {}) {
  if (!track) {
    throw new Error("attachProcessingToTrack requires a valid LocalAudioTrack");
  }
  const originalTrack = track.mediaStreamTrack;
  if (!originalTrack) {
    throw new Error("LocalAudioTrack has no underlying MediaStreamTrack");
  }
  if (originalTrack.readyState === "ended") {
    throw new Error("Cannot attach processing to an ended MediaStreamTrack");
  }

  const describe = (reason) => (reason instanceof Error ? reason.message : String(reason));

  let pipeline;
  try {
    console.log("Creating audio processing pipeline...");
    pipeline = await createAudioPipeline(originalTrack, config);
    console.log("Audio processing pipeline created successfully");
  } catch (error) {
    const err = new Error(`Failed to create audio pipeline: ${describe(error)}`, { cause: error });
    console.error(err);
    throw err;
  }

  if (!pipeline.processedTrack) {
    // Release the pipeline that was just built instead of leaking it.
    try {
      pipeline.dispose();
    } catch (disposeError) {
      console.error("Error calling original dispose:", disposeError);
    }
    throw new Error("Pipeline did not return a processed track");
  }

  try {
    console.log("Replacing LiveKit track with processed track...");
    await track.replaceTrack(pipeline.processedTrack);
    console.log("LiveKit track replaced successfully");
  } catch (error) {
    pipeline.dispose();
    const err = new Error(`Failed to replace LiveKit track: ${describe(error)}`, { cause: error });
    console.error(err);
    throw err;
  }

  // True while the track is muted because of the VAD rather than the user.
  let isVadMuted = false;
  let onVadChange = null;
  if (config.livekit?.manageTrackMute) {
    onVadChange = async (state) => {
      try {
        if (state.isSpeaking) {
          if (isVadMuted) {
            await track.unmute();
            isVadMuted = false;
          }
        } else if (!track.isMuted) {
          // Recorded before awaiting so that a dispose() arriving while the
          // mute is still in flight knows it has to be undone.
          isVadMuted = true;
          await track.mute();
        }
      } catch (error) {
        console.error("Error handling VAD-based track muting:", error);
      }
    };
    pipeline.events.on("vadChange", onVadChange);
  }

  pipeline.events.on("error", (error) => {
    console.error("Audio pipeline error:", error);
  });

  const originalDispose = pipeline.dispose;
  let disposed = false;
  pipeline.dispose = () => {
    // Idempotent: a second call must not restore the track or dispose again.
    if (disposed) {
      return;
    }
    disposed = true;
    try {
      if (onVadChange) {
        // Stop reacting to VAD events once the pipeline is going away.
        pipeline.events.off("vadChange", onVadChange);
      }
      if (isVadMuted) {
        // Undo a mute applied by the VAD; otherwise the restored track
        // would stay muted with nothing left to unmute it.
        isVadMuted = false;
        Promise.resolve(track.unmute()).catch((error) => {
          console.error("Error handling VAD-based track muting:", error);
        });
      }
      if (originalTrack.readyState === "live") {
        console.log("Restoring original track...");
        Promise.resolve(track.replaceTrack(originalTrack)).catch((error) => {
          console.error("Failed to restore original track:", error);
        });
      }
    } catch (error) {
      console.error("Error during pipeline disposal:", error);
    } finally {
      // The underlying pipeline is always released, even if restoring failed.
      try {
        originalDispose();
      } catch (disposeError) {
        console.error("Error calling original dispose:", disposeError);
      }
    }
  };
  return pipeline;
}
|