@tensamin/audio 0.1.14 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -231
- package/dist/chunk-6BJ4XGSA.mjs +80 -0
- package/dist/chunk-AQ5RVY33.mjs +74 -0
- package/dist/chunk-IS37FHDN.mjs +33 -0
- package/dist/chunk-K4J3UUOR.mjs +178 -0
- package/dist/chunk-QNQK6QFB.mjs +71 -0
- package/dist/context/audio-context.d.mts +0 -24
- package/dist/context/audio-context.d.ts +0 -24
- package/dist/index.d.mts +2 -8
- package/dist/index.d.ts +2 -8
- package/dist/index.js +285 -680
- package/dist/index.mjs +8 -43
- package/dist/livekit/integration.d.mts +3 -7
- package/dist/livekit/integration.d.ts +3 -7
- package/dist/livekit/integration.js +280 -626
- package/dist/livekit/integration.mjs +7 -8
- package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
- package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
- package/dist/noise-suppression/deepfilternet-node.js +57 -0
- package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +2 -2
- package/dist/pipeline/audio-pipeline.d.ts +2 -2
- package/dist/pipeline/audio-pipeline.js +219 -554
- package/dist/pipeline/audio-pipeline.mjs +4 -5
- package/dist/types.d.mts +42 -257
- package/dist/types.d.ts +42 -257
- package/dist/vad/vad-node.d.mts +7 -9
- package/dist/vad/vad-node.d.ts +7 -9
- package/dist/vad/vad-node.js +47 -156
- package/dist/vad/vad-node.mjs +3 -3
- package/dist/vad/vad-state.d.mts +9 -11
- package/dist/vad/vad-state.d.ts +9 -11
- package/dist/vad/vad-state.js +50 -79
- package/dist/vad/vad-state.mjs +3 -3
- package/package.json +21 -21
- package/dist/chunk-2G2JFHJY.mjs +0 -180
- package/dist/chunk-6F2HZUYO.mjs +0 -91
- package/dist/chunk-K4YLH73B.mjs +0 -103
- package/dist/chunk-R5M2DGAQ.mjs +0 -311
- package/dist/chunk-UFKIAMG3.mjs +0 -47
- package/dist/chunk-XO6B3D4A.mjs +0 -67
- package/dist/extensibility/plugins.d.mts +0 -9
- package/dist/extensibility/plugins.d.ts +0 -9
- package/dist/extensibility/plugins.js +0 -320
- package/dist/extensibility/plugins.mjs +0 -14
- package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
- package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
- package/dist/noise-suppression/rnnoise-node.js +0 -101
- package/dist/noise-suppression/rnnoise-node.mjs +0 -6
package/dist/index.js
CHANGED
|
@@ -30,24 +30,16 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
30
30
|
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
|
-
|
|
34
|
-
RNNoisePlugin: () => RNNoisePlugin,
|
|
35
|
-
VADStateMachine: () => VADStateMachine,
|
|
36
|
-
attachProcessingToTrack: () => attachProcessingToTrack,
|
|
37
|
-
closeAudioContext: () => closeAudioContext,
|
|
38
|
-
createAudioPipeline: () => createAudioPipeline,
|
|
39
|
-
getAudioContext: () => getAudioContext,
|
|
40
|
-
getNoiseSuppressionPlugin: () => getNoiseSuppressionPlugin,
|
|
41
|
-
getVADPlugin: () => getVADPlugin,
|
|
42
|
-
registerNoiseSuppressionPlugin: () => registerNoiseSuppressionPlugin,
|
|
43
|
-
registerPipeline: () => registerPipeline,
|
|
44
|
-
registerVADPlugin: () => registerVADPlugin,
|
|
45
|
-
resumeAudioContext: () => resumeAudioContext,
|
|
46
|
-
suspendAudioContext: () => suspendAudioContext,
|
|
47
|
-
unregisterPipeline: () => unregisterPipeline
|
|
33
|
+
attachSpeakingDetectionToTrack: () => attachSpeakingDetectionToTrack
|
|
48
34
|
});
|
|
49
35
|
module.exports = __toCommonJS(index_exports);
|
|
50
36
|
|
|
37
|
+
// src/livekit/integration.ts
|
|
38
|
+
var import_mitt2 = require("mitt");
|
|
39
|
+
|
|
40
|
+
// src/pipeline/audio-pipeline.ts
|
|
41
|
+
var import_mitt = __toESM(require("mitt"));
|
|
42
|
+
|
|
51
43
|
// src/context/audio-context.ts
|
|
52
44
|
var sharedContext = null;
|
|
53
45
|
var activePipelines = 0;
|
|
@@ -68,398 +60,172 @@ function registerPipeline() {
|
|
|
68
60
|
function unregisterPipeline() {
|
|
69
61
|
activePipelines = Math.max(0, activePipelines - 1);
|
|
70
62
|
}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
63
|
+
|
|
64
|
+
// src/noise-suppression/deepfilternet-node.ts
|
|
65
|
+
var import_deepfilternet3_noise_filter = require("deepfilternet3-noise-filter");
|
|
66
|
+
async function createDeepFilterNet3Node(context, config) {
|
|
67
|
+
const processorConfig = {
|
|
68
|
+
sampleRate: context.sampleRate,
|
|
69
|
+
noiseReductionLevel: config?.noiseReductionLevel ?? 60
|
|
70
|
+
};
|
|
71
|
+
if (config?.assetConfig) {
|
|
72
|
+
processorConfig.assetConfig = config.assetConfig;
|
|
79
73
|
}
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
74
|
+
const processor = new import_deepfilternet3_noise_filter.DeepFilterNet3Processor(processorConfig);
|
|
75
|
+
await processor.initialize();
|
|
76
|
+
const node = await processor.createAudioWorkletNode(context);
|
|
77
|
+
const enabled = config?.enabled ?? true;
|
|
78
|
+
if (!enabled) {
|
|
79
|
+
processor.setNoiseSuppressionEnabled(false);
|
|
84
80
|
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
var RNNoisePlugin = class {
|
|
94
|
-
name = "rnnoise-ns";
|
|
95
|
-
wasmBuffer = null;
|
|
96
|
-
async createNode(context, config) {
|
|
97
|
-
const { loadRnnoise, RnnoiseWorkletNode } = await import("@sapphi-red/web-noise-suppressor");
|
|
98
|
-
if (!config?.enabled) {
|
|
99
|
-
console.log("Noise suppression disabled, using passthrough node");
|
|
100
|
-
const pass = context.createGain();
|
|
101
|
-
return pass;
|
|
102
|
-
}
|
|
103
|
-
if (!config?.wasmUrl || !config?.simdUrl || !config?.workletUrl) {
|
|
104
|
-
const error = new Error(
|
|
105
|
-
`RNNoisePlugin requires 'wasmUrl', 'simdUrl', and 'workletUrl' to be configured. Please download the assets from @sapphi-red/web-noise-suppressor and provide the URLs in the config. Current config: wasmUrl=${config?.wasmUrl}, simdUrl=${config?.simdUrl}, workletUrl=${config?.workletUrl}
|
|
106
|
-
To disable noise suppression, set noiseSuppression.enabled to false.`
|
|
107
|
-
);
|
|
108
|
-
console.error(error.message);
|
|
109
|
-
throw error;
|
|
110
|
-
}
|
|
111
|
-
try {
|
|
112
|
-
if (!this.wasmBuffer) {
|
|
113
|
-
console.log("Loading RNNoise WASM binary...");
|
|
114
|
-
this.wasmBuffer = await loadRnnoise({
|
|
115
|
-
url: config.wasmUrl,
|
|
116
|
-
simdUrl: config.simdUrl
|
|
117
|
-
});
|
|
118
|
-
console.log("RNNoise WASM loaded successfully");
|
|
81
|
+
return {
|
|
82
|
+
node,
|
|
83
|
+
processor,
|
|
84
|
+
dispose: () => {
|
|
85
|
+
try {
|
|
86
|
+
processor.destroy();
|
|
87
|
+
} catch (error) {
|
|
88
|
+
console.error("Failed to dispose DeepFilterNet3 processor", error);
|
|
119
89
|
}
|
|
120
|
-
} catch (error) {
|
|
121
|
-
const err = new Error(
|
|
122
|
-
`Failed to load RNNoise WASM binary: ${error instanceof Error ? error.message : String(error)}`
|
|
123
|
-
);
|
|
124
|
-
console.error(err);
|
|
125
|
-
throw err;
|
|
126
|
-
}
|
|
127
|
-
const workletUrl = config.workletUrl;
|
|
128
|
-
try {
|
|
129
|
-
await context.audioWorklet.addModule(workletUrl);
|
|
130
|
-
console.log("RNNoise worklet loaded successfully");
|
|
131
|
-
} catch (e) {
|
|
132
|
-
const error = new Error(
|
|
133
|
-
`Failed to load RNNoise worklet from ${workletUrl}: ${e instanceof Error ? e.message : String(e)}. Ensure the workletUrl points to a valid RNNoise worklet script.`
|
|
134
|
-
);
|
|
135
|
-
console.error(error.message);
|
|
136
|
-
throw error;
|
|
137
90
|
}
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
wasmBinary: this.wasmBuffer,
|
|
141
|
-
maxChannels: 1
|
|
142
|
-
// Mono for now
|
|
143
|
-
});
|
|
144
|
-
console.log("RNNoise worklet node created successfully");
|
|
145
|
-
return node;
|
|
146
|
-
} catch (error) {
|
|
147
|
-
const err = new Error(
|
|
148
|
-
`Failed to create RNNoise worklet node: ${error instanceof Error ? error.message : String(error)}`
|
|
149
|
-
);
|
|
150
|
-
console.error(err);
|
|
151
|
-
throw err;
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
};
|
|
91
|
+
};
|
|
92
|
+
}
|
|
155
93
|
|
|
156
94
|
// src/vad/vad-node.ts
|
|
157
|
-
|
|
158
|
-
const energyParams = vadConfig?.energyVad || {};
|
|
159
|
-
const smoothing = energyParams.smoothing ?? 0.95;
|
|
160
|
-
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
161
|
-
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
|
|
162
|
-
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
|
|
163
|
-
const minSNR = energyParams.minSNR ?? 12;
|
|
164
|
-
const snrRange = energyParams.snrRange ?? 10;
|
|
165
|
-
const minEnergy = energyParams.minEnergy ?? 3e-3;
|
|
95
|
+
function createLevelDetectorWorkletCode(smoothing) {
|
|
166
96
|
return `
|
|
167
|
-
class
|
|
97
|
+
class LevelDetectorProcessor extends AudioWorkletProcessor {
|
|
168
98
|
constructor() {
|
|
169
99
|
super();
|
|
100
|
+
this.smoothed = 0;
|
|
170
101
|
this.smoothing = ${smoothing};
|
|
171
|
-
this.energy = 0;
|
|
172
|
-
this.noiseFloor = ${initialNoiseFloor};
|
|
173
|
-
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
174
|
-
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
175
|
-
this.minSNR = ${minSNR};
|
|
176
|
-
this.snrRange = ${snrRange};
|
|
177
|
-
this.minEnergy = ${minEnergy};
|
|
178
|
-
this.isSpeaking = false;
|
|
179
|
-
|
|
180
|
-
this.port.onmessage = (event) => {
|
|
181
|
-
if (event.data && event.data.isSpeaking !== undefined) {
|
|
182
|
-
this.isSpeaking = event.data.isSpeaking;
|
|
183
|
-
}
|
|
184
|
-
};
|
|
185
102
|
}
|
|
186
103
|
|
|
187
|
-
process(inputs
|
|
104
|
+
process(inputs) {
|
|
188
105
|
const input = inputs[0];
|
|
189
|
-
if (!input ||
|
|
106
|
+
if (!input || input.length === 0) return true;
|
|
190
107
|
const channel = input[0];
|
|
191
|
-
|
|
192
|
-
|
|
108
|
+
if (!channel || channel.length === 0) return true;
|
|
109
|
+
|
|
193
110
|
let sum = 0;
|
|
194
|
-
let peak = 0;
|
|
195
111
|
for (let i = 0; i < channel.length; i++) {
|
|
196
|
-
const sample =
|
|
197
|
-
sum +=
|
|
198
|
-
peak = Math.max(peak, sample);
|
|
199
|
-
}
|
|
200
|
-
const instantRms = Math.sqrt(sum / channel.length);
|
|
201
|
-
|
|
202
|
-
// Smooth the RMS energy to reduce jitter
|
|
203
|
-
// this.energy acts as the smoothed RMS value
|
|
204
|
-
this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
|
|
205
|
-
|
|
206
|
-
// Calculate Crest Factor (peak-to-RMS ratio)
|
|
207
|
-
// Voice typically has crest factor of 2-4 (6-12dB)
|
|
208
|
-
// Keyboard clicks have crest factor of 10-30+ (20-30dB)
|
|
209
|
-
const crestFactor = peak / (instantRms + 1e-10);
|
|
210
|
-
const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
|
|
211
|
-
|
|
212
|
-
// Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
|
|
213
|
-
// This prevents sharp transients from affecting the noise floor
|
|
214
|
-
if (this.energy < this.noiseFloor) {
|
|
215
|
-
// Signal is quieter than noise floor, adapt downwards slowly
|
|
216
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
|
|
217
|
-
} else {
|
|
218
|
-
// Calculate SNR based on smoothed energy
|
|
219
|
-
const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
|
|
220
|
-
const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
|
|
221
|
-
|
|
222
|
-
// Only adapt upwards if:
|
|
223
|
-
// 1. SNR is low (< 10dB) - likely just background noise
|
|
224
|
-
// 2. AND crest factor is low (< 15dB) - not a sharp transient
|
|
225
|
-
if (smoothedSnrDb < 10 && crestFactorDb < 15) {
|
|
226
|
-
// This is persistent background noise, adapt upwards
|
|
227
|
-
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
|
|
228
|
-
} else {
|
|
229
|
-
// Either high SNR (speech) or high crest factor (click) - adapt very slowly
|
|
230
|
-
const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
|
|
231
|
-
this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
|
|
232
|
-
}
|
|
112
|
+
const sample = channel[i];
|
|
113
|
+
sum += sample * sample;
|
|
233
114
|
}
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
// SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
|
|
239
|
-
const snr = this.energy / (this.noiseFloor + 1e-6);
|
|
240
|
-
const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
|
|
241
|
-
|
|
242
|
-
// Map SNR dB to probability (0-1)
|
|
243
|
-
// Probability is 0 when snrDb <= minSNR
|
|
244
|
-
// Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
|
|
245
|
-
let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
|
|
246
|
-
|
|
247
|
-
// Apply absolute energy threshold with soft knee
|
|
248
|
-
if (this.energy < this.minEnergy) {
|
|
249
|
-
const energyRatio = this.energy / (this.minEnergy + 1e-6);
|
|
250
|
-
probability *= Math.pow(energyRatio, 2);
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
// Apply crest factor penalty
|
|
254
|
-
// Reject signals with high crest factor (sharp transients like keyboard clicks)
|
|
255
|
-
// Voice: 6-12dB, Keyboard: 20-30dB
|
|
256
|
-
// We penalize anything above 14dB
|
|
257
|
-
if (crestFactorDb > 14) {
|
|
258
|
-
const excess = crestFactorDb - 14;
|
|
259
|
-
const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
|
|
260
|
-
probability *= penalty;
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
|
|
264
|
-
|
|
115
|
+
const rms = Math.sqrt(sum / channel.length);
|
|
116
|
+
this.smoothed = this.smoothed * this.smoothing + rms * (1 - this.smoothing);
|
|
117
|
+
const levelDb = 20 * Math.log10(Math.max(1e-8, this.smoothed));
|
|
118
|
+
this.port.postMessage({ levelDb });
|
|
265
119
|
return true;
|
|
266
120
|
}
|
|
267
121
|
}
|
|
268
|
-
|
|
122
|
+
|
|
123
|
+
registerProcessor('level-detector-processor', LevelDetectorProcessor);
|
|
269
124
|
`;
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
}
|
|
280
|
-
const workletCode = createEnergyVadWorkletCode(config);
|
|
281
|
-
const blob = new Blob([workletCode], {
|
|
282
|
-
type: "application/javascript"
|
|
283
|
-
});
|
|
284
|
-
const url = URL.createObjectURL(blob);
|
|
285
|
-
try {
|
|
286
|
-
await context.audioWorklet.addModule(url);
|
|
287
|
-
console.log("Energy VAD worklet loaded successfully");
|
|
288
|
-
} catch (e) {
|
|
289
|
-
const error = new Error(
|
|
290
|
-
`Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
|
|
291
|
-
);
|
|
292
|
-
console.error(error.message);
|
|
293
|
-
URL.revokeObjectURL(url);
|
|
294
|
-
throw error;
|
|
295
|
-
}
|
|
125
|
+
}
|
|
126
|
+
async function createLevelDetectorNode(context, onLevel, options) {
|
|
127
|
+
const smoothing = options?.smoothing ?? 0.9;
|
|
128
|
+
const workletCode = createLevelDetectorWorkletCode(smoothing);
|
|
129
|
+
const blob = new Blob([workletCode], { type: "application/javascript" });
|
|
130
|
+
const url = URL.createObjectURL(blob);
|
|
131
|
+
try {
|
|
132
|
+
await context.audioWorklet.addModule(url);
|
|
133
|
+
} finally {
|
|
296
134
|
URL.revokeObjectURL(url);
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
);
|
|
306
|
-
console.error(error.message);
|
|
307
|
-
throw error;
|
|
135
|
+
}
|
|
136
|
+
const node = new AudioWorkletNode(context, "level-detector-processor", {
|
|
137
|
+
numberOfInputs: 1,
|
|
138
|
+
numberOfOutputs: 0
|
|
139
|
+
});
|
|
140
|
+
node.port.onmessage = (event) => {
|
|
141
|
+
const { levelDb } = event.data ?? {};
|
|
142
|
+
if (typeof levelDb === "number" && !Number.isNaN(levelDb)) {
|
|
143
|
+
onLevel(levelDb);
|
|
308
144
|
}
|
|
309
|
-
|
|
145
|
+
};
|
|
146
|
+
node.port.onmessageerror = (event) => {
|
|
147
|
+
console.error("Level detector port error", event);
|
|
148
|
+
};
|
|
149
|
+
return {
|
|
150
|
+
node,
|
|
151
|
+
dispose: () => {
|
|
310
152
|
try {
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
onDecision(probability);
|
|
314
|
-
} else {
|
|
315
|
-
console.warn("Invalid VAD probability received:", event.data);
|
|
316
|
-
}
|
|
153
|
+
node.port.onmessage = null;
|
|
154
|
+
node.port.close();
|
|
317
155
|
} catch (error) {
|
|
318
|
-
console.error("
|
|
156
|
+
console.error("Failed to dispose level detector node", error);
|
|
319
157
|
}
|
|
320
|
-
};
|
|
321
|
-
node.port.onmessageerror = (event) => {
|
|
322
|
-
console.error("VAD port message error:", event);
|
|
323
|
-
};
|
|
324
|
-
return node;
|
|
325
|
-
}
|
|
326
|
-
updateSpeakingState(isSpeaking) {
|
|
327
|
-
if (this.workletNode) {
|
|
328
|
-
this.workletNode.port.postMessage({ isSpeaking });
|
|
329
158
|
}
|
|
330
|
-
}
|
|
331
|
-
};
|
|
332
|
-
|
|
333
|
-
// src/extensibility/plugins.ts
|
|
334
|
-
var nsPlugins = /* @__PURE__ */ new Map();
|
|
335
|
-
var vadPlugins = /* @__PURE__ */ new Map();
|
|
336
|
-
var defaultNs = new RNNoisePlugin();
|
|
337
|
-
nsPlugins.set(defaultNs.name, defaultNs);
|
|
338
|
-
var defaultVad = new EnergyVADPlugin();
|
|
339
|
-
vadPlugins.set(defaultVad.name, defaultVad);
|
|
340
|
-
function registerNoiseSuppressionPlugin(plugin) {
|
|
341
|
-
nsPlugins.set(plugin.name, plugin);
|
|
342
|
-
}
|
|
343
|
-
function registerVADPlugin(plugin) {
|
|
344
|
-
vadPlugins.set(plugin.name, plugin);
|
|
345
|
-
}
|
|
346
|
-
function getNoiseSuppressionPlugin(name) {
|
|
347
|
-
if (!name) return defaultNs;
|
|
348
|
-
const plugin = nsPlugins.get(name);
|
|
349
|
-
if (!plugin) {
|
|
350
|
-
console.warn(
|
|
351
|
-
`Noise suppression plugin '${name}' not found, falling back to default.`
|
|
352
|
-
);
|
|
353
|
-
return defaultNs;
|
|
354
|
-
}
|
|
355
|
-
return plugin;
|
|
356
|
-
}
|
|
357
|
-
function getVADPlugin(name) {
|
|
358
|
-
if (!name) return defaultVad;
|
|
359
|
-
const plugin = vadPlugins.get(name);
|
|
360
|
-
if (!plugin) {
|
|
361
|
-
console.warn(`VAD plugin '${name}' not found, falling back to default.`);
|
|
362
|
-
return defaultVad;
|
|
363
|
-
}
|
|
364
|
-
return plugin;
|
|
159
|
+
};
|
|
365
160
|
}
|
|
366
161
|
|
|
367
162
|
// src/vad/vad-state.ts
|
|
368
|
-
var
|
|
163
|
+
var LevelBasedVAD = class {
|
|
369
164
|
config;
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
lastSilenceTime = 0;
|
|
374
|
-
frameDurationMs = 20;
|
|
375
|
-
// Assumed frame duration, updated by calls
|
|
165
|
+
speaking = false;
|
|
166
|
+
pendingSpeechSince = null;
|
|
167
|
+
pendingSilenceSince = null;
|
|
376
168
|
constructor(config) {
|
|
377
169
|
this.config = {
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
hangoverMs: config?.hangoverMs ?? 300,
|
|
386
|
-
// Smooth for natural speech
|
|
387
|
-
preRollMs: config?.preRollMs ?? 250,
|
|
388
|
-
// Generous pre-roll
|
|
389
|
-
minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
|
|
390
|
-
// Aggressive transient rejection
|
|
391
|
-
minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
|
|
392
|
-
energyVad: {
|
|
393
|
-
smoothing: config?.energyVad?.smoothing ?? 0.95,
|
|
394
|
-
initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
|
|
395
|
-
noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
|
|
396
|
-
noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
|
|
397
|
-
minSNR: config?.energyVad?.minSNR ?? 12,
|
|
398
|
-
snrRange: config?.energyVad?.snrRange ?? 10,
|
|
399
|
-
minEnergy: config?.energyVad?.minEnergy ?? 3e-3
|
|
400
|
-
}
|
|
170
|
+
minDb: config.minDb,
|
|
171
|
+
maxDb: config.maxDb,
|
|
172
|
+
speakOnRatio: config.speakOnRatio ?? 0.6,
|
|
173
|
+
speakOffRatio: config.speakOffRatio ?? 0.3,
|
|
174
|
+
hangoverMs: config.hangoverMs ?? 350,
|
|
175
|
+
attackMs: config.attackMs ?? 50,
|
|
176
|
+
releaseMs: config.releaseMs ?? 120
|
|
401
177
|
};
|
|
402
|
-
this.lastSilenceTime = Date.now();
|
|
403
178
|
}
|
|
404
179
|
updateConfig(config) {
|
|
405
|
-
this.config = {
|
|
180
|
+
this.config = {
|
|
181
|
+
...this.config,
|
|
182
|
+
...config,
|
|
183
|
+
speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
|
|
184
|
+
speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
|
|
185
|
+
hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
|
|
186
|
+
attackMs: config.attackMs ?? this.config.attackMs,
|
|
187
|
+
releaseMs: config.releaseMs ?? this.config.releaseMs
|
|
188
|
+
};
|
|
406
189
|
}
|
|
407
|
-
|
|
190
|
+
process(levelDb, timestampMs) {
|
|
408
191
|
const {
|
|
409
|
-
|
|
410
|
-
|
|
192
|
+
minDb,
|
|
193
|
+
maxDb,
|
|
194
|
+
speakOnRatio,
|
|
195
|
+
speakOffRatio,
|
|
411
196
|
hangoverMs,
|
|
412
|
-
|
|
413
|
-
|
|
197
|
+
attackMs,
|
|
198
|
+
releaseMs
|
|
414
199
|
} = this.config;
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
this.
|
|
422
|
-
this.
|
|
423
|
-
|
|
424
|
-
newState = "silent";
|
|
200
|
+
const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
|
|
201
|
+
const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
|
|
202
|
+
if (!this.speaking) {
|
|
203
|
+
if (norm >= speakOnRatio) {
|
|
204
|
+
this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
|
|
205
|
+
if (timestampMs - this.pendingSpeechSince >= attackMs) {
|
|
206
|
+
this.speaking = true;
|
|
207
|
+
this.pendingSpeechSince = null;
|
|
208
|
+
this.pendingSilenceSince = null;
|
|
425
209
|
}
|
|
426
210
|
} else {
|
|
427
|
-
|
|
428
|
-
this.lastSilenceTime = timestamp;
|
|
211
|
+
this.pendingSpeechSince = null;
|
|
429
212
|
}
|
|
430
|
-
} else
|
|
431
|
-
if (
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
213
|
+
} else {
|
|
214
|
+
if (norm <= speakOffRatio) {
|
|
215
|
+
this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
|
|
216
|
+
const releaseWindow = Math.max(releaseMs, hangoverMs);
|
|
217
|
+
if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
|
|
218
|
+
this.speaking = false;
|
|
219
|
+
this.pendingSilenceSince = null;
|
|
220
|
+
this.pendingSpeechSince = null;
|
|
437
221
|
}
|
|
438
|
-
this.lastSpeechTime = timestamp;
|
|
439
222
|
} else {
|
|
440
|
-
|
|
441
|
-
this.lastSilenceTime = timestamp;
|
|
442
|
-
}
|
|
443
|
-
} else if (this.currentState === "speaking") {
|
|
444
|
-
if (probability >= stopThreshold) {
|
|
445
|
-
newState = "speaking";
|
|
446
|
-
this.lastSpeechTime = timestamp;
|
|
447
|
-
} else {
|
|
448
|
-
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
449
|
-
if (timeSinceSpeech < hangoverMs) {
|
|
450
|
-
newState = "speaking";
|
|
451
|
-
} else {
|
|
452
|
-
newState = "speech_ending";
|
|
453
|
-
this.lastSilenceTime = timestamp;
|
|
454
|
-
}
|
|
223
|
+
this.pendingSilenceSince = null;
|
|
455
224
|
}
|
|
456
225
|
}
|
|
457
|
-
if (newState === "speech_ending") newState = "silent";
|
|
458
|
-
this.currentState = newState;
|
|
459
226
|
return {
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
state: newState
|
|
227
|
+
speaking: this.speaking,
|
|
228
|
+
levelDb: clamped
|
|
463
229
|
};
|
|
464
230
|
}
|
|
465
231
|
};
|
|
@@ -468,58 +234,33 @@ var VADStateMachine = class {
|
|
|
468
234
|
async function createAudioPipeline(sourceTrack, config = {}) {
|
|
469
235
|
const context = getAudioContext();
|
|
470
236
|
registerPipeline();
|
|
471
|
-
const
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
237
|
+
const nsConfig = {
|
|
238
|
+
enabled: config.noiseSuppression?.enabled ?? true,
|
|
239
|
+
noiseReductionLevel: config.noiseSuppression?.noiseReductionLevel ?? 60
|
|
240
|
+
};
|
|
241
|
+
if (config.noiseSuppression?.assetConfig) {
|
|
242
|
+
nsConfig.assetConfig = config.noiseSuppression.assetConfig;
|
|
243
|
+
}
|
|
475
244
|
const fullConfig = {
|
|
476
|
-
noiseSuppression:
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
hangoverMs: 400,
|
|
486
|
-
preRollMs: 250,
|
|
487
|
-
minSpeechDurationMs: 100,
|
|
488
|
-
minSilenceDurationMs: 150,
|
|
489
|
-
energyVad: {
|
|
490
|
-
smoothing: 0.95,
|
|
491
|
-
initialNoiseFloor: 1e-3,
|
|
492
|
-
noiseFloorAdaptRateQuiet: 0.01,
|
|
493
|
-
noiseFloorAdaptRateLoud: 1e-3,
|
|
494
|
-
minSNR: 2,
|
|
495
|
-
snrRange: 8
|
|
496
|
-
},
|
|
497
|
-
...config.vad
|
|
245
|
+
noiseSuppression: nsConfig,
|
|
246
|
+
speaking: {
|
|
247
|
+
minDb: config.speaking?.minDb ?? -60,
|
|
248
|
+
maxDb: config.speaking?.maxDb ?? -20,
|
|
249
|
+
speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
|
|
250
|
+
speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
|
|
251
|
+
hangoverMs: config.speaking?.hangoverMs ?? 350,
|
|
252
|
+
attackMs: config.speaking?.attackMs ?? 50,
|
|
253
|
+
releaseMs: config.speaking?.releaseMs ?? 120
|
|
498
254
|
},
|
|
499
255
|
output: {
|
|
500
|
-
speechGain: 1,
|
|
501
|
-
silenceGain: 0,
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
smoothTransitions: true,
|
|
506
|
-
maxGainDb: 6,
|
|
507
|
-
enableCompression: false,
|
|
508
|
-
compression: {
|
|
509
|
-
threshold: -24,
|
|
510
|
-
ratio: 3,
|
|
511
|
-
attack: 3e-3,
|
|
512
|
-
release: 0.05
|
|
513
|
-
},
|
|
514
|
-
...config.output
|
|
256
|
+
speechGain: config.output?.speechGain ?? 1,
|
|
257
|
+
silenceGain: config.output?.silenceGain ?? 0,
|
|
258
|
+
gainRampTime: config.output?.gainRampTime ?? 0.015,
|
|
259
|
+
maxGainDb: config.output?.maxGainDb ?? 6,
|
|
260
|
+
smoothTransitions: config.output?.smoothTransitions ?? true
|
|
515
261
|
},
|
|
516
|
-
|
|
262
|
+
muteWhenSilent: config.muteWhenSilent ?? false
|
|
517
263
|
};
|
|
518
|
-
console.log("Audio pipeline config:", {
|
|
519
|
-
noiseSuppression: fullConfig.noiseSuppression?.enabled,
|
|
520
|
-
vad: fullConfig.vad?.enabled,
|
|
521
|
-
output: fullConfig.output
|
|
522
|
-
});
|
|
523
264
|
if (!sourceTrack || sourceTrack.kind !== "audio") {
|
|
524
265
|
throw new Error(
|
|
525
266
|
"createAudioPipeline requires a valid audio MediaStreamTrack"
|
|
@@ -530,332 +271,196 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
530
271
|
}
|
|
531
272
|
const sourceStream = new MediaStream([sourceTrack]);
|
|
532
273
|
const sourceNode = context.createMediaStreamSource(sourceStream);
|
|
533
|
-
let nsNode;
|
|
534
|
-
let vadNode;
|
|
535
274
|
const emitter = (0, import_mitt.default)();
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
try {
|
|
553
|
-
const timestamp = context.currentTime * 1e3;
|
|
554
|
-
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
555
|
-
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
556
|
-
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
557
|
-
}
|
|
558
|
-
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
559
|
-
emitter.emit("vadChange", newState);
|
|
560
|
-
lastVadState = newState;
|
|
561
|
-
updateGain(newState);
|
|
562
|
-
}
|
|
563
|
-
} catch (vadError) {
|
|
564
|
-
const err = vadError instanceof Error ? vadError : new Error(String(vadError));
|
|
565
|
-
console.error("Error in VAD callback:", err);
|
|
566
|
-
emitter.emit("error", err);
|
|
275
|
+
const vad = new LevelBasedVAD(fullConfig.speaking);
|
|
276
|
+
let lastState = { speaking: false, levelDb: -Infinity };
|
|
277
|
+
const nsHandle = await createDeepFilterNet3Node(
|
|
278
|
+
context,
|
|
279
|
+
fullConfig.noiseSuppression
|
|
280
|
+
);
|
|
281
|
+
const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
|
|
282
|
+
try {
|
|
283
|
+
const timestamp = context.currentTime * 1e3;
|
|
284
|
+
const nextState = vad.process(levelDb, timestamp);
|
|
285
|
+
const speakingChanged = nextState.speaking !== lastState.speaking;
|
|
286
|
+
const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
|
|
287
|
+
if (speakingChanged || levelChanged) {
|
|
288
|
+
lastState = nextState;
|
|
289
|
+
updateGain(nextState);
|
|
290
|
+
emitter.emit("speakingChange", nextState);
|
|
567
291
|
}
|
|
568
|
-
})
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
throw err;
|
|
574
|
-
}
|
|
575
|
-
let lastVadState = {
|
|
576
|
-
isSpeaking: false,
|
|
577
|
-
probability: 0,
|
|
578
|
-
state: "silent"
|
|
579
|
-
};
|
|
292
|
+
} catch (error) {
|
|
293
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
294
|
+
emitter.emit("error", err);
|
|
295
|
+
}
|
|
296
|
+
});
|
|
580
297
|
const splitter = context.createGain();
|
|
581
|
-
sourceNode.connect(
|
|
582
|
-
|
|
583
|
-
splitter.connect(
|
|
584
|
-
const delayNode = context.createDelay(1);
|
|
585
|
-
const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
|
|
586
|
-
delayNode.delayTime.value = preRollSeconds;
|
|
298
|
+
sourceNode.connect(nsHandle.node);
|
|
299
|
+
nsHandle.node.connect(splitter);
|
|
300
|
+
splitter.connect(levelHandle.node);
|
|
587
301
|
const gainNode = context.createGain();
|
|
588
302
|
gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
|
|
589
|
-
|
|
590
|
-
if (fullConfig.output?.enableCompression) {
|
|
591
|
-
compressor = context.createDynamicsCompressor();
|
|
592
|
-
const comp = fullConfig.output.compression;
|
|
593
|
-
compressor.threshold.value = comp.threshold ?? -24;
|
|
594
|
-
compressor.ratio.value = comp.ratio ?? 3;
|
|
595
|
-
compressor.attack.value = comp.attack ?? 3e-3;
|
|
596
|
-
compressor.release.value = comp.release ?? 0.05;
|
|
597
|
-
compressor.knee.value = 10;
|
|
598
|
-
}
|
|
303
|
+
splitter.connect(gainNode);
|
|
599
304
|
const destination = context.createMediaStreamDestination();
|
|
600
|
-
|
|
601
|
-
splitter.connect(delayNode);
|
|
602
|
-
delayNode.connect(gainNode);
|
|
603
|
-
if (compressor) {
|
|
604
|
-
gainNode.connect(compressor);
|
|
605
|
-
compressor.connect(destination);
|
|
606
|
-
console.log("Compression enabled:", fullConfig.output?.compression);
|
|
607
|
-
} else {
|
|
608
|
-
gainNode.connect(destination);
|
|
609
|
-
}
|
|
610
|
-
} catch (error) {
|
|
611
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
612
|
-
console.error("Failed to wire audio pipeline:", err);
|
|
613
|
-
emitter.emit("error", err);
|
|
614
|
-
throw err;
|
|
615
|
-
}
|
|
305
|
+
gainNode.connect(destination);
|
|
616
306
|
function updateGain(state) {
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
gainNode.gain.setValueAtTime(targetGain, now);
|
|
635
|
-
}
|
|
636
|
-
} catch (error) {
|
|
637
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
638
|
-
console.error("Failed to update gain:", err);
|
|
639
|
-
emitter.emit("error", err);
|
|
307
|
+
const {
|
|
308
|
+
speechGain = 1,
|
|
309
|
+
silenceGain = 0,
|
|
310
|
+
gainRampTime = 0.015,
|
|
311
|
+
smoothTransitions = true,
|
|
312
|
+
maxGainDb = 6
|
|
313
|
+
} = fullConfig.output ?? {};
|
|
314
|
+
const maxGainLinear = Math.pow(10, maxGainDb / 20);
|
|
315
|
+
const limitedSpeechGain = Math.min(speechGain ?? 1, maxGainLinear);
|
|
316
|
+
const target = state.speaking ? limitedSpeechGain : silenceGain ?? 0;
|
|
317
|
+
const now = context.currentTime;
|
|
318
|
+
gainNode.gain.cancelScheduledValues(now);
|
|
319
|
+
gainNode.gain.setValueAtTime(gainNode.gain.value, now);
|
|
320
|
+
if (smoothTransitions) {
|
|
321
|
+
gainNode.gain.setTargetAtTime(target, now, gainRampTime / 3);
|
|
322
|
+
} else {
|
|
323
|
+
gainNode.gain.setValueAtTime(target, now);
|
|
640
324
|
}
|
|
641
325
|
}
|
|
642
326
|
const audioTracks = destination.stream.getAudioTracks();
|
|
643
|
-
console.log("Destination stream tracks:", {
|
|
644
|
-
count: audioTracks.length,
|
|
645
|
-
tracks: audioTracks.map((t) => ({
|
|
646
|
-
id: t.id,
|
|
647
|
-
label: t.label,
|
|
648
|
-
enabled: t.enabled,
|
|
649
|
-
readyState: t.readyState
|
|
650
|
-
}))
|
|
651
|
-
});
|
|
652
327
|
if (audioTracks.length === 0) {
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
);
|
|
656
|
-
|
|
657
|
-
emitter.emit("error", err);
|
|
658
|
-
throw err;
|
|
328
|
+
nsHandle.dispose();
|
|
329
|
+
levelHandle.dispose();
|
|
330
|
+
unregisterPipeline();
|
|
331
|
+
throw new Error("Failed to create processed audio track");
|
|
659
332
|
}
|
|
660
333
|
const processedTrack = audioTracks[0];
|
|
661
|
-
if (!processedTrack || processedTrack.readyState === "ended") {
|
|
662
|
-
const err = new Error("Processed audio track is invalid or ended");
|
|
663
|
-
console.error(err);
|
|
664
|
-
emitter.emit("error", err);
|
|
665
|
-
throw err;
|
|
666
|
-
}
|
|
667
|
-
console.log("Audio pipeline created successfully:", {
|
|
668
|
-
sourceTrack: {
|
|
669
|
-
id: sourceTrack.id,
|
|
670
|
-
label: sourceTrack.label,
|
|
671
|
-
readyState: sourceTrack.readyState
|
|
672
|
-
},
|
|
673
|
-
processedTrack: {
|
|
674
|
-
id: processedTrack.id,
|
|
675
|
-
label: processedTrack.label,
|
|
676
|
-
readyState: processedTrack.readyState
|
|
677
|
-
},
|
|
678
|
-
config: {
|
|
679
|
-
noiseSuppression: fullConfig.noiseSuppression?.enabled,
|
|
680
|
-
vad: fullConfig.vad?.enabled
|
|
681
|
-
}
|
|
682
|
-
});
|
|
683
334
|
function dispose() {
|
|
684
335
|
try {
|
|
685
336
|
sourceNode.disconnect();
|
|
686
|
-
|
|
337
|
+
nsHandle.node.disconnect();
|
|
687
338
|
splitter.disconnect();
|
|
688
|
-
|
|
689
|
-
delayNode.disconnect();
|
|
339
|
+
levelHandle.node.disconnect();
|
|
690
340
|
gainNode.disconnect();
|
|
691
|
-
if (compressor) {
|
|
692
|
-
compressor.disconnect();
|
|
693
|
-
}
|
|
694
341
|
destination.stream.getTracks().forEach((t) => t.stop());
|
|
695
|
-
|
|
342
|
+
levelHandle.dispose();
|
|
343
|
+
nsHandle.dispose();
|
|
696
344
|
} catch (error) {
|
|
697
|
-
console.error("Error during pipeline disposal
|
|
345
|
+
console.error("Error during pipeline disposal", error);
|
|
346
|
+
} finally {
|
|
347
|
+
unregisterPipeline();
|
|
698
348
|
}
|
|
699
349
|
}
|
|
700
|
-
|
|
350
|
+
const handle = {
|
|
701
351
|
processedTrack,
|
|
702
352
|
events: emitter,
|
|
703
353
|
get state() {
|
|
704
|
-
return
|
|
354
|
+
return lastState;
|
|
705
355
|
},
|
|
706
|
-
setConfig: (
|
|
356
|
+
setConfig: (next) => {
|
|
707
357
|
try {
|
|
708
|
-
if (
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
context.currentTime
|
|
716
|
-
);
|
|
717
|
-
}
|
|
358
|
+
if (next.speaking) {
|
|
359
|
+
vad.updateConfig(next.speaking);
|
|
360
|
+
fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
|
|
361
|
+
}
|
|
362
|
+
if (next.output) {
|
|
363
|
+
fullConfig.output = { ...fullConfig.output, ...next.output };
|
|
364
|
+
updateGain(lastState);
|
|
718
365
|
}
|
|
719
|
-
if (
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
if (comp.ratio !== void 0) {
|
|
731
|
-
compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
|
|
732
|
-
}
|
|
733
|
-
if (comp.attack !== void 0) {
|
|
734
|
-
compressor.attack.setValueAtTime(
|
|
735
|
-
comp.attack,
|
|
736
|
-
context.currentTime
|
|
737
|
-
);
|
|
738
|
-
}
|
|
739
|
-
if (comp.release !== void 0) {
|
|
740
|
-
compressor.release.setValueAtTime(
|
|
741
|
-
comp.release,
|
|
742
|
-
context.currentTime
|
|
743
|
-
);
|
|
744
|
-
}
|
|
366
|
+
if (next.noiseSuppression) {
|
|
367
|
+
const ns = next.noiseSuppression;
|
|
368
|
+
fullConfig.noiseSuppression = {
|
|
369
|
+
...fullConfig.noiseSuppression,
|
|
370
|
+
...ns
|
|
371
|
+
};
|
|
372
|
+
if (typeof ns.noiseReductionLevel === "number") {
|
|
373
|
+
nsHandle.processor.setSuppressionLevel(ns.noiseReductionLevel);
|
|
374
|
+
}
|
|
375
|
+
if (typeof ns.enabled === "boolean") {
|
|
376
|
+
nsHandle.processor.setNoiseSuppressionEnabled(ns.enabled);
|
|
745
377
|
}
|
|
746
378
|
}
|
|
747
|
-
if (
|
|
748
|
-
|
|
379
|
+
if (typeof next.muteWhenSilent === "boolean") {
|
|
380
|
+
fullConfig.muteWhenSilent = next.muteWhenSilent;
|
|
749
381
|
}
|
|
750
|
-
console.log("Pipeline config updated:", newConfig);
|
|
751
382
|
} catch (error) {
|
|
752
383
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
753
|
-
console.error("Failed to update config:", err);
|
|
754
384
|
emitter.emit("error", err);
|
|
755
385
|
}
|
|
756
386
|
},
|
|
757
387
|
dispose
|
|
758
388
|
};
|
|
389
|
+
return handle;
|
|
759
390
|
}
|
|
760
391
|
|
|
761
392
|
// src/livekit/integration.ts
|
|
762
|
-
async function
|
|
393
|
+
async function attachSpeakingDetectionToTrack(track, options = {}) {
|
|
763
394
|
if (!track) {
|
|
764
|
-
throw new Error(
|
|
765
|
-
|
|
766
|
-
const originalTrack = track.mediaStreamTrack;
|
|
767
|
-
if (!originalTrack) {
|
|
768
|
-
throw new Error("LocalAudioTrack has no underlying MediaStreamTrack");
|
|
769
|
-
}
|
|
770
|
-
if (originalTrack.readyState === "ended") {
|
|
771
|
-
throw new Error("Cannot attach processing to an ended MediaStreamTrack");
|
|
772
|
-
}
|
|
773
|
-
let pipeline;
|
|
774
|
-
try {
|
|
775
|
-
console.log("Creating audio processing pipeline...");
|
|
776
|
-
pipeline = await createAudioPipeline(originalTrack, config);
|
|
777
|
-
console.log("Audio processing pipeline created successfully");
|
|
778
|
-
} catch (error) {
|
|
779
|
-
const err = new Error(
|
|
780
|
-
`Failed to create audio pipeline: ${error instanceof Error ? error.message : String(error)}`
|
|
781
|
-
);
|
|
782
|
-
console.error(err);
|
|
783
|
-
throw err;
|
|
784
|
-
}
|
|
785
|
-
if (!pipeline.processedTrack) {
|
|
786
|
-
throw new Error("Pipeline did not return a processed track");
|
|
787
|
-
}
|
|
788
|
-
try {
|
|
789
|
-
console.log("Replacing LiveKit track with processed track...");
|
|
790
|
-
await track.replaceTrack(pipeline.processedTrack);
|
|
791
|
-
console.log("LiveKit track replaced successfully");
|
|
792
|
-
} catch (error) {
|
|
793
|
-
pipeline.dispose();
|
|
794
|
-
const err = new Error(
|
|
795
|
-
`Failed to replace LiveKit track: ${error instanceof Error ? error.message : String(error)}`
|
|
395
|
+
throw new Error(
|
|
396
|
+
"attachSpeakingDetectionToTrack requires a valid LocalAudioTrack"
|
|
796
397
|
);
|
|
797
|
-
console.error(err);
|
|
798
|
-
throw err;
|
|
799
398
|
}
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
console.error("Error handling VAD-based track muting:", error);
|
|
399
|
+
const originalTrack = track.mediaStreamTrack;
|
|
400
|
+
if (!originalTrack || originalTrack.readyState === "ended") {
|
|
401
|
+
throw new Error("LocalAudioTrack has no live MediaStreamTrack to process");
|
|
402
|
+
}
|
|
403
|
+
const pipeline = await createAudioPipeline(originalTrack, options);
|
|
404
|
+
await track.replaceTrack(pipeline.processedTrack);
|
|
405
|
+
const listeners = /* @__PURE__ */ new Set();
|
|
406
|
+
let mutedByController = false;
|
|
407
|
+
let currentState = pipeline.state;
|
|
408
|
+
const speakingHandler = (state) => {
|
|
409
|
+
currentState = state;
|
|
410
|
+
listeners.forEach((listener) => listener(state));
|
|
411
|
+
if (options.muteWhenSilent) {
|
|
412
|
+
if (!state.speaking && !track.isMuted) {
|
|
413
|
+
track.mute().catch((error) => console.error("mute failed", error));
|
|
414
|
+
mutedByController = true;
|
|
817
415
|
}
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
pipeline.
|
|
825
|
-
|
|
416
|
+
if (state.speaking && mutedByController) {
|
|
417
|
+
track.unmute().catch((error) => console.error("unmute failed", error));
|
|
418
|
+
mutedByController = false;
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
};
|
|
422
|
+
pipeline.events.on("speakingChange", speakingHandler);
|
|
423
|
+
const errorHandler = (error) => {
|
|
424
|
+
console.error("Audio pipeline error", error);
|
|
425
|
+
};
|
|
426
|
+
pipeline.events.on("error", errorHandler);
|
|
427
|
+
const controller = {
|
|
428
|
+
get speaking() {
|
|
429
|
+
return currentState.speaking;
|
|
430
|
+
},
|
|
431
|
+
get levelDb() {
|
|
432
|
+
return currentState.levelDb;
|
|
433
|
+
},
|
|
434
|
+
onChange: (listener) => {
|
|
435
|
+
listeners.add(listener);
|
|
436
|
+
listener(currentState);
|
|
437
|
+
return () => listeners.delete(listener);
|
|
438
|
+
},
|
|
439
|
+
setConfig: (config) => {
|
|
440
|
+
pipeline.setConfig(config);
|
|
441
|
+
if (typeof config.muteWhenSilent === "boolean") {
|
|
442
|
+
options.muteWhenSilent = config.muteWhenSilent;
|
|
443
|
+
}
|
|
444
|
+
},
|
|
445
|
+
dispose: () => {
|
|
446
|
+
pipeline.events.off("speakingChange", speakingHandler);
|
|
447
|
+
pipeline.events.off("error", errorHandler);
|
|
448
|
+
listeners.clear();
|
|
449
|
+
if (mutedByController && !track.isMuted) {
|
|
450
|
+
track.unmute().catch((error) => console.error("unmute failed", error));
|
|
451
|
+
mutedByController = false;
|
|
452
|
+
}
|
|
453
|
+
pipeline.dispose();
|
|
826
454
|
if (originalTrack.readyState === "live") {
|
|
827
|
-
console.log("Restoring original track...");
|
|
828
455
|
track.replaceTrack(originalTrack).catch((error) => {
|
|
829
|
-
console.error("Failed to restore original track
|
|
456
|
+
console.error("Failed to restore original track", error);
|
|
830
457
|
});
|
|
831
458
|
}
|
|
832
|
-
originalDispose();
|
|
833
|
-
} catch (error) {
|
|
834
|
-
console.error("Error during pipeline disposal:", error);
|
|
835
|
-
try {
|
|
836
|
-
originalDispose();
|
|
837
|
-
} catch (disposeError) {
|
|
838
|
-
console.error("Error calling original dispose:", disposeError);
|
|
839
|
-
}
|
|
840
459
|
}
|
|
841
460
|
};
|
|
842
|
-
return
|
|
461
|
+
return controller;
|
|
843
462
|
}
|
|
844
463
|
// Annotate the CommonJS export names for ESM import in node:
|
|
845
464
|
0 && (module.exports = {
|
|
846
|
-
|
|
847
|
-
RNNoisePlugin,
|
|
848
|
-
VADStateMachine,
|
|
849
|
-
attachProcessingToTrack,
|
|
850
|
-
closeAudioContext,
|
|
851
|
-
createAudioPipeline,
|
|
852
|
-
getAudioContext,
|
|
853
|
-
getNoiseSuppressionPlugin,
|
|
854
|
-
getVADPlugin,
|
|
855
|
-
registerNoiseSuppressionPlugin,
|
|
856
|
-
registerPipeline,
|
|
857
|
-
registerVADPlugin,
|
|
858
|
-
resumeAudioContext,
|
|
859
|
-
suspendAudioContext,
|
|
860
|
-
unregisterPipeline
|
|
465
|
+
attachSpeakingDetectionToTrack
|
|
861
466
|
});
|