@tensamin/audio 0.1.14 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +48 -231
  2. package/dist/chunk-6BJ4XGSA.mjs +80 -0
  3. package/dist/chunk-AQ5RVY33.mjs +74 -0
  4. package/dist/chunk-IS37FHDN.mjs +33 -0
  5. package/dist/chunk-K4J3UUOR.mjs +178 -0
  6. package/dist/chunk-QNQK6QFB.mjs +71 -0
  7. package/dist/context/audio-context.d.mts +0 -24
  8. package/dist/context/audio-context.d.ts +0 -24
  9. package/dist/index.d.mts +2 -8
  10. package/dist/index.d.ts +2 -8
  11. package/dist/index.js +285 -680
  12. package/dist/index.mjs +8 -43
  13. package/dist/livekit/integration.d.mts +3 -7
  14. package/dist/livekit/integration.d.ts +3 -7
  15. package/dist/livekit/integration.js +280 -626
  16. package/dist/livekit/integration.mjs +7 -8
  17. package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
  18. package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
  19. package/dist/noise-suppression/deepfilternet-node.js +57 -0
  20. package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
  21. package/dist/pipeline/audio-pipeline.d.mts +2 -2
  22. package/dist/pipeline/audio-pipeline.d.ts +2 -2
  23. package/dist/pipeline/audio-pipeline.js +219 -554
  24. package/dist/pipeline/audio-pipeline.mjs +4 -5
  25. package/dist/types.d.mts +42 -257
  26. package/dist/types.d.ts +42 -257
  27. package/dist/vad/vad-node.d.mts +7 -9
  28. package/dist/vad/vad-node.d.ts +7 -9
  29. package/dist/vad/vad-node.js +47 -156
  30. package/dist/vad/vad-node.mjs +3 -3
  31. package/dist/vad/vad-state.d.mts +9 -11
  32. package/dist/vad/vad-state.d.ts +9 -11
  33. package/dist/vad/vad-state.js +50 -79
  34. package/dist/vad/vad-state.mjs +3 -3
  35. package/package.json +21 -21
  36. package/dist/chunk-2G2JFHJY.mjs +0 -180
  37. package/dist/chunk-6F2HZUYO.mjs +0 -91
  38. package/dist/chunk-K4YLH73B.mjs +0 -103
  39. package/dist/chunk-R5M2DGAQ.mjs +0 -311
  40. package/dist/chunk-UFKIAMG3.mjs +0 -47
  41. package/dist/chunk-XO6B3D4A.mjs +0 -67
  42. package/dist/extensibility/plugins.d.mts +0 -9
  43. package/dist/extensibility/plugins.d.ts +0 -9
  44. package/dist/extensibility/plugins.js +0 -320
  45. package/dist/extensibility/plugins.mjs +0 -14
  46. package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
  47. package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
  48. package/dist/noise-suppression/rnnoise-node.js +0 -101
  49. package/dist/noise-suppression/rnnoise-node.mjs +0 -6
@@ -56,371 +56,171 @@ function unregisterPipeline() {
56
56
  activePipelines = Math.max(0, activePipelines - 1);
57
57
  }
58
58
 
59
- // src/noise-suppression/rnnoise-node.ts
60
- var RNNoisePlugin = class {
61
- name = "rnnoise-ns";
62
- wasmBuffer = null;
63
- async createNode(context, config) {
64
- const { loadRnnoise, RnnoiseWorkletNode } = await import("@sapphi-red/web-noise-suppressor");
65
- if (!config?.enabled) {
66
- console.log("Noise suppression disabled, using passthrough node");
67
- const pass = context.createGain();
68
- return pass;
69
- }
70
- if (!config?.wasmUrl || !config?.simdUrl || !config?.workletUrl) {
71
- const error = new Error(
72
- `RNNoisePlugin requires 'wasmUrl', 'simdUrl', and 'workletUrl' to be configured. Please download the assets from @sapphi-red/web-noise-suppressor and provide the URLs in the config. Current config: wasmUrl=${config?.wasmUrl}, simdUrl=${config?.simdUrl}, workletUrl=${config?.workletUrl}
73
- To disable noise suppression, set noiseSuppression.enabled to false.`
74
- );
75
- console.error(error.message);
76
- throw error;
77
- }
78
- try {
79
- if (!this.wasmBuffer) {
80
- console.log("Loading RNNoise WASM binary...");
81
- this.wasmBuffer = await loadRnnoise({
82
- url: config.wasmUrl,
83
- simdUrl: config.simdUrl
84
- });
85
- console.log("RNNoise WASM loaded successfully");
59
+ // src/noise-suppression/deepfilternet-node.ts
60
+ var import_deepfilternet3_noise_filter = require("deepfilternet3-noise-filter");
61
+ async function createDeepFilterNet3Node(context, config) {
62
+ const processorConfig = {
63
+ sampleRate: context.sampleRate,
64
+ noiseReductionLevel: config?.noiseReductionLevel ?? 60
65
+ };
66
+ if (config?.assetConfig) {
67
+ processorConfig.assetConfig = config.assetConfig;
68
+ }
69
+ const processor = new import_deepfilternet3_noise_filter.DeepFilterNet3Processor(processorConfig);
70
+ await processor.initialize();
71
+ const node = await processor.createAudioWorkletNode(context);
72
+ const enabled = config?.enabled ?? true;
73
+ if (!enabled) {
74
+ processor.setNoiseSuppressionEnabled(false);
75
+ }
76
+ return {
77
+ node,
78
+ processor,
79
+ dispose: () => {
80
+ try {
81
+ processor.destroy();
82
+ } catch (error) {
83
+ console.error("Failed to dispose DeepFilterNet3 processor", error);
86
84
  }
87
- } catch (error) {
88
- const err = new Error(
89
- `Failed to load RNNoise WASM binary: ${error instanceof Error ? error.message : String(error)}`
90
- );
91
- console.error(err);
92
- throw err;
93
- }
94
- const workletUrl = config.workletUrl;
95
- try {
96
- await context.audioWorklet.addModule(workletUrl);
97
- console.log("RNNoise worklet loaded successfully");
98
- } catch (e) {
99
- const error = new Error(
100
- `Failed to load RNNoise worklet from ${workletUrl}: ${e instanceof Error ? e.message : String(e)}. Ensure the workletUrl points to a valid RNNoise worklet script.`
101
- );
102
- console.error(error.message);
103
- throw error;
104
- }
105
- try {
106
- const node = new RnnoiseWorkletNode(context, {
107
- wasmBinary: this.wasmBuffer,
108
- maxChannels: 1
109
- // Mono for now
110
- });
111
- console.log("RNNoise worklet node created successfully");
112
- return node;
113
- } catch (error) {
114
- const err = new Error(
115
- `Failed to create RNNoise worklet node: ${error instanceof Error ? error.message : String(error)}`
116
- );
117
- console.error(err);
118
- throw err;
119
85
  }
120
- }
121
- };
86
+ };
87
+ }
122
88
 
123
89
  // src/vad/vad-node.ts
124
- var createEnergyVadWorkletCode = (vadConfig) => {
125
- const energyParams = vadConfig?.energyVad || {};
126
- const smoothing = energyParams.smoothing ?? 0.95;
127
- const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
128
- const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
129
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
130
- const minSNR = energyParams.minSNR ?? 12;
131
- const snrRange = energyParams.snrRange ?? 10;
132
- const minEnergy = energyParams.minEnergy ?? 3e-3;
90
+ function createLevelDetectorWorkletCode(smoothing) {
133
91
  return `
134
- class EnergyVadProcessor extends AudioWorkletProcessor {
92
+ class LevelDetectorProcessor extends AudioWorkletProcessor {
135
93
  constructor() {
136
94
  super();
95
+ this.smoothed = 0;
137
96
  this.smoothing = ${smoothing};
138
- this.energy = 0;
139
- this.noiseFloor = ${initialNoiseFloor};
140
- this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
141
- this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
142
- this.minSNR = ${minSNR};
143
- this.snrRange = ${snrRange};
144
- this.minEnergy = ${minEnergy};
145
- this.isSpeaking = false;
146
-
147
- this.port.onmessage = (event) => {
148
- if (event.data && event.data.isSpeaking !== undefined) {
149
- this.isSpeaking = event.data.isSpeaking;
150
- }
151
- };
152
97
  }
153
98
 
154
- process(inputs, outputs, parameters) {
99
+ process(inputs) {
155
100
  const input = inputs[0];
156
- if (!input || !input.length) return true;
101
+ if (!input || input.length === 0) return true;
157
102
  const channel = input[0];
158
-
159
- // Calculate instantaneous RMS (Root Mean Square) energy
103
+ if (!channel || channel.length === 0) return true;
104
+
160
105
  let sum = 0;
161
- let peak = 0;
162
106
  for (let i = 0; i < channel.length; i++) {
163
- const sample = Math.abs(channel[i]);
164
- sum += channel[i] * channel[i];
165
- peak = Math.max(peak, sample);
107
+ const sample = channel[i];
108
+ sum += sample * sample;
166
109
  }
167
- const instantRms = Math.sqrt(sum / channel.length);
168
-
169
- // Smooth the RMS energy to reduce jitter
170
- // this.energy acts as the smoothed RMS value
171
- this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
172
-
173
- // Calculate Crest Factor (peak-to-RMS ratio)
174
- // Voice typically has crest factor of 2-4 (6-12dB)
175
- // Keyboard clicks have crest factor of 10-30+ (20-30dB)
176
- const crestFactor = peak / (instantRms + 1e-10);
177
- const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
178
-
179
- // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
180
- // This prevents sharp transients from affecting the noise floor
181
- if (this.energy < this.noiseFloor) {
182
- // Signal is quieter than noise floor, adapt downwards slowly
183
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
184
- } else {
185
- // Calculate SNR based on smoothed energy
186
- const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
187
- const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
188
-
189
- // Only adapt upwards if:
190
- // 1. SNR is low (< 10dB) - likely just background noise
191
- // 2. AND crest factor is low (< 15dB) - not a sharp transient
192
- if (smoothedSnrDb < 10 && crestFactorDb < 15) {
193
- // This is persistent background noise, adapt upwards
194
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
195
- } else {
196
- // Either high SNR (speech) or high crest factor (click) - adapt very slowly
197
- const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
198
- this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
199
- }
200
- }
201
-
202
- // Ensure noise floor doesn't drop to absolute zero
203
- this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
204
-
205
- // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
206
- const snr = this.energy / (this.noiseFloor + 1e-6);
207
- const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
208
-
209
- // Map SNR dB to probability (0-1)
210
- // Probability is 0 when snrDb <= minSNR
211
- // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
212
- let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
213
-
214
- // Apply absolute energy threshold with soft knee
215
- if (this.energy < this.minEnergy) {
216
- const energyRatio = this.energy / (this.minEnergy + 1e-6);
217
- probability *= Math.pow(energyRatio, 2);
218
- }
219
-
220
- // Apply crest factor penalty
221
- // Reject signals with high crest factor (sharp transients like keyboard clicks)
222
- // Voice: 6-12dB, Keyboard: 20-30dB
223
- // We penalize anything above 14dB
224
- if (crestFactorDb > 14) {
225
- const excess = crestFactorDb - 14;
226
- const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
227
- probability *= penalty;
228
- }
229
-
230
- this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
231
-
110
+ const rms = Math.sqrt(sum / channel.length);
111
+ this.smoothed = this.smoothed * this.smoothing + rms * (1 - this.smoothing);
112
+ const levelDb = 20 * Math.log10(Math.max(1e-8, this.smoothed));
113
+ this.port.postMessage({ levelDb });
232
114
  return true;
233
115
  }
234
116
  }
235
- registerProcessor('energy-vad-processor', EnergyVadProcessor);
117
+
118
+ registerProcessor('level-detector-processor', LevelDetectorProcessor);
236
119
  `;
237
- };
238
- var EnergyVADPlugin = class {
239
- name = "energy-vad";
240
- workletNode = null;
241
- async createNode(context, config, onDecision) {
242
- if (!config?.enabled) {
243
- console.log("VAD disabled, using passthrough node");
244
- const pass = context.createGain();
245
- return pass;
246
- }
247
- const workletCode = createEnergyVadWorkletCode(config);
248
- const blob = new Blob([workletCode], {
249
- type: "application/javascript"
250
- });
251
- const url = URL.createObjectURL(blob);
252
- try {
253
- await context.audioWorklet.addModule(url);
254
- console.log("Energy VAD worklet loaded successfully");
255
- } catch (e) {
256
- const error = new Error(
257
- `Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
258
- );
259
- console.error(error.message);
260
- URL.revokeObjectURL(url);
261
- throw error;
262
- }
120
+ }
121
+ async function createLevelDetectorNode(context, onLevel, options) {
122
+ const smoothing = options?.smoothing ?? 0.9;
123
+ const workletCode = createLevelDetectorWorkletCode(smoothing);
124
+ const blob = new Blob([workletCode], { type: "application/javascript" });
125
+ const url = URL.createObjectURL(blob);
126
+ try {
127
+ await context.audioWorklet.addModule(url);
128
+ } finally {
263
129
  URL.revokeObjectURL(url);
264
- let node;
265
- try {
266
- node = new AudioWorkletNode(context, "energy-vad-processor");
267
- this.workletNode = node;
268
- console.log("Energy VAD node created successfully");
269
- } catch (e) {
270
- const error = new Error(
271
- `Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`
272
- );
273
- console.error(error.message);
274
- throw error;
130
+ }
131
+ const node = new AudioWorkletNode(context, "level-detector-processor", {
132
+ numberOfInputs: 1,
133
+ numberOfOutputs: 0
134
+ });
135
+ node.port.onmessage = (event) => {
136
+ const { levelDb } = event.data ?? {};
137
+ if (typeof levelDb === "number" && !Number.isNaN(levelDb)) {
138
+ onLevel(levelDb);
275
139
  }
276
- node.port.onmessage = (event) => {
140
+ };
141
+ node.port.onmessageerror = (event) => {
142
+ console.error("Level detector port error", event);
143
+ };
144
+ return {
145
+ node,
146
+ dispose: () => {
277
147
  try {
278
- const { probability } = event.data;
279
- if (typeof probability === "number" && !isNaN(probability)) {
280
- onDecision(probability);
281
- } else {
282
- console.warn("Invalid VAD probability received:", event.data);
283
- }
148
+ node.port.onmessage = null;
149
+ node.port.close();
284
150
  } catch (error) {
285
- console.error("Error in VAD message handler:", error);
151
+ console.error("Failed to dispose level detector node", error);
286
152
  }
287
- };
288
- node.port.onmessageerror = (event) => {
289
- console.error("VAD port message error:", event);
290
- };
291
- return node;
292
- }
293
- updateSpeakingState(isSpeaking) {
294
- if (this.workletNode) {
295
- this.workletNode.port.postMessage({ isSpeaking });
296
153
  }
297
- }
298
- };
299
-
300
- // src/extensibility/plugins.ts
301
- var nsPlugins = /* @__PURE__ */ new Map();
302
- var vadPlugins = /* @__PURE__ */ new Map();
303
- var defaultNs = new RNNoisePlugin();
304
- nsPlugins.set(defaultNs.name, defaultNs);
305
- var defaultVad = new EnergyVADPlugin();
306
- vadPlugins.set(defaultVad.name, defaultVad);
307
- function getNoiseSuppressionPlugin(name) {
308
- if (!name) return defaultNs;
309
- const plugin = nsPlugins.get(name);
310
- if (!plugin) {
311
- console.warn(
312
- `Noise suppression plugin '${name}' not found, falling back to default.`
313
- );
314
- return defaultNs;
315
- }
316
- return plugin;
317
- }
318
- function getVADPlugin(name) {
319
- if (!name) return defaultVad;
320
- const plugin = vadPlugins.get(name);
321
- if (!plugin) {
322
- console.warn(`VAD plugin '${name}' not found, falling back to default.`);
323
- return defaultVad;
324
- }
325
- return plugin;
154
+ };
326
155
  }
327
156
 
328
157
  // src/vad/vad-state.ts
329
- var VADStateMachine = class {
158
+ var LevelBasedVAD = class {
330
159
  config;
331
- currentState = "silent";
332
- lastSpeechTime = 0;
333
- speechStartTime = 0;
334
- lastSilenceTime = 0;
335
- frameDurationMs = 20;
336
- // Assumed frame duration, updated by calls
160
+ speaking = false;
161
+ pendingSpeechSince = null;
162
+ pendingSilenceSince = null;
337
163
  constructor(config) {
338
164
  this.config = {
339
- enabled: config?.enabled ?? true,
340
- pluginName: config?.pluginName ?? "energy-vad",
341
- // Voice-optimized defaults
342
- startThreshold: config?.startThreshold ?? 0.8,
343
- // Higher threshold to avoid noise
344
- stopThreshold: config?.stopThreshold ?? 0.3,
345
- // Balanced for voice
346
- hangoverMs: config?.hangoverMs ?? 300,
347
- // Smooth for natural speech
348
- preRollMs: config?.preRollMs ?? 250,
349
- // Generous pre-roll
350
- minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
351
- // Aggressive transient rejection
352
- minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
353
- energyVad: {
354
- smoothing: config?.energyVad?.smoothing ?? 0.95,
355
- initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
356
- noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
357
- noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
358
- minSNR: config?.energyVad?.minSNR ?? 12,
359
- snrRange: config?.energyVad?.snrRange ?? 10,
360
- minEnergy: config?.energyVad?.minEnergy ?? 3e-3
361
- }
165
+ minDb: config.minDb,
166
+ maxDb: config.maxDb,
167
+ speakOnRatio: config.speakOnRatio ?? 0.6,
168
+ speakOffRatio: config.speakOffRatio ?? 0.3,
169
+ hangoverMs: config.hangoverMs ?? 350,
170
+ attackMs: config.attackMs ?? 50,
171
+ releaseMs: config.releaseMs ?? 120
362
172
  };
363
- this.lastSilenceTime = Date.now();
364
173
  }
365
174
  updateConfig(config) {
366
- this.config = { ...this.config, ...config };
175
+ this.config = {
176
+ ...this.config,
177
+ ...config,
178
+ speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
179
+ speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
180
+ hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
181
+ attackMs: config.attackMs ?? this.config.attackMs,
182
+ releaseMs: config.releaseMs ?? this.config.releaseMs
183
+ };
367
184
  }
368
- processFrame(probability, timestamp) {
185
+ process(levelDb, timestampMs) {
369
186
  const {
370
- startThreshold,
371
- stopThreshold,
187
+ minDb,
188
+ maxDb,
189
+ speakOnRatio,
190
+ speakOffRatio,
372
191
  hangoverMs,
373
- minSpeechDurationMs,
374
- minSilenceDurationMs
192
+ attackMs,
193
+ releaseMs
375
194
  } = this.config;
376
- let newState = this.currentState;
377
- if (this.currentState === "silent" || this.currentState === "speech_ending") {
378
- if (probability >= startThreshold) {
379
- const silenceDuration = timestamp - this.lastSilenceTime;
380
- if (silenceDuration >= minSilenceDurationMs) {
381
- newState = "speech_starting";
382
- this.speechStartTime = timestamp;
383
- this.lastSpeechTime = timestamp;
384
- } else {
385
- newState = "silent";
195
+ const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
196
+ const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
197
+ if (!this.speaking) {
198
+ if (norm >= speakOnRatio) {
199
+ this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
200
+ if (timestampMs - this.pendingSpeechSince >= attackMs) {
201
+ this.speaking = true;
202
+ this.pendingSpeechSince = null;
203
+ this.pendingSilenceSince = null;
386
204
  }
387
205
  } else {
388
- newState = "silent";
389
- this.lastSilenceTime = timestamp;
206
+ this.pendingSpeechSince = null;
390
207
  }
391
- } else if (this.currentState === "speech_starting") {
392
- if (probability >= stopThreshold) {
393
- const speechDuration = timestamp - this.speechStartTime;
394
- if (speechDuration >= minSpeechDurationMs) {
395
- newState = "speaking";
396
- } else {
397
- newState = "speech_starting";
208
+ } else {
209
+ if (norm <= speakOffRatio) {
210
+ this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
211
+ const releaseWindow = Math.max(releaseMs, hangoverMs);
212
+ if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
213
+ this.speaking = false;
214
+ this.pendingSilenceSince = null;
215
+ this.pendingSpeechSince = null;
398
216
  }
399
- this.lastSpeechTime = timestamp;
400
217
  } else {
401
- newState = "silent";
402
- this.lastSilenceTime = timestamp;
403
- }
404
- } else if (this.currentState === "speaking") {
405
- if (probability >= stopThreshold) {
406
- newState = "speaking";
407
- this.lastSpeechTime = timestamp;
408
- } else {
409
- const timeSinceSpeech = timestamp - this.lastSpeechTime;
410
- if (timeSinceSpeech < hangoverMs) {
411
- newState = "speaking";
412
- } else {
413
- newState = "speech_ending";
414
- this.lastSilenceTime = timestamp;
415
- }
218
+ this.pendingSilenceSince = null;
416
219
  }
417
220
  }
418
- if (newState === "speech_ending") newState = "silent";
419
- this.currentState = newState;
420
221
  return {
421
- isSpeaking: newState === "speaking",
422
- probability,
423
- state: newState
222
+ speaking: this.speaking,
223
+ levelDb: clamped
424
224
  };
425
225
  }
426
226
  };
@@ -429,58 +229,33 @@ var VADStateMachine = class {
429
229
  async function createAudioPipeline(sourceTrack, config = {}) {
430
230
  const context = getAudioContext();
431
231
  registerPipeline();
432
- const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
433
- config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
434
- );
435
- const vadEnabled = config.vad?.enabled !== false;
232
+ const nsConfig = {
233
+ enabled: config.noiseSuppression?.enabled ?? true,
234
+ noiseReductionLevel: config.noiseSuppression?.noiseReductionLevel ?? 60
235
+ };
236
+ if (config.noiseSuppression?.assetConfig) {
237
+ nsConfig.assetConfig = config.noiseSuppression.assetConfig;
238
+ }
436
239
  const fullConfig = {
437
- noiseSuppression: {
438
- enabled: nsEnabled,
439
- ...config.noiseSuppression
440
- },
441
- vad: {
442
- enabled: vadEnabled,
443
- // Voice-optimized defaults (will be overridden by config)
444
- startThreshold: 0.6,
445
- stopThreshold: 0.45,
446
- hangoverMs: 400,
447
- preRollMs: 250,
448
- minSpeechDurationMs: 100,
449
- minSilenceDurationMs: 150,
450
- energyVad: {
451
- smoothing: 0.95,
452
- initialNoiseFloor: 1e-3,
453
- noiseFloorAdaptRateQuiet: 0.01,
454
- noiseFloorAdaptRateLoud: 1e-3,
455
- minSNR: 2,
456
- snrRange: 8
457
- },
458
- ...config.vad
240
+ noiseSuppression: nsConfig,
241
+ speaking: {
242
+ minDb: config.speaking?.minDb ?? -60,
243
+ maxDb: config.speaking?.maxDb ?? -20,
244
+ speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
245
+ speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
246
+ hangoverMs: config.speaking?.hangoverMs ?? 350,
247
+ attackMs: config.speaking?.attackMs ?? 50,
248
+ releaseMs: config.speaking?.releaseMs ?? 120
459
249
  },
460
250
  output: {
461
- speechGain: 1,
462
- silenceGain: 0,
463
- // Full mute for voice-only
464
- gainRampTime: 0.015,
465
- // Fast but smooth transitions
466
- smoothTransitions: true,
467
- maxGainDb: 6,
468
- enableCompression: false,
469
- compression: {
470
- threshold: -24,
471
- ratio: 3,
472
- attack: 3e-3,
473
- release: 0.05
474
- },
475
- ...config.output
251
+ speechGain: config.output?.speechGain ?? 1,
252
+ silenceGain: config.output?.silenceGain ?? 0,
253
+ gainRampTime: config.output?.gainRampTime ?? 0.015,
254
+ maxGainDb: config.output?.maxGainDb ?? 6,
255
+ smoothTransitions: config.output?.smoothTransitions ?? true
476
256
  },
477
- livekit: { manageTrackMute: false, ...config.livekit }
257
+ muteWhenSilent: config.muteWhenSilent ?? false
478
258
  };
479
- console.log("Audio pipeline config:", {
480
- noiseSuppression: fullConfig.noiseSuppression?.enabled,
481
- vad: fullConfig.vad?.enabled,
482
- output: fullConfig.output
483
- });
484
259
  if (!sourceTrack || sourceTrack.kind !== "audio") {
485
260
  throw new Error(
486
261
  "createAudioPipeline requires a valid audio MediaStreamTrack"
@@ -491,232 +266,122 @@ async function createAudioPipeline(sourceTrack, config = {}) {
491
266
  }
492
267
  const sourceStream = new MediaStream([sourceTrack]);
493
268
  const sourceNode = context.createMediaStreamSource(sourceStream);
494
- let nsNode;
495
- let vadNode;
496
269
  const emitter = (0, import_mitt.default)();
497
- try {
498
- const nsPlugin = getNoiseSuppressionPlugin(
499
- fullConfig.noiseSuppression?.pluginName
500
- );
501
- nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
502
- } catch (error) {
503
- const err = error instanceof Error ? error : new Error(String(error));
504
- console.error("Failed to create noise suppression node:", err);
505
- emitter.emit("error", err);
506
- throw err;
507
- }
508
- const vadStateMachine = new VADStateMachine(fullConfig.vad);
509
- let vadPlugin;
510
- try {
511
- vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
512
- vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
513
- try {
514
- const timestamp = context.currentTime * 1e3;
515
- const newState = vadStateMachine.processFrame(prob, timestamp);
516
- if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
517
- vadPlugin.updateSpeakingState(newState.isSpeaking);
518
- }
519
- if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
520
- emitter.emit("vadChange", newState);
521
- lastVadState = newState;
522
- updateGain(newState);
523
- }
524
- } catch (vadError) {
525
- const err = vadError instanceof Error ? vadError : new Error(String(vadError));
526
- console.error("Error in VAD callback:", err);
527
- emitter.emit("error", err);
270
+ const vad = new LevelBasedVAD(fullConfig.speaking);
271
+ let lastState = { speaking: false, levelDb: -Infinity };
272
+ const nsHandle = await createDeepFilterNet3Node(
273
+ context,
274
+ fullConfig.noiseSuppression
275
+ );
276
+ const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
277
+ try {
278
+ const timestamp = context.currentTime * 1e3;
279
+ const nextState = vad.process(levelDb, timestamp);
280
+ const speakingChanged = nextState.speaking !== lastState.speaking;
281
+ const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
282
+ if (speakingChanged || levelChanged) {
283
+ lastState = nextState;
284
+ updateGain(nextState);
285
+ emitter.emit("speakingChange", nextState);
528
286
  }
529
- });
530
- } catch (error) {
531
- const err = error instanceof Error ? error : new Error(String(error));
532
- console.error("Failed to create VAD node:", err);
533
- emitter.emit("error", err);
534
- throw err;
535
- }
536
- let lastVadState = {
537
- isSpeaking: false,
538
- probability: 0,
539
- state: "silent"
540
- };
287
+ } catch (error) {
288
+ const err = error instanceof Error ? error : new Error(String(error));
289
+ emitter.emit("error", err);
290
+ }
291
+ });
541
292
  const splitter = context.createGain();
542
- sourceNode.connect(nsNode);
543
- nsNode.connect(splitter);
544
- splitter.connect(vadNode);
545
- const delayNode = context.createDelay(1);
546
- const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
547
- delayNode.delayTime.value = preRollSeconds;
293
+ sourceNode.connect(nsHandle.node);
294
+ nsHandle.node.connect(splitter);
295
+ splitter.connect(levelHandle.node);
548
296
  const gainNode = context.createGain();
549
297
  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
550
- let compressor = null;
551
- if (fullConfig.output?.enableCompression) {
552
- compressor = context.createDynamicsCompressor();
553
- const comp = fullConfig.output.compression;
554
- compressor.threshold.value = comp.threshold ?? -24;
555
- compressor.ratio.value = comp.ratio ?? 3;
556
- compressor.attack.value = comp.attack ?? 3e-3;
557
- compressor.release.value = comp.release ?? 0.05;
558
- compressor.knee.value = 10;
559
- }
298
+ splitter.connect(gainNode);
560
299
  const destination = context.createMediaStreamDestination();
561
- try {
562
- splitter.connect(delayNode);
563
- delayNode.connect(gainNode);
564
- if (compressor) {
565
- gainNode.connect(compressor);
566
- compressor.connect(destination);
567
- console.log("Compression enabled:", fullConfig.output?.compression);
568
- } else {
569
- gainNode.connect(destination);
570
- }
571
- } catch (error) {
572
- const err = error instanceof Error ? error : new Error(String(error));
573
- console.error("Failed to wire audio pipeline:", err);
574
- emitter.emit("error", err);
575
- throw err;
576
- }
300
+ gainNode.connect(destination);
577
301
  function updateGain(state) {
578
- try {
579
- const {
580
- speechGain = 1,
581
- silenceGain = 0,
582
- gainRampTime = 0.015,
583
- smoothTransitions = true,
584
- maxGainDb = 6
585
- } = fullConfig.output;
586
- const maxGainLinear = Math.pow(10, maxGainDb / 20);
587
- const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
588
- const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
589
- const now = context.currentTime;
590
- if (smoothTransitions) {
591
- gainNode.gain.cancelScheduledValues(now);
592
- gainNode.gain.setValueAtTime(gainNode.gain.value, now);
593
- gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
594
- } else {
595
- gainNode.gain.setValueAtTime(targetGain, now);
596
- }
597
- } catch (error) {
598
- const err = error instanceof Error ? error : new Error(String(error));
599
- console.error("Failed to update gain:", err);
600
- emitter.emit("error", err);
302
+ const {
303
+ speechGain = 1,
304
+ silenceGain = 0,
305
+ gainRampTime = 0.015,
306
+ smoothTransitions = true,
307
+ maxGainDb = 6
308
+ } = fullConfig.output ?? {};
309
+ const maxGainLinear = Math.pow(10, maxGainDb / 20);
310
+ const limitedSpeechGain = Math.min(speechGain ?? 1, maxGainLinear);
311
+ const target = state.speaking ? limitedSpeechGain : silenceGain ?? 0;
312
+ const now = context.currentTime;
313
+ gainNode.gain.cancelScheduledValues(now);
314
+ gainNode.gain.setValueAtTime(gainNode.gain.value, now);
315
+ if (smoothTransitions) {
316
+ gainNode.gain.setTargetAtTime(target, now, gainRampTime / 3);
317
+ } else {
318
+ gainNode.gain.setValueAtTime(target, now);
601
319
  }
602
320
  }
603
321
  const audioTracks = destination.stream.getAudioTracks();
604
- console.log("Destination stream tracks:", {
605
- count: audioTracks.length,
606
- tracks: audioTracks.map((t) => ({
607
- id: t.id,
608
- label: t.label,
609
- enabled: t.enabled,
610
- readyState: t.readyState
611
- }))
612
- });
613
322
  if (audioTracks.length === 0) {
614
- const err = new Error(
615
- "Failed to create processed audio track: destination stream has no audio tracks. This may indicate an issue with the audio graph connection."
616
- );
617
- console.error(err);
618
- emitter.emit("error", err);
619
- throw err;
323
+ nsHandle.dispose();
324
+ levelHandle.dispose();
325
+ unregisterPipeline();
326
+ throw new Error("Failed to create processed audio track");
620
327
  }
621
328
  const processedTrack = audioTracks[0];
622
- if (!processedTrack || processedTrack.readyState === "ended") {
623
- const err = new Error("Processed audio track is invalid or ended");
624
- console.error(err);
625
- emitter.emit("error", err);
626
- throw err;
627
- }
628
- console.log("Audio pipeline created successfully:", {
629
- sourceTrack: {
630
- id: sourceTrack.id,
631
- label: sourceTrack.label,
632
- readyState: sourceTrack.readyState
633
- },
634
- processedTrack: {
635
- id: processedTrack.id,
636
- label: processedTrack.label,
637
- readyState: processedTrack.readyState
638
- },
639
- config: {
640
- noiseSuppression: fullConfig.noiseSuppression?.enabled,
641
- vad: fullConfig.vad?.enabled
642
- }
643
- });
644
329
  function dispose() {
645
330
  try {
646
331
  sourceNode.disconnect();
647
- nsNode.disconnect();
332
+ nsHandle.node.disconnect();
648
333
  splitter.disconnect();
649
- vadNode.disconnect();
650
- delayNode.disconnect();
334
+ levelHandle.node.disconnect();
651
335
  gainNode.disconnect();
652
- if (compressor) {
653
- compressor.disconnect();
654
- }
655
336
  destination.stream.getTracks().forEach((t) => t.stop());
656
- unregisterPipeline();
337
+ levelHandle.dispose();
338
+ nsHandle.dispose();
657
339
  } catch (error) {
658
- console.error("Error during pipeline disposal:", error);
340
+ console.error("Error during pipeline disposal", error);
341
+ } finally {
342
+ unregisterPipeline();
659
343
  }
660
344
  }
661
- return {
345
+ const handle = {
662
346
  processedTrack,
663
347
  events: emitter,
664
348
  get state() {
665
- return lastVadState;
349
+ return lastState;
666
350
  },
667
- setConfig: (newConfig) => {
351
+ setConfig: (next) => {
668
352
  try {
669
- if (newConfig.vad) {
670
- vadStateMachine.updateConfig(newConfig.vad);
671
- Object.assign(fullConfig.vad, newConfig.vad);
672
- if (newConfig.vad.preRollMs !== void 0) {
673
- const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
674
- delayNode.delayTime.setValueAtTime(
675
- preRollSeconds2,
676
- context.currentTime
677
- );
678
- }
353
+ if (next.speaking) {
354
+ vad.updateConfig(next.speaking);
355
+ fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
356
+ }
357
+ if (next.output) {
358
+ fullConfig.output = { ...fullConfig.output, ...next.output };
359
+ updateGain(lastState);
679
360
  }
680
- if (newConfig.output) {
681
- Object.assign(fullConfig.output, newConfig.output);
682
- updateGain(lastVadState);
683
- if (compressor && newConfig.output.compression) {
684
- const comp = newConfig.output.compression;
685
- if (comp.threshold !== void 0) {
686
- compressor.threshold.setValueAtTime(
687
- comp.threshold,
688
- context.currentTime
689
- );
690
- }
691
- if (comp.ratio !== void 0) {
692
- compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
693
- }
694
- if (comp.attack !== void 0) {
695
- compressor.attack.setValueAtTime(
696
- comp.attack,
697
- context.currentTime
698
- );
699
- }
700
- if (comp.release !== void 0) {
701
- compressor.release.setValueAtTime(
702
- comp.release,
703
- context.currentTime
704
- );
705
- }
361
+ if (next.noiseSuppression) {
362
+ const ns = next.noiseSuppression;
363
+ fullConfig.noiseSuppression = {
364
+ ...fullConfig.noiseSuppression,
365
+ ...ns
366
+ };
367
+ if (typeof ns.noiseReductionLevel === "number") {
368
+ nsHandle.processor.setSuppressionLevel(ns.noiseReductionLevel);
369
+ }
370
+ if (typeof ns.enabled === "boolean") {
371
+ nsHandle.processor.setNoiseSuppressionEnabled(ns.enabled);
706
372
  }
707
373
  }
708
- if (newConfig.livekit) {
709
- Object.assign(fullConfig.livekit, newConfig.livekit);
374
+ if (typeof next.muteWhenSilent === "boolean") {
375
+ fullConfig.muteWhenSilent = next.muteWhenSilent;
710
376
  }
711
- console.log("Pipeline config updated:", newConfig);
712
377
  } catch (error) {
713
378
  const err = error instanceof Error ? error : new Error(String(error));
714
- console.error("Failed to update config:", err);
715
379
  emitter.emit("error", err);
716
380
  }
717
381
  },
718
382
  dispose
719
383
  };
384
+ return handle;
720
385
  }
721
386
  // Annotate the CommonJS export names for ESM import in node:
722
387
  0 && (module.exports = {