@tensamin/audio 0.1.3 → 0.1.4

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -123,13 +123,25 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
123
123
  };
124
124
 
125
125
  // src/vad/vad-node.ts
126
- var energyVadWorkletCode = `
126
+ var createEnergyVadWorkletCode = (vadConfig) => {
127
+ const energyParams = vadConfig?.energyVad || {};
128
+ const smoothing = energyParams.smoothing ?? 0.95;
129
+ const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
130
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
131
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
132
+ const minSNR = energyParams.minSNR ?? 2;
133
+ const snrRange = energyParams.snrRange ?? 8;
134
+ return `
127
135
  class EnergyVadProcessor extends AudioWorkletProcessor {
128
136
  constructor() {
129
137
  super();
130
- this.smoothing = 0.95;
138
+ this.smoothing = ${smoothing};
131
139
  this.energy = 0;
132
- this.noiseFloor = 0.001;
140
+ this.noiseFloor = ${initialNoiseFloor};
141
+ this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
142
+ this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
143
+ this.minSNR = ${minSNR};
144
+ this.snrRange = ${snrRange};
133
145
  }
134
146
 
135
147
  process(inputs, outputs, parameters) {
@@ -137,32 +149,39 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
137
149
  if (!input || !input.length) return true;
138
150
  const channel = input[0];
139
151
 
140
- // Calculate RMS
152
+ // Calculate RMS (Root Mean Square) energy
141
153
  let sum = 0;
142
154
  for (let i = 0; i < channel.length; i++) {
143
155
  sum += channel[i] * channel[i];
144
156
  }
145
157
  const rms = Math.sqrt(sum / channel.length);
146
158
 
147
- // Simple adaptive noise floor (very basic)
159
+ // Adaptive noise floor estimation
160
+ // When signal is quiet, adapt quickly to find new noise floor
161
+ // When signal is loud (speech), adapt slowly to avoid raising noise floor
148
162
  if (rms < this.noiseFloor) {
149
- this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
163
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
150
164
  } else {
151
- this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
165
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
152
166
  }
153
167
 
154
- // Calculate "probability" based on SNR
155
- // This is a heuristic mapping from energy to 0-1
168
+ // Calculate Signal-to-Noise Ratio (SNR)
156
169
  const snr = rms / (this.noiseFloor + 1e-6);
157
- const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
170
+
171
+ // Map SNR to probability (0-1)
172
+ // Probability is 0 when SNR <= minSNR
173
+ // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
174
+ // Probability is 1 when SNR >= (minSNR + snrRange)
175
+ const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
158
176
 
159
- this.port.postMessage({ probability });
177
+ this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
160
178
 
161
179
  return true;
162
180
  }
163
181
  }
164
182
  registerProcessor('energy-vad-processor', EnergyVadProcessor);
165
183
  `;
184
+ };
166
185
  var EnergyVADPlugin = class {
167
186
  name = "energy-vad";
168
187
  async createNode(context, config, onDecision) {
@@ -171,7 +190,8 @@ var EnergyVADPlugin = class {
171
190
  const pass = context.createGain();
172
191
  return pass;
173
192
  }
174
- const blob = new Blob([energyVadWorkletCode], {
193
+ const workletCode = createEnergyVadWorkletCode(config);
194
+ const blob = new Blob([workletCode], {
175
195
  type: "application/javascript"
176
196
  });
177
197
  const url = URL.createObjectURL(blob);
@@ -251,31 +271,60 @@ var VADStateMachine = class {
251
271
  currentState = "silent";
252
272
  lastSpeechTime = 0;
253
273
  speechStartTime = 0;
274
+ lastSilenceTime = 0;
254
275
  frameDurationMs = 20;
255
276
  // Assumed frame duration, updated by calls
256
277
  constructor(config) {
257
278
  this.config = {
258
279
  enabled: config?.enabled ?? true,
259
280
  pluginName: config?.pluginName ?? "energy-vad",
260
- startThreshold: config?.startThreshold ?? 0.5,
261
- stopThreshold: config?.stopThreshold ?? 0.4,
262
- hangoverMs: config?.hangoverMs ?? 300,
263
- preRollMs: config?.preRollMs ?? 200
281
+ // Voice-optimized defaults
282
+ startThreshold: config?.startThreshold ?? 0.6,
283
+ // Higher threshold to avoid noise
284
+ stopThreshold: config?.stopThreshold ?? 0.45,
285
+ // Balanced for voice
286
+ hangoverMs: config?.hangoverMs ?? 400,
287
+ // Smooth for natural speech
288
+ preRollMs: config?.preRollMs ?? 250,
289
+ // Generous pre-roll
290
+ minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
291
+ minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
292
+ energyVad: {
293
+ smoothing: config?.energyVad?.smoothing ?? 0.95,
294
+ initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
295
+ noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 0.01,
296
+ noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 1e-3,
297
+ minSNR: config?.energyVad?.minSNR ?? 2,
298
+ snrRange: config?.energyVad?.snrRange ?? 8
299
+ }
264
300
  };
301
+ this.lastSilenceTime = Date.now();
265
302
  }
266
303
  updateConfig(config) {
267
304
  this.config = { ...this.config, ...config };
268
305
  }
269
306
  processFrame(probability, timestamp) {
270
- const { startThreshold, stopThreshold, hangoverMs } = this.config;
307
+ const {
308
+ startThreshold,
309
+ stopThreshold,
310
+ hangoverMs,
311
+ minSpeechDurationMs,
312
+ minSilenceDurationMs
313
+ } = this.config;
271
314
  let newState = this.currentState;
272
315
  if (this.currentState === "silent" || this.currentState === "speech_ending") {
273
316
  if (probability >= startThreshold) {
274
- newState = "speech_starting";
275
- this.speechStartTime = timestamp;
276
- this.lastSpeechTime = timestamp;
317
+ const silenceDuration = timestamp - this.lastSilenceTime;
318
+ if (silenceDuration >= minSilenceDurationMs) {
319
+ newState = "speech_starting";
320
+ this.speechStartTime = timestamp;
321
+ this.lastSpeechTime = timestamp;
322
+ } else {
323
+ newState = "silent";
324
+ }
277
325
  } else {
278
326
  newState = "silent";
327
+ this.lastSilenceTime = timestamp;
279
328
  }
280
329
  } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
281
330
  if (probability >= stopThreshold) {
@@ -283,10 +332,15 @@ var VADStateMachine = class {
283
332
  this.lastSpeechTime = timestamp;
284
333
  } else {
285
334
  const timeSinceSpeech = timestamp - this.lastSpeechTime;
335
+ const speechDuration = timestamp - this.speechStartTime;
286
336
  if (timeSinceSpeech < hangoverMs) {
287
337
  newState = "speaking";
338
+ } else if (speechDuration < minSpeechDurationMs) {
339
+ newState = "silent";
340
+ this.lastSilenceTime = timestamp;
288
341
  } else {
289
342
  newState = "speech_ending";
343
+ this.lastSilenceTime = timestamp;
290
344
  }
291
345
  }
292
346
  }
@@ -305,7 +359,9 @@ var VADStateMachine = class {
305
359
  async function createAudioPipeline(sourceTrack, config = {}) {
306
360
  const context = getAudioContext();
307
361
  registerPipeline();
308
- const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl);
362
+ const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
363
+ config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
364
+ );
309
365
  const vadEnabled = config.vad?.enabled !== false;
310
366
  const fullConfig = {
311
367
  noiseSuppression: {
@@ -314,13 +370,38 @@ async function createAudioPipeline(sourceTrack, config = {}) {
314
370
  },
315
371
  vad: {
316
372
  enabled: vadEnabled,
373
+ // Voice-optimized defaults (will be overridden by config)
374
+ startThreshold: 0.6,
375
+ stopThreshold: 0.45,
376
+ hangoverMs: 400,
377
+ preRollMs: 250,
378
+ minSpeechDurationMs: 100,
379
+ minSilenceDurationMs: 150,
380
+ energyVad: {
381
+ smoothing: 0.95,
382
+ initialNoiseFloor: 1e-3,
383
+ noiseFloorAdaptRateQuiet: 0.01,
384
+ noiseFloorAdaptRateLoud: 1e-3,
385
+ minSNR: 2,
386
+ snrRange: 8
387
+ },
317
388
  ...config.vad
318
389
  },
319
390
  output: {
320
391
  speechGain: 1,
321
- silenceGain: vadEnabled ? 0 : 1,
322
- // If no VAD, always output audio
323
- gainRampTime: 0.02,
392
+ silenceGain: 0,
393
+ // Full mute for voice-only
394
+ gainRampTime: 0.015,
395
+ // Fast but smooth transitions
396
+ smoothTransitions: true,
397
+ maxGainDb: 6,
398
+ enableCompression: false,
399
+ compression: {
400
+ threshold: -24,
401
+ ratio: 3,
402
+ attack: 3e-3,
403
+ release: 0.05
404
+ },
324
405
  ...config.output
325
406
  },
326
407
  livekit: { manageTrackMute: false, ...config.livekit }
@@ -331,7 +412,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
331
412
  output: fullConfig.output
332
413
  });
333
414
  if (!sourceTrack || sourceTrack.kind !== "audio") {
334
- throw new Error("createAudioPipeline requires a valid audio MediaStreamTrack");
415
+ throw new Error(
416
+ "createAudioPipeline requires a valid audio MediaStreamTrack"
417
+ );
335
418
  }
336
419
  if (sourceTrack.readyState === "ended") {
337
420
  throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
@@ -345,10 +428,7 @@ async function createAudioPipeline(sourceTrack, config = {}) {
345
428
  const nsPlugin = getNoiseSuppressionPlugin(
346
429
  fullConfig.noiseSuppression?.pluginName
347
430
  );
348
- nsNode = await nsPlugin.createNode(
349
- context,
350
- fullConfig.noiseSuppression
351
- );
431
+ nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
352
432
  } catch (error) {
353
433
  const err = error instanceof Error ? error : new Error(String(error));
354
434
  console.error("Failed to create noise suppression node:", err);
@@ -358,25 +438,21 @@ async function createAudioPipeline(sourceTrack, config = {}) {
358
438
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
359
439
  try {
360
440
  const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
361
- vadNode = await vadPlugin.createNode(
362
- context,
363
- fullConfig.vad,
364
- (prob) => {
365
- try {
366
- const timestamp = context.currentTime * 1e3;
367
- const newState = vadStateMachine.processFrame(prob, timestamp);
368
- if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
369
- emitter.emit("vadChange", newState);
370
- lastVadState = newState;
371
- updateGain(newState);
372
- }
373
- } catch (vadError) {
374
- const err = vadError instanceof Error ? vadError : new Error(String(vadError));
375
- console.error("Error in VAD callback:", err);
376
- emitter.emit("error", err);
441
+ vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
442
+ try {
443
+ const timestamp = context.currentTime * 1e3;
444
+ const newState = vadStateMachine.processFrame(prob, timestamp);
445
+ if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
446
+ emitter.emit("vadChange", newState);
447
+ lastVadState = newState;
448
+ updateGain(newState);
377
449
  }
450
+ } catch (vadError) {
451
+ const err = vadError instanceof Error ? vadError : new Error(String(vadError));
452
+ console.error("Error in VAD callback:", err);
453
+ emitter.emit("error", err);
378
454
  }
379
- );
455
+ });
380
456
  } catch (error) {
381
457
  const err = error instanceof Error ? error : new Error(String(error));
382
458
  console.error("Failed to create VAD node:", err);
@@ -393,15 +469,31 @@ async function createAudioPipeline(sourceTrack, config = {}) {
393
469
  nsNode.connect(splitter);
394
470
  splitter.connect(vadNode);
395
471
  const delayNode = context.createDelay(1);
396
- const preRollSeconds = (fullConfig.vad?.preRollMs ?? 200) / 1e3;
472
+ const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
397
473
  delayNode.delayTime.value = preRollSeconds;
398
474
  const gainNode = context.createGain();
399
475
  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
476
+ let compressor = null;
477
+ if (fullConfig.output?.enableCompression) {
478
+ compressor = context.createDynamicsCompressor();
479
+ const comp = fullConfig.output.compression;
480
+ compressor.threshold.value = comp.threshold ?? -24;
481
+ compressor.ratio.value = comp.ratio ?? 3;
482
+ compressor.attack.value = comp.attack ?? 3e-3;
483
+ compressor.release.value = comp.release ?? 0.05;
484
+ compressor.knee.value = 10;
485
+ }
400
486
  const destination = context.createMediaStreamDestination();
401
487
  try {
402
488
  splitter.connect(delayNode);
403
489
  delayNode.connect(gainNode);
404
- gainNode.connect(destination);
490
+ if (compressor) {
491
+ gainNode.connect(compressor);
492
+ compressor.connect(destination);
493
+ console.log("Compression enabled:", fullConfig.output?.compression);
494
+ } else {
495
+ gainNode.connect(destination);
496
+ }
405
497
  } catch (error) {
406
498
  const err = error instanceof Error ? error : new Error(String(error));
407
499
  console.error("Failed to wire audio pipeline:", err);
@@ -410,10 +502,24 @@ async function createAudioPipeline(sourceTrack, config = {}) {
410
502
  }
411
503
  function updateGain(state) {
412
504
  try {
413
- const { speechGain, silenceGain, gainRampTime } = fullConfig.output;
414
- const targetGain = state.isSpeaking ? speechGain ?? 1 : silenceGain ?? 0;
505
+ const {
506
+ speechGain = 1,
507
+ silenceGain = 0,
508
+ gainRampTime = 0.015,
509
+ smoothTransitions = true,
510
+ maxGainDb = 6
511
+ } = fullConfig.output;
512
+ const maxGainLinear = Math.pow(10, maxGainDb / 20);
513
+ const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
514
+ const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
415
515
  const now = context.currentTime;
416
- gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime ?? 0.02);
516
+ if (smoothTransitions) {
517
+ gainNode.gain.cancelScheduledValues(now);
518
+ gainNode.gain.setValueAtTime(gainNode.gain.value, now);
519
+ gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
520
+ } else {
521
+ gainNode.gain.setValueAtTime(targetGain, now);
522
+ }
417
523
  } catch (error) {
418
524
  const err = error instanceof Error ? error : new Error(String(error));
419
525
  console.error("Failed to update gain:", err);
@@ -469,6 +575,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
469
575
  vadNode.disconnect();
470
576
  delayNode.disconnect();
471
577
  gainNode.disconnect();
578
+ if (compressor) {
579
+ compressor.disconnect();
580
+ }
472
581
  destination.stream.getTracks().forEach((t) => t.stop());
473
582
  unregisterPipeline();
474
583
  } catch (error) {
@@ -485,7 +594,47 @@ async function createAudioPipeline(sourceTrack, config = {}) {
485
594
  try {
486
595
  if (newConfig.vad) {
487
596
  vadStateMachine.updateConfig(newConfig.vad);
597
+ Object.assign(fullConfig.vad, newConfig.vad);
598
+ if (newConfig.vad.preRollMs !== void 0) {
599
+ const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
600
+ delayNode.delayTime.setValueAtTime(
601
+ preRollSeconds2,
602
+ context.currentTime
603
+ );
604
+ }
605
+ }
606
+ if (newConfig.output) {
607
+ Object.assign(fullConfig.output, newConfig.output);
608
+ updateGain(lastVadState);
609
+ if (compressor && newConfig.output.compression) {
610
+ const comp = newConfig.output.compression;
611
+ if (comp.threshold !== void 0) {
612
+ compressor.threshold.setValueAtTime(
613
+ comp.threshold,
614
+ context.currentTime
615
+ );
616
+ }
617
+ if (comp.ratio !== void 0) {
618
+ compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
619
+ }
620
+ if (comp.attack !== void 0) {
621
+ compressor.attack.setValueAtTime(
622
+ comp.attack,
623
+ context.currentTime
624
+ );
625
+ }
626
+ if (comp.release !== void 0) {
627
+ compressor.release.setValueAtTime(
628
+ comp.release,
629
+ context.currentTime
630
+ );
631
+ }
632
+ }
633
+ }
634
+ if (newConfig.livekit) {
635
+ Object.assign(fullConfig.livekit, newConfig.livekit);
488
636
  }
637
+ console.log("Pipeline config updated:", newConfig);
489
638
  } catch (error) {
490
639
  const err = error instanceof Error ? error : new Error(String(error));
491
640
  console.error("Failed to update config:", err);
@@ -1,12 +1,12 @@
1
1
  import {
2
2
  attachProcessingToTrack
3
- } from "../chunk-XMTQPMQ6.mjs";
4
- import "../chunk-EXH2PNUE.mjs";
5
- import "../chunk-JJASCVEW.mjs";
3
+ } from "../chunk-ERJVV5JR.mjs";
4
+ import "../chunk-AHBRT4RD.mjs";
5
+ import "../chunk-N553RHTI.mjs";
6
6
  import "../chunk-OZ7KMC4S.mjs";
7
- import "../chunk-6P2RDBW5.mjs";
7
+ import "../chunk-YOSTLLCS.mjs";
8
8
  import "../chunk-XO6B3D4A.mjs";
9
- import "../chunk-R5JVHKWA.mjs";
9
+ import "../chunk-NMHKX64G.mjs";
10
10
  export {
11
11
  attachProcessingToTrack
12
12
  };