@tensamin/audio 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  VADStateMachine
3
- } from "./chunk-JJASCVEW.mjs";
3
+ } from "./chunk-N553RHTI.mjs";
4
4
  import {
5
5
  getAudioContext,
6
6
  registerPipeline,
@@ -9,14 +9,16 @@ import {
9
9
  import {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin
12
- } from "./chunk-6P2RDBW5.mjs";
12
+ } from "./chunk-H5UKZU2Y.mjs";
13
13
 
14
14
  // src/pipeline/audio-pipeline.ts
15
15
  import mitt from "mitt";
16
16
  async function createAudioPipeline(sourceTrack, config = {}) {
17
17
  const context = getAudioContext();
18
18
  registerPipeline();
19
- const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl);
19
+ const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
20
+ config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
21
+ );
20
22
  const vadEnabled = config.vad?.enabled !== false;
21
23
  const fullConfig = {
22
24
  noiseSuppression: {
@@ -25,13 +27,38 @@ async function createAudioPipeline(sourceTrack, config = {}) {
25
27
  },
26
28
  vad: {
27
29
  enabled: vadEnabled,
30
+ // Voice-optimized defaults (will be overridden by config)
31
+ startThreshold: 0.6,
32
+ stopThreshold: 0.45,
33
+ hangoverMs: 400,
34
+ preRollMs: 250,
35
+ minSpeechDurationMs: 100,
36
+ minSilenceDurationMs: 150,
37
+ energyVad: {
38
+ smoothing: 0.95,
39
+ initialNoiseFloor: 1e-3,
40
+ noiseFloorAdaptRateQuiet: 0.01,
41
+ noiseFloorAdaptRateLoud: 1e-3,
42
+ minSNR: 2,
43
+ snrRange: 8
44
+ },
28
45
  ...config.vad
29
46
  },
30
47
  output: {
31
48
  speechGain: 1,
32
- silenceGain: vadEnabled ? 0 : 1,
33
- // If no VAD, always output audio
34
- gainRampTime: 0.02,
49
+ silenceGain: 0,
50
+ // Full mute for voice-only
51
+ gainRampTime: 0.015,
52
+ // Fast but smooth transitions
53
+ smoothTransitions: true,
54
+ maxGainDb: 6,
55
+ enableCompression: false,
56
+ compression: {
57
+ threshold: -24,
58
+ ratio: 3,
59
+ attack: 3e-3,
60
+ release: 0.05
61
+ },
35
62
  ...config.output
36
63
  },
37
64
  livekit: { manageTrackMute: false, ...config.livekit }
@@ -42,7 +69,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
42
69
  output: fullConfig.output
43
70
  });
44
71
  if (!sourceTrack || sourceTrack.kind !== "audio") {
45
- throw new Error("createAudioPipeline requires a valid audio MediaStreamTrack");
72
+ throw new Error(
73
+ "createAudioPipeline requires a valid audio MediaStreamTrack"
74
+ );
46
75
  }
47
76
  if (sourceTrack.readyState === "ended") {
48
77
  throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
@@ -56,10 +85,7 @@ async function createAudioPipeline(sourceTrack, config = {}) {
56
85
  const nsPlugin = getNoiseSuppressionPlugin(
57
86
  fullConfig.noiseSuppression?.pluginName
58
87
  );
59
- nsNode = await nsPlugin.createNode(
60
- context,
61
- fullConfig.noiseSuppression
62
- );
88
+ nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
63
89
  } catch (error) {
64
90
  const err = error instanceof Error ? error : new Error(String(error));
65
91
  console.error("Failed to create noise suppression node:", err);
@@ -67,27 +93,27 @@ async function createAudioPipeline(sourceTrack, config = {}) {
67
93
  throw err;
68
94
  }
69
95
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
96
+ let vadPlugin;
70
97
  try {
71
- const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
72
- vadNode = await vadPlugin.createNode(
73
- context,
74
- fullConfig.vad,
75
- (prob) => {
76
- try {
77
- const timestamp = context.currentTime * 1e3;
78
- const newState = vadStateMachine.processFrame(prob, timestamp);
79
- if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
80
- emitter.emit("vadChange", newState);
81
- lastVadState = newState;
82
- updateGain(newState);
83
- }
84
- } catch (vadError) {
85
- const err = vadError instanceof Error ? vadError : new Error(String(vadError));
86
- console.error("Error in VAD callback:", err);
87
- emitter.emit("error", err);
98
+ vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
99
+ vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
100
+ try {
101
+ const timestamp = context.currentTime * 1e3;
102
+ const newState = vadStateMachine.processFrame(prob, timestamp);
103
+ if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
104
+ vadPlugin.updateSpeakingState(newState.isSpeaking);
88
105
  }
106
+ if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
107
+ emitter.emit("vadChange", newState);
108
+ lastVadState = newState;
109
+ updateGain(newState);
110
+ }
111
+ } catch (vadError) {
112
+ const err = vadError instanceof Error ? vadError : new Error(String(vadError));
113
+ console.error("Error in VAD callback:", err);
114
+ emitter.emit("error", err);
89
115
  }
90
- );
116
+ });
91
117
  } catch (error) {
92
118
  const err = error instanceof Error ? error : new Error(String(error));
93
119
  console.error("Failed to create VAD node:", err);
@@ -104,15 +130,31 @@ async function createAudioPipeline(sourceTrack, config = {}) {
104
130
  nsNode.connect(splitter);
105
131
  splitter.connect(vadNode);
106
132
  const delayNode = context.createDelay(1);
107
- const preRollSeconds = (fullConfig.vad?.preRollMs ?? 200) / 1e3;
133
+ const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
108
134
  delayNode.delayTime.value = preRollSeconds;
109
135
  const gainNode = context.createGain();
110
136
  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
137
+ let compressor = null;
138
+ if (fullConfig.output?.enableCompression) {
139
+ compressor = context.createDynamicsCompressor();
140
+ const comp = fullConfig.output.compression;
141
+ compressor.threshold.value = comp.threshold ?? -24;
142
+ compressor.ratio.value = comp.ratio ?? 3;
143
+ compressor.attack.value = comp.attack ?? 3e-3;
144
+ compressor.release.value = comp.release ?? 0.05;
145
+ compressor.knee.value = 10;
146
+ }
111
147
  const destination = context.createMediaStreamDestination();
112
148
  try {
113
149
  splitter.connect(delayNode);
114
150
  delayNode.connect(gainNode);
115
- gainNode.connect(destination);
151
+ if (compressor) {
152
+ gainNode.connect(compressor);
153
+ compressor.connect(destination);
154
+ console.log("Compression enabled:", fullConfig.output?.compression);
155
+ } else {
156
+ gainNode.connect(destination);
157
+ }
116
158
  } catch (error) {
117
159
  const err = error instanceof Error ? error : new Error(String(error));
118
160
  console.error("Failed to wire audio pipeline:", err);
@@ -121,10 +163,24 @@ async function createAudioPipeline(sourceTrack, config = {}) {
121
163
  }
122
164
  function updateGain(state) {
123
165
  try {
124
- const { speechGain, silenceGain, gainRampTime } = fullConfig.output;
125
- const targetGain = state.isSpeaking ? speechGain ?? 1 : silenceGain ?? 0;
166
+ const {
167
+ speechGain = 1,
168
+ silenceGain = 0,
169
+ gainRampTime = 0.015,
170
+ smoothTransitions = true,
171
+ maxGainDb = 6
172
+ } = fullConfig.output;
173
+ const maxGainLinear = Math.pow(10, maxGainDb / 20);
174
+ const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
175
+ const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
126
176
  const now = context.currentTime;
127
- gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime ?? 0.02);
177
+ if (smoothTransitions) {
178
+ gainNode.gain.cancelScheduledValues(now);
179
+ gainNode.gain.setValueAtTime(gainNode.gain.value, now);
180
+ gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
181
+ } else {
182
+ gainNode.gain.setValueAtTime(targetGain, now);
183
+ }
128
184
  } catch (error) {
129
185
  const err = error instanceof Error ? error : new Error(String(error));
130
186
  console.error("Failed to update gain:", err);
@@ -180,6 +236,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
180
236
  vadNode.disconnect();
181
237
  delayNode.disconnect();
182
238
  gainNode.disconnect();
239
+ if (compressor) {
240
+ compressor.disconnect();
241
+ }
183
242
  destination.stream.getTracks().forEach((t) => t.stop());
184
243
  unregisterPipeline();
185
244
  } catch (error) {
@@ -196,7 +255,47 @@ async function createAudioPipeline(sourceTrack, config = {}) {
196
255
  try {
197
256
  if (newConfig.vad) {
198
257
  vadStateMachine.updateConfig(newConfig.vad);
258
+ Object.assign(fullConfig.vad, newConfig.vad);
259
+ if (newConfig.vad.preRollMs !== void 0) {
260
+ const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
261
+ delayNode.delayTime.setValueAtTime(
262
+ preRollSeconds2,
263
+ context.currentTime
264
+ );
265
+ }
266
+ }
267
+ if (newConfig.output) {
268
+ Object.assign(fullConfig.output, newConfig.output);
269
+ updateGain(lastVadState);
270
+ if (compressor && newConfig.output.compression) {
271
+ const comp = newConfig.output.compression;
272
+ if (comp.threshold !== void 0) {
273
+ compressor.threshold.setValueAtTime(
274
+ comp.threshold,
275
+ context.currentTime
276
+ );
277
+ }
278
+ if (comp.ratio !== void 0) {
279
+ compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
280
+ }
281
+ if (comp.attack !== void 0) {
282
+ compressor.attack.setValueAtTime(
283
+ comp.attack,
284
+ context.currentTime
285
+ );
286
+ }
287
+ if (comp.release !== void 0) {
288
+ compressor.release.setValueAtTime(
289
+ comp.release,
290
+ context.currentTime
291
+ );
292
+ }
293
+ }
294
+ }
295
+ if (newConfig.livekit) {
296
+ Object.assign(fullConfig.livekit, newConfig.livekit);
199
297
  }
298
+ console.log("Pipeline config updated:", newConfig);
200
299
  } catch (error) {
201
300
  const err = error instanceof Error ? error : new Error(String(error));
202
301
  console.error("Failed to update config:", err);
@@ -102,13 +102,32 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
102
102
  };
103
103
 
104
104
  // src/vad/vad-node.ts
105
- var energyVadWorkletCode = `
105
+ var createEnergyVadWorkletCode = (vadConfig) => {
106
+ const energyParams = vadConfig?.energyVad || {};
107
+ const smoothing = energyParams.smoothing ?? 0.95;
108
+ const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
109
+ const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
110
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
111
+ const minSNR = energyParams.minSNR ?? 2;
112
+ const snrRange = energyParams.snrRange ?? 8;
113
+ return `
106
114
  class EnergyVadProcessor extends AudioWorkletProcessor {
107
115
  constructor() {
108
116
  super();
109
- this.smoothing = 0.95;
117
+ this.smoothing = ${smoothing};
110
118
  this.energy = 0;
111
- this.noiseFloor = 0.001;
119
+ this.noiseFloor = ${initialNoiseFloor};
120
+ this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
121
+ this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
122
+ this.minSNR = ${minSNR};
123
+ this.snrRange = ${snrRange};
124
+ this.isSpeaking = false;
125
+
126
+ this.port.onmessage = (event) => {
127
+ if (event.data && event.data.isSpeaking !== undefined) {
128
+ this.isSpeaking = event.data.isSpeaking;
129
+ }
130
+ };
112
131
  }
113
132
 
114
133
  process(inputs, outputs, parameters) {
@@ -116,41 +135,54 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
116
135
  if (!input || !input.length) return true;
117
136
  const channel = input[0];
118
137
 
119
- // Calculate RMS
138
+ // Calculate RMS (Root Mean Square) energy
120
139
  let sum = 0;
121
140
  for (let i = 0; i < channel.length; i++) {
122
141
  sum += channel[i] * channel[i];
123
142
  }
124
143
  const rms = Math.sqrt(sum / channel.length);
125
144
 
126
- // Simple adaptive noise floor (very basic)
127
- if (rms < this.noiseFloor) {
128
- this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
129
- } else {
130
- this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
145
+ // Adaptive noise floor estimation - ONLY during silence
146
+ // This prevents the noise floor from rising during speech
147
+ if (!this.isSpeaking) {
148
+ if (rms < this.noiseFloor) {
149
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
150
+ } else {
151
+ // Even during silence, if we detect a loud signal, adapt very slowly
152
+ // This could be brief noise we haven't classified as speech yet
153
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
154
+ }
131
155
  }
156
+ // During speech, freeze the noise floor to maintain consistent detection
132
157
 
133
- // Calculate "probability" based on SNR
134
- // This is a heuristic mapping from energy to 0-1
158
+ // Calculate Signal-to-Noise Ratio (SNR)
135
159
  const snr = rms / (this.noiseFloor + 1e-6);
136
- const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
160
+
161
+ // Map SNR to probability (0-1)
162
+ // Probability is 0 when SNR <= minSNR
163
+ // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
164
+ // Probability is 1 when SNR >= (minSNR + snrRange)
165
+ const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
137
166
 
138
- this.port.postMessage({ probability });
167
+ this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
139
168
 
140
169
  return true;
141
170
  }
142
171
  }
143
172
  registerProcessor('energy-vad-processor', EnergyVadProcessor);
144
173
  `;
174
+ };
145
175
  var EnergyVADPlugin = class {
146
176
  name = "energy-vad";
177
+ workletNode = null;
147
178
  async createNode(context, config, onDecision) {
148
179
  if (!config?.enabled) {
149
180
  console.log("VAD disabled, using passthrough node");
150
181
  const pass = context.createGain();
151
182
  return pass;
152
183
  }
153
- const blob = new Blob([energyVadWorkletCode], {
184
+ const workletCode = createEnergyVadWorkletCode(config);
185
+ const blob = new Blob([workletCode], {
154
186
  type: "application/javascript"
155
187
  });
156
188
  const url = URL.createObjectURL(blob);
@@ -169,6 +201,7 @@ var EnergyVADPlugin = class {
169
201
  let node;
170
202
  try {
171
203
  node = new AudioWorkletNode(context, "energy-vad-processor");
204
+ this.workletNode = node;
172
205
  console.log("Energy VAD node created successfully");
173
206
  } catch (e) {
174
207
  const error = new Error(
@@ -194,6 +227,11 @@ var EnergyVADPlugin = class {
194
227
  };
195
228
  return node;
196
229
  }
230
+ updateSpeakingState(isSpeaking) {
231
+ if (this.workletNode) {
232
+ this.workletNode.port.postMessage({ isSpeaking });
233
+ }
234
+ }
197
235
  };
198
236
 
199
237
  // src/extensibility/plugins.ts
@@ -3,9 +3,9 @@ import {
3
3
  getVADPlugin,
4
4
  registerNoiseSuppressionPlugin,
5
5
  registerVADPlugin
6
- } from "../chunk-6P2RDBW5.mjs";
6
+ } from "../chunk-H5UKZU2Y.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-R5JVHKWA.mjs";
8
+ import "../chunk-VEJXAEMM.mjs";
9
9
  export {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin,