@tensamin/audio 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ import {
9
9
  import {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin
12
- } from "./chunk-YOSTLLCS.mjs";
12
+ } from "./chunk-ZCC7ID7L.mjs";
13
13
 
14
14
  // src/pipeline/audio-pipeline.ts
15
15
  import mitt from "mitt";
@@ -93,12 +93,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
93
93
  throw err;
94
94
  }
95
95
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
96
+ let vadPlugin;
96
97
  try {
97
- const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
98
+ vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
98
99
  vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
99
100
  try {
100
101
  const timestamp = context.currentTime * 1e3;
101
102
  const newState = vadStateMachine.processFrame(prob, timestamp);
103
+ if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
104
+ vadPlugin.updateSpeakingState(newState.isSpeaking);
105
+ }
102
106
  if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
103
107
  emitter.emit("vadChange", newState);
104
108
  lastVadState = newState;
@@ -4,7 +4,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
4
4
  const smoothing = energyParams.smoothing ?? 0.95;
5
5
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
6
6
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
7
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
7
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
8
8
  const minSNR = energyParams.minSNR ?? 2;
9
9
  const snrRange = energyParams.snrRange ?? 8;
10
10
  return `
@@ -18,6 +18,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
18
18
  this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
19
19
  this.minSNR = ${minSNR};
20
20
  this.snrRange = ${snrRange};
21
+ this.isSpeaking = false;
22
+
23
+ this.port.onmessage = (event) => {
24
+ if (event.data && event.data.isSpeaking !== undefined) {
25
+ this.isSpeaking = event.data.isSpeaking;
26
+ }
27
+ };
21
28
  }
22
29
 
23
30
  process(inputs, outputs, parameters) {
@@ -32,14 +39,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
32
39
  }
33
40
  const rms = Math.sqrt(sum / channel.length);
34
41
 
35
- // Adaptive noise floor estimation
36
- // When signal is quiet, adapt quickly to find new noise floor
37
- // When signal is loud (speech), adapt slowly to avoid raising noise floor
38
- if (rms < this.noiseFloor) {
39
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
40
- } else {
41
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
42
+ // Adaptive noise floor estimation - ONLY during silence
43
+ // This prevents the noise floor from rising during speech
44
+ if (!this.isSpeaking) {
45
+ if (rms < this.noiseFloor) {
46
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
47
+ } else {
48
+ // Even during silence, if we detect a loud signal, adapt very slowly
49
+ // This could be brief noise we haven't classified as speech yet
50
+
51
+ // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
52
+ // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
53
+ // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
54
+ const instantSnr = rms / (this.noiseFloor + 1e-6);
55
+
56
+ if (instantSnr < 3.0) {
57
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
58
+ }
59
+ }
42
60
  }
61
+ // During speech, freeze the noise floor to maintain consistent detection
43
62
 
44
63
  // Calculate Signal-to-Noise Ratio (SNR)
45
64
  const snr = rms / (this.noiseFloor + 1e-6);
@@ -60,6 +79,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
60
79
  };
61
80
  var EnergyVADPlugin = class {
62
81
  name = "energy-vad";
82
+ workletNode = null;
63
83
  async createNode(context, config, onDecision) {
64
84
  if (!config?.enabled) {
65
85
  console.log("VAD disabled, using passthrough node");
@@ -86,6 +106,7 @@ var EnergyVADPlugin = class {
86
106
  let node;
87
107
  try {
88
108
  node = new AudioWorkletNode(context, "energy-vad-processor");
109
+ this.workletNode = node;
89
110
  console.log("Energy VAD node created successfully");
90
111
  } catch (e) {
91
112
  const error = new Error(
@@ -111,6 +132,11 @@ var EnergyVADPlugin = class {
111
132
  };
112
133
  return node;
113
134
  }
135
+ updateSpeakingState(isSpeaking) {
136
+ if (this.workletNode) {
137
+ this.workletNode.port.postMessage({ isSpeaking });
138
+ }
139
+ }
114
140
  };
115
141
 
116
142
  export {
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "./chunk-AHBRT4RD.mjs";
3
+ } from "./chunk-DF4AYGHJ.mjs";
4
4
 
5
5
  // src/livekit/integration.ts
6
6
  async function attachProcessingToTrack(track, config = {}) {
@@ -3,7 +3,7 @@ import {
3
3
  } from "./chunk-XO6B3D4A.mjs";
4
4
  import {
5
5
  EnergyVADPlugin
6
- } from "./chunk-NMHKX64G.mjs";
6
+ } from "./chunk-TLPO52HV.mjs";
7
7
 
8
8
  // src/extensibility/plugins.ts
9
9
  var nsPlugins = /* @__PURE__ */ new Map();
@@ -107,7 +107,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
107
107
  const smoothing = energyParams.smoothing ?? 0.95;
108
108
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
109
109
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
110
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
110
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
111
111
  const minSNR = energyParams.minSNR ?? 2;
112
112
  const snrRange = energyParams.snrRange ?? 8;
113
113
  return `
@@ -121,6 +121,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
121
121
  this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
122
122
  this.minSNR = ${minSNR};
123
123
  this.snrRange = ${snrRange};
124
+ this.isSpeaking = false;
125
+
126
+ this.port.onmessage = (event) => {
127
+ if (event.data && event.data.isSpeaking !== undefined) {
128
+ this.isSpeaking = event.data.isSpeaking;
129
+ }
130
+ };
124
131
  }
125
132
 
126
133
  process(inputs, outputs, parameters) {
@@ -135,14 +142,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
135
142
  }
136
143
  const rms = Math.sqrt(sum / channel.length);
137
144
 
138
- // Adaptive noise floor estimation
139
- // When signal is quiet, adapt quickly to find new noise floor
140
- // When signal is loud (speech), adapt slowly to avoid raising noise floor
141
- if (rms < this.noiseFloor) {
142
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
143
- } else {
144
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
145
+ // Adaptive noise floor estimation - ONLY during silence
146
+ // This prevents the noise floor from rising during speech
147
+ if (!this.isSpeaking) {
148
+ if (rms < this.noiseFloor) {
149
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
150
+ } else {
151
+ // Even during silence, if we detect a loud signal, adapt very slowly
152
+ // This could be brief noise we haven't classified as speech yet
153
+
154
+ // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
155
+ // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
156
+ // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
157
+ const instantSnr = rms / (this.noiseFloor + 1e-6);
158
+
159
+ if (instantSnr < 3.0) {
160
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
161
+ }
162
+ }
145
163
  }
164
+ // During speech, freeze the noise floor to maintain consistent detection
146
165
 
147
166
  // Calculate Signal-to-Noise Ratio (SNR)
148
167
  const snr = rms / (this.noiseFloor + 1e-6);
@@ -163,6 +182,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
163
182
  };
164
183
  var EnergyVADPlugin = class {
165
184
  name = "energy-vad";
185
+ workletNode = null;
166
186
  async createNode(context, config, onDecision) {
167
187
  if (!config?.enabled) {
168
188
  console.log("VAD disabled, using passthrough node");
@@ -189,6 +209,7 @@ var EnergyVADPlugin = class {
189
209
  let node;
190
210
  try {
191
211
  node = new AudioWorkletNode(context, "energy-vad-processor");
212
+ this.workletNode = node;
192
213
  console.log("Energy VAD node created successfully");
193
214
  } catch (e) {
194
215
  const error = new Error(
@@ -214,6 +235,11 @@ var EnergyVADPlugin = class {
214
235
  };
215
236
  return node;
216
237
  }
238
+ updateSpeakingState(isSpeaking) {
239
+ if (this.workletNode) {
240
+ this.workletNode.port.postMessage({ isSpeaking });
241
+ }
242
+ }
217
243
  };
218
244
 
219
245
  // src/extensibility/plugins.ts
@@ -3,9 +3,9 @@ import {
3
3
  getVADPlugin,
4
4
  registerNoiseSuppressionPlugin,
5
5
  registerVADPlugin
6
- } from "../chunk-YOSTLLCS.mjs";
6
+ } from "../chunk-ZCC7ID7L.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-NMHKX64G.mjs";
8
+ import "../chunk-TLPO52HV.mjs";
9
9
  export {
10
10
  getNoiseSuppressionPlugin,
11
11
  getVADPlugin,
package/dist/index.js CHANGED
@@ -159,7 +159,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
159
159
  const smoothing = energyParams.smoothing ?? 0.95;
160
160
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
161
161
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
162
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
162
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
163
163
  const minSNR = energyParams.minSNR ?? 2;
164
164
  const snrRange = energyParams.snrRange ?? 8;
165
165
  return `
@@ -173,6 +173,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
173
173
  this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
174
174
  this.minSNR = ${minSNR};
175
175
  this.snrRange = ${snrRange};
176
+ this.isSpeaking = false;
177
+
178
+ this.port.onmessage = (event) => {
179
+ if (event.data && event.data.isSpeaking !== undefined) {
180
+ this.isSpeaking = event.data.isSpeaking;
181
+ }
182
+ };
176
183
  }
177
184
 
178
185
  process(inputs, outputs, parameters) {
@@ -187,14 +194,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
187
194
  }
188
195
  const rms = Math.sqrt(sum / channel.length);
189
196
 
190
- // Adaptive noise floor estimation
191
- // When signal is quiet, adapt quickly to find new noise floor
192
- // When signal is loud (speech), adapt slowly to avoid raising noise floor
193
- if (rms < this.noiseFloor) {
194
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
195
- } else {
196
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
197
+ // Adaptive noise floor estimation - ONLY during silence
198
+ // This prevents the noise floor from rising during speech
199
+ if (!this.isSpeaking) {
200
+ if (rms < this.noiseFloor) {
201
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
202
+ } else {
203
+ // Even during silence, if we detect a loud signal, adapt very slowly
204
+ // This could be brief noise we haven't classified as speech yet
205
+
206
+ // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
207
+ // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
208
+ // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
209
+ const instantSnr = rms / (this.noiseFloor + 1e-6);
210
+
211
+ if (instantSnr < 3.0) {
212
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
213
+ }
214
+ }
197
215
  }
216
+ // During speech, freeze the noise floor to maintain consistent detection
198
217
 
199
218
  // Calculate Signal-to-Noise Ratio (SNR)
200
219
  const snr = rms / (this.noiseFloor + 1e-6);
@@ -215,6 +234,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
215
234
  };
216
235
  var EnergyVADPlugin = class {
217
236
  name = "energy-vad";
237
+ workletNode = null;
218
238
  async createNode(context, config, onDecision) {
219
239
  if (!config?.enabled) {
220
240
  console.log("VAD disabled, using passthrough node");
@@ -241,6 +261,7 @@ var EnergyVADPlugin = class {
241
261
  let node;
242
262
  try {
243
263
  node = new AudioWorkletNode(context, "energy-vad-processor");
264
+ this.workletNode = node;
244
265
  console.log("Energy VAD node created successfully");
245
266
  } catch (e) {
246
267
  const error = new Error(
@@ -266,6 +287,11 @@ var EnergyVADPlugin = class {
266
287
  };
267
288
  return node;
268
289
  }
290
+ updateSpeakingState(isSpeaking) {
291
+ if (this.workletNode) {
292
+ this.workletNode.port.postMessage({ isSpeaking });
293
+ }
294
+ }
269
295
  };
270
296
 
271
297
  // src/extensibility/plugins.ts
@@ -473,12 +499,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
473
499
  throw err;
474
500
  }
475
501
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
502
+ let vadPlugin;
476
503
  try {
477
- const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
504
+ vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
478
505
  vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
479
506
  try {
480
507
  const timestamp = context.currentTime * 1e3;
481
508
  const newState = vadStateMachine.processFrame(prob, timestamp);
509
+ if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
510
+ vadPlugin.updateSpeakingState(newState.isSpeaking);
511
+ }
482
512
  if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
483
513
  emitter.emit("vadChange", newState);
484
514
  lastVadState = newState;
package/dist/index.mjs CHANGED
@@ -1,10 +1,10 @@
1
1
  import "./chunk-WBQAMGXK.mjs";
2
2
  import {
3
3
  attachProcessingToTrack
4
- } from "./chunk-ERJVV5JR.mjs";
4
+ } from "./chunk-TWQJGBBU.mjs";
5
5
  import {
6
6
  createAudioPipeline
7
- } from "./chunk-AHBRT4RD.mjs";
7
+ } from "./chunk-DF4AYGHJ.mjs";
8
8
  import {
9
9
  VADStateMachine
10
10
  } from "./chunk-N553RHTI.mjs";
@@ -21,13 +21,13 @@ import {
21
21
  getVADPlugin,
22
22
  registerNoiseSuppressionPlugin,
23
23
  registerVADPlugin
24
- } from "./chunk-YOSTLLCS.mjs";
24
+ } from "./chunk-ZCC7ID7L.mjs";
25
25
  import {
26
26
  RNNoisePlugin
27
27
  } from "./chunk-XO6B3D4A.mjs";
28
28
  import {
29
29
  EnergyVADPlugin
30
- } from "./chunk-NMHKX64G.mjs";
30
+ } from "./chunk-TLPO52HV.mjs";
31
31
  export {
32
32
  EnergyVADPlugin,
33
33
  RNNoisePlugin,
@@ -128,7 +128,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
128
128
  const smoothing = energyParams.smoothing ?? 0.95;
129
129
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
130
130
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
131
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
131
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
132
132
  const minSNR = energyParams.minSNR ?? 2;
133
133
  const snrRange = energyParams.snrRange ?? 8;
134
134
  return `
@@ -142,6 +142,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
142
142
  this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
143
143
  this.minSNR = ${minSNR};
144
144
  this.snrRange = ${snrRange};
145
+ this.isSpeaking = false;
146
+
147
+ this.port.onmessage = (event) => {
148
+ if (event.data && event.data.isSpeaking !== undefined) {
149
+ this.isSpeaking = event.data.isSpeaking;
150
+ }
151
+ };
145
152
  }
146
153
 
147
154
  process(inputs, outputs, parameters) {
@@ -156,14 +163,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
156
163
  }
157
164
  const rms = Math.sqrt(sum / channel.length);
158
165
 
159
- // Adaptive noise floor estimation
160
- // When signal is quiet, adapt quickly to find new noise floor
161
- // When signal is loud (speech), adapt slowly to avoid raising noise floor
162
- if (rms < this.noiseFloor) {
163
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
164
- } else {
165
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
166
+ // Adaptive noise floor estimation - ONLY during silence
167
+ // This prevents the noise floor from rising during speech
168
+ if (!this.isSpeaking) {
169
+ if (rms < this.noiseFloor) {
170
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
171
+ } else {
172
+ // Even during silence, if we detect a loud signal, adapt very slowly
173
+ // This could be brief noise we haven't classified as speech yet
174
+
175
+ // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
176
+ // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
177
+ // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
178
+ const instantSnr = rms / (this.noiseFloor + 1e-6);
179
+
180
+ if (instantSnr < 3.0) {
181
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
182
+ }
183
+ }
166
184
  }
185
+ // During speech, freeze the noise floor to maintain consistent detection
167
186
 
168
187
  // Calculate Signal-to-Noise Ratio (SNR)
169
188
  const snr = rms / (this.noiseFloor + 1e-6);
@@ -184,6 +203,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
184
203
  };
185
204
  var EnergyVADPlugin = class {
186
205
  name = "energy-vad";
206
+ workletNode = null;
187
207
  async createNode(context, config, onDecision) {
188
208
  if (!config?.enabled) {
189
209
  console.log("VAD disabled, using passthrough node");
@@ -210,6 +230,7 @@ var EnergyVADPlugin = class {
210
230
  let node;
211
231
  try {
212
232
  node = new AudioWorkletNode(context, "energy-vad-processor");
233
+ this.workletNode = node;
213
234
  console.log("Energy VAD node created successfully");
214
235
  } catch (e) {
215
236
  const error = new Error(
@@ -235,6 +256,11 @@ var EnergyVADPlugin = class {
235
256
  };
236
257
  return node;
237
258
  }
259
+ updateSpeakingState(isSpeaking) {
260
+ if (this.workletNode) {
261
+ this.workletNode.port.postMessage({ isSpeaking });
262
+ }
263
+ }
238
264
  };
239
265
 
240
266
  // src/extensibility/plugins.ts
@@ -436,12 +462,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
436
462
  throw err;
437
463
  }
438
464
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
465
+ let vadPlugin;
439
466
  try {
440
- const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
467
+ vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
441
468
  vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
442
469
  try {
443
470
  const timestamp = context.currentTime * 1e3;
444
471
  const newState = vadStateMachine.processFrame(prob, timestamp);
472
+ if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
473
+ vadPlugin.updateSpeakingState(newState.isSpeaking);
474
+ }
445
475
  if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
446
476
  emitter.emit("vadChange", newState);
447
477
  lastVadState = newState;
@@ -1,12 +1,12 @@
1
1
  import {
2
2
  attachProcessingToTrack
3
- } from "../chunk-ERJVV5JR.mjs";
4
- import "../chunk-AHBRT4RD.mjs";
3
+ } from "../chunk-TWQJGBBU.mjs";
4
+ import "../chunk-DF4AYGHJ.mjs";
5
5
  import "../chunk-N553RHTI.mjs";
6
6
  import "../chunk-OZ7KMC4S.mjs";
7
- import "../chunk-YOSTLLCS.mjs";
7
+ import "../chunk-ZCC7ID7L.mjs";
8
8
  import "../chunk-XO6B3D4A.mjs";
9
- import "../chunk-NMHKX64G.mjs";
9
+ import "../chunk-TLPO52HV.mjs";
10
10
  export {
11
11
  attachProcessingToTrack
12
12
  };
@@ -126,7 +126,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
126
126
  const smoothing = energyParams.smoothing ?? 0.95;
127
127
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
128
128
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
129
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
129
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
130
130
  const minSNR = energyParams.minSNR ?? 2;
131
131
  const snrRange = energyParams.snrRange ?? 8;
132
132
  return `
@@ -140,6 +140,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
140
140
  this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
141
141
  this.minSNR = ${minSNR};
142
142
  this.snrRange = ${snrRange};
143
+ this.isSpeaking = false;
144
+
145
+ this.port.onmessage = (event) => {
146
+ if (event.data && event.data.isSpeaking !== undefined) {
147
+ this.isSpeaking = event.data.isSpeaking;
148
+ }
149
+ };
143
150
  }
144
151
 
145
152
  process(inputs, outputs, parameters) {
@@ -154,14 +161,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
154
161
  }
155
162
  const rms = Math.sqrt(sum / channel.length);
156
163
 
157
- // Adaptive noise floor estimation
158
- // When signal is quiet, adapt quickly to find new noise floor
159
- // When signal is loud (speech), adapt slowly to avoid raising noise floor
160
- if (rms < this.noiseFloor) {
161
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
162
- } else {
163
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
164
+ // Adaptive noise floor estimation - ONLY during silence
165
+ // This prevents the noise floor from rising during speech
166
+ if (!this.isSpeaking) {
167
+ if (rms < this.noiseFloor) {
168
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
169
+ } else {
170
+ // Even during silence, if we detect a loud signal, adapt very slowly
171
+ // This could be brief noise we haven't classified as speech yet
172
+
173
+ // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
174
+ // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
175
+ // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
176
+ const instantSnr = rms / (this.noiseFloor + 1e-6);
177
+
178
+ if (instantSnr < 3.0) {
179
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
180
+ }
181
+ }
164
182
  }
183
+ // During speech, freeze the noise floor to maintain consistent detection
165
184
 
166
185
  // Calculate Signal-to-Noise Ratio (SNR)
167
186
  const snr = rms / (this.noiseFloor + 1e-6);
@@ -182,6 +201,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
182
201
  };
183
202
  var EnergyVADPlugin = class {
184
203
  name = "energy-vad";
204
+ workletNode = null;
185
205
  async createNode(context, config, onDecision) {
186
206
  if (!config?.enabled) {
187
207
  console.log("VAD disabled, using passthrough node");
@@ -208,6 +228,7 @@ var EnergyVADPlugin = class {
208
228
  let node;
209
229
  try {
210
230
  node = new AudioWorkletNode(context, "energy-vad-processor");
231
+ this.workletNode = node;
211
232
  console.log("Energy VAD node created successfully");
212
233
  } catch (e) {
213
234
  const error = new Error(
@@ -233,6 +254,11 @@ var EnergyVADPlugin = class {
233
254
  };
234
255
  return node;
235
256
  }
257
+ updateSpeakingState(isSpeaking) {
258
+ if (this.workletNode) {
259
+ this.workletNode.port.postMessage({ isSpeaking });
260
+ }
261
+ }
236
262
  };
237
263
 
238
264
  // src/extensibility/plugins.ts
@@ -434,12 +460,16 @@ async function createAudioPipeline(sourceTrack, config = {}) {
434
460
  throw err;
435
461
  }
436
462
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
463
+ let vadPlugin;
437
464
  try {
438
- const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
465
+ vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
439
466
  vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
440
467
  try {
441
468
  const timestamp = context.currentTime * 1e3;
442
469
  const newState = vadStateMachine.processFrame(prob, timestamp);
470
+ if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
471
+ vadPlugin.updateSpeakingState(newState.isSpeaking);
472
+ }
443
473
  if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
444
474
  emitter.emit("vadChange", newState);
445
475
  lastVadState = newState;
@@ -1,11 +1,11 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "../chunk-AHBRT4RD.mjs";
3
+ } from "../chunk-DF4AYGHJ.mjs";
4
4
  import "../chunk-N553RHTI.mjs";
5
5
  import "../chunk-OZ7KMC4S.mjs";
6
- import "../chunk-YOSTLLCS.mjs";
6
+ import "../chunk-ZCC7ID7L.mjs";
7
7
  import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-NMHKX64G.mjs";
8
+ import "../chunk-TLPO52HV.mjs";
9
9
  export {
10
10
  createAudioPipeline
11
11
  };
@@ -3,7 +3,9 @@ import 'mitt';
3
3
 
4
4
  declare class EnergyVADPlugin implements VADPlugin {
5
5
  name: string;
6
+ private workletNode;
6
7
  createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
8
+ updateSpeakingState(isSpeaking: boolean): void;
7
9
  }
8
10
 
9
11
  export { EnergyVADPlugin };
@@ -3,7 +3,9 @@ import 'mitt';
3
3
 
4
4
  declare class EnergyVADPlugin implements VADPlugin {
5
5
  name: string;
6
+ private workletNode;
6
7
  createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
8
+ updateSpeakingState(isSpeaking: boolean): void;
7
9
  }
8
10
 
9
11
  export { EnergyVADPlugin };
@@ -28,7 +28,7 @@ var createEnergyVadWorkletCode = (vadConfig) => {
28
28
  const smoothing = energyParams.smoothing ?? 0.95;
29
29
  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
30
30
  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
31
- const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
31
+ const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-4;
32
32
  const minSNR = energyParams.minSNR ?? 2;
33
33
  const snrRange = energyParams.snrRange ?? 8;
34
34
  return `
@@ -42,6 +42,13 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
42
42
  this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
43
43
  this.minSNR = ${minSNR};
44
44
  this.snrRange = ${snrRange};
45
+ this.isSpeaking = false;
46
+
47
+ this.port.onmessage = (event) => {
48
+ if (event.data && event.data.isSpeaking !== undefined) {
49
+ this.isSpeaking = event.data.isSpeaking;
50
+ }
51
+ };
45
52
  }
46
53
 
47
54
  process(inputs, outputs, parameters) {
@@ -56,14 +63,26 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
56
63
  }
57
64
  const rms = Math.sqrt(sum / channel.length);
58
65
 
59
- // Adaptive noise floor estimation
60
- // When signal is quiet, adapt quickly to find new noise floor
61
- // When signal is loud (speech), adapt slowly to avoid raising noise floor
62
- if (rms < this.noiseFloor) {
63
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
64
- } else {
65
- this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
66
+ // Adaptive noise floor estimation - ONLY during silence
67
+ // This prevents the noise floor from rising during speech
68
+ if (!this.isSpeaking) {
69
+ if (rms < this.noiseFloor) {
70
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
71
+ } else {
72
+ // Even during silence, if we detect a loud signal, adapt very slowly
73
+ // This could be brief noise we haven't classified as speech yet
74
+
75
+ // SAFEGUARD: If the signal is significantly louder than the noise floor (e.g. > 3x),
76
+ // assume it's unclassified speech or a sudden loud noise and DO NOT adapt.
77
+ // This prevents the noise floor from "chasing" the speech level during brief pauses or onsets.
78
+ const instantSnr = rms / (this.noiseFloor + 1e-6);
79
+
80
+ if (instantSnr < 3.0) {
81
+ this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
82
+ }
83
+ }
66
84
  }
85
+ // During speech, freeze the noise floor to maintain consistent detection
67
86
 
68
87
  // Calculate Signal-to-Noise Ratio (SNR)
69
88
  const snr = rms / (this.noiseFloor + 1e-6);
@@ -84,6 +103,7 @@ registerProcessor('energy-vad-processor', EnergyVadProcessor);
84
103
  };
85
104
  var EnergyVADPlugin = class {
86
105
  name = "energy-vad";
106
+ workletNode = null;
87
107
  async createNode(context, config, onDecision) {
88
108
  if (!config?.enabled) {
89
109
  console.log("VAD disabled, using passthrough node");
@@ -110,6 +130,7 @@ var EnergyVADPlugin = class {
110
130
  let node;
111
131
  try {
112
132
  node = new AudioWorkletNode(context, "energy-vad-processor");
133
+ this.workletNode = node;
113
134
  console.log("Energy VAD node created successfully");
114
135
  } catch (e) {
115
136
  const error = new Error(
@@ -135,6 +156,11 @@ var EnergyVADPlugin = class {
135
156
  };
136
157
  return node;
137
158
  }
159
+ updateSpeakingState(isSpeaking) {
160
+ if (this.workletNode) {
161
+ this.workletNode.port.postMessage({ isSpeaking });
162
+ }
163
+ }
138
164
  };
139
165
  // Annotate the CommonJS export names for ESM import in node:
140
166
  0 && (module.exports = {
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  EnergyVADPlugin
3
- } from "../chunk-NMHKX64G.mjs";
3
+ } from "../chunk-TLPO52HV.mjs";
4
4
  export {
5
5
  EnergyVADPlugin
6
6
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tensamin/audio",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "main": "dist/index.js",
5
5
  "module": "dist/index.mjs",
6
6
  "types": "dist/index.d.ts",