@tensamin/audio 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +217 -54
- package/dist/{chunk-XMTQPMQ6.mjs → chunk-GVKCBKW6.mjs} +1 -1
- package/dist/{chunk-6P2RDBW5.mjs → chunk-H5UKZU2Y.mjs} +1 -1
- package/dist/chunk-N553RHTI.mjs +93 -0
- package/dist/chunk-VEJXAEMM.mjs +136 -0
- package/dist/{chunk-EXH2PNUE.mjs → chunk-XXTNAUYX.mjs} +133 -34
- package/dist/extensibility/plugins.js +52 -14
- package/dist/extensibility/plugins.mjs +2 -2
- package/dist/index.js +225 -54
- package/dist/index.mjs +5 -5
- package/dist/livekit/integration.js +225 -54
- package/dist/livekit/integration.mjs +5 -5
- package/dist/pipeline/audio-pipeline.js +225 -54
- package/dist/pipeline/audio-pipeline.mjs +4 -4
- package/dist/types.d.mts +118 -10
- package/dist/types.d.ts +118 -10
- package/dist/vad/vad-node.d.mts +2 -0
- package/dist/vad/vad-node.d.ts +2 -0
- package/dist/vad/vad-node.js +52 -14
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.d.mts +1 -0
- package/dist/vad/vad-state.d.ts +1 -0
- package/dist/vad/vad-state.js +42 -8
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
- package/dist/chunk-JJASCVEW.mjs +0 -59
- package/dist/chunk-R5JVHKWA.mjs +0 -98
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
VADStateMachine
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-N553RHTI.mjs";
|
|
4
4
|
import {
|
|
5
5
|
getAudioContext,
|
|
6
6
|
registerPipeline,
|
|
@@ -9,14 +9,16 @@ import {
|
|
|
9
9
|
import {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-H5UKZU2Y.mjs";
|
|
13
13
|
|
|
14
14
|
// src/pipeline/audio-pipeline.ts
|
|
15
15
|
import mitt from "mitt";
|
|
16
16
|
async function createAudioPipeline(sourceTrack, config = {}) {
|
|
17
17
|
const context = getAudioContext();
|
|
18
18
|
registerPipeline();
|
|
19
|
-
const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
|
|
19
|
+
const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
|
|
20
|
+
config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
|
|
21
|
+
);
|
|
20
22
|
const vadEnabled = config.vad?.enabled !== false;
|
|
21
23
|
const fullConfig = {
|
|
22
24
|
noiseSuppression: {
|
|
@@ -25,13 +27,38 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
25
27
|
},
|
|
26
28
|
vad: {
|
|
27
29
|
enabled: vadEnabled,
|
|
30
|
+
// Voice-optimized defaults (will be overridden by config)
|
|
31
|
+
startThreshold: 0.6,
|
|
32
|
+
stopThreshold: 0.45,
|
|
33
|
+
hangoverMs: 400,
|
|
34
|
+
preRollMs: 250,
|
|
35
|
+
minSpeechDurationMs: 100,
|
|
36
|
+
minSilenceDurationMs: 150,
|
|
37
|
+
energyVad: {
|
|
38
|
+
smoothing: 0.95,
|
|
39
|
+
initialNoiseFloor: 1e-3,
|
|
40
|
+
noiseFloorAdaptRateQuiet: 0.01,
|
|
41
|
+
noiseFloorAdaptRateLoud: 1e-3,
|
|
42
|
+
minSNR: 2,
|
|
43
|
+
snrRange: 8
|
|
44
|
+
},
|
|
28
45
|
...config.vad
|
|
29
46
|
},
|
|
30
47
|
output: {
|
|
31
48
|
speechGain: 1,
|
|
32
|
-
silenceGain:
|
|
33
|
-
//
|
|
34
|
-
gainRampTime: 0.
|
|
49
|
+
silenceGain: 0,
|
|
50
|
+
// Full mute for voice-only
|
|
51
|
+
gainRampTime: 0.015,
|
|
52
|
+
// Fast but smooth transitions
|
|
53
|
+
smoothTransitions: true,
|
|
54
|
+
maxGainDb: 6,
|
|
55
|
+
enableCompression: false,
|
|
56
|
+
compression: {
|
|
57
|
+
threshold: -24,
|
|
58
|
+
ratio: 3,
|
|
59
|
+
attack: 3e-3,
|
|
60
|
+
release: 0.05
|
|
61
|
+
},
|
|
35
62
|
...config.output
|
|
36
63
|
},
|
|
37
64
|
livekit: { manageTrackMute: false, ...config.livekit }
|
|
@@ -42,7 +69,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
42
69
|
output: fullConfig.output
|
|
43
70
|
});
|
|
44
71
|
if (!sourceTrack || sourceTrack.kind !== "audio") {
|
|
45
|
-
throw new Error(
|
|
72
|
+
throw new Error(
|
|
73
|
+
"createAudioPipeline requires a valid audio MediaStreamTrack"
|
|
74
|
+
);
|
|
46
75
|
}
|
|
47
76
|
if (sourceTrack.readyState === "ended") {
|
|
48
77
|
throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
|
|
@@ -56,10 +85,7 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
56
85
|
const nsPlugin = getNoiseSuppressionPlugin(
|
|
57
86
|
fullConfig.noiseSuppression?.pluginName
|
|
58
87
|
);
|
|
59
|
-
nsNode = await nsPlugin.createNode(
|
|
60
|
-
context,
|
|
61
|
-
fullConfig.noiseSuppression
|
|
62
|
-
);
|
|
88
|
+
nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
|
|
63
89
|
} catch (error) {
|
|
64
90
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
65
91
|
console.error("Failed to create noise suppression node:", err);
|
|
@@ -67,27 +93,27 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
67
93
|
throw err;
|
|
68
94
|
}
|
|
69
95
|
const vadStateMachine = new VADStateMachine(fullConfig.vad);
|
|
96
|
+
let vadPlugin;
|
|
70
97
|
try {
|
|
71
|
-
|
|
72
|
-
vadNode = await vadPlugin.createNode(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
79
|
-
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
80
|
-
emitter.emit("vadChange", newState);
|
|
81
|
-
lastVadState = newState;
|
|
82
|
-
updateGain(newState);
|
|
83
|
-
}
|
|
84
|
-
} catch (vadError) {
|
|
85
|
-
const err = vadError instanceof Error ? vadError : new Error(String(vadError));
|
|
86
|
-
console.error("Error in VAD callback:", err);
|
|
87
|
-
emitter.emit("error", err);
|
|
98
|
+
vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
|
|
99
|
+
vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
|
|
100
|
+
try {
|
|
101
|
+
const timestamp = context.currentTime * 1e3;
|
|
102
|
+
const newState = vadStateMachine.processFrame(prob, timestamp);
|
|
103
|
+
if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
|
|
104
|
+
vadPlugin.updateSpeakingState(newState.isSpeaking);
|
|
88
105
|
}
|
|
106
|
+
if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
|
|
107
|
+
emitter.emit("vadChange", newState);
|
|
108
|
+
lastVadState = newState;
|
|
109
|
+
updateGain(newState);
|
|
110
|
+
}
|
|
111
|
+
} catch (vadError) {
|
|
112
|
+
const err = vadError instanceof Error ? vadError : new Error(String(vadError));
|
|
113
|
+
console.error("Error in VAD callback:", err);
|
|
114
|
+
emitter.emit("error", err);
|
|
89
115
|
}
|
|
90
|
-
);
|
|
116
|
+
});
|
|
91
117
|
} catch (error) {
|
|
92
118
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
93
119
|
console.error("Failed to create VAD node:", err);
|
|
@@ -104,15 +130,31 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
104
130
|
nsNode.connect(splitter);
|
|
105
131
|
splitter.connect(vadNode);
|
|
106
132
|
const delayNode = context.createDelay(1);
|
|
107
|
-
const preRollSeconds = (fullConfig.vad?.preRollMs ??
|
|
133
|
+
const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
|
|
108
134
|
delayNode.delayTime.value = preRollSeconds;
|
|
109
135
|
const gainNode = context.createGain();
|
|
110
136
|
gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
|
|
137
|
+
let compressor = null;
|
|
138
|
+
if (fullConfig.output?.enableCompression) {
|
|
139
|
+
compressor = context.createDynamicsCompressor();
|
|
140
|
+
const comp = fullConfig.output.compression;
|
|
141
|
+
compressor.threshold.value = comp.threshold ?? -24;
|
|
142
|
+
compressor.ratio.value = comp.ratio ?? 3;
|
|
143
|
+
compressor.attack.value = comp.attack ?? 3e-3;
|
|
144
|
+
compressor.release.value = comp.release ?? 0.05;
|
|
145
|
+
compressor.knee.value = 10;
|
|
146
|
+
}
|
|
111
147
|
const destination = context.createMediaStreamDestination();
|
|
112
148
|
try {
|
|
113
149
|
splitter.connect(delayNode);
|
|
114
150
|
delayNode.connect(gainNode);
|
|
115
|
-
|
|
151
|
+
if (compressor) {
|
|
152
|
+
gainNode.connect(compressor);
|
|
153
|
+
compressor.connect(destination);
|
|
154
|
+
console.log("Compression enabled:", fullConfig.output?.compression);
|
|
155
|
+
} else {
|
|
156
|
+
gainNode.connect(destination);
|
|
157
|
+
}
|
|
116
158
|
} catch (error) {
|
|
117
159
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
118
160
|
console.error("Failed to wire audio pipeline:", err);
|
|
@@ -121,10 +163,24 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
121
163
|
}
|
|
122
164
|
function updateGain(state) {
|
|
123
165
|
try {
|
|
124
|
-
const {
|
|
125
|
-
|
|
166
|
+
const {
|
|
167
|
+
speechGain = 1,
|
|
168
|
+
silenceGain = 0,
|
|
169
|
+
gainRampTime = 0.015,
|
|
170
|
+
smoothTransitions = true,
|
|
171
|
+
maxGainDb = 6
|
|
172
|
+
} = fullConfig.output;
|
|
173
|
+
const maxGainLinear = Math.pow(10, maxGainDb / 20);
|
|
174
|
+
const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
|
|
175
|
+
const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
|
|
126
176
|
const now = context.currentTime;
|
|
127
|
-
|
|
177
|
+
if (smoothTransitions) {
|
|
178
|
+
gainNode.gain.cancelScheduledValues(now);
|
|
179
|
+
gainNode.gain.setValueAtTime(gainNode.gain.value, now);
|
|
180
|
+
gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
|
|
181
|
+
} else {
|
|
182
|
+
gainNode.gain.setValueAtTime(targetGain, now);
|
|
183
|
+
}
|
|
128
184
|
} catch (error) {
|
|
129
185
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
130
186
|
console.error("Failed to update gain:", err);
|
|
@@ -180,6 +236,9 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
180
236
|
vadNode.disconnect();
|
|
181
237
|
delayNode.disconnect();
|
|
182
238
|
gainNode.disconnect();
|
|
239
|
+
if (compressor) {
|
|
240
|
+
compressor.disconnect();
|
|
241
|
+
}
|
|
183
242
|
destination.stream.getTracks().forEach((t) => t.stop());
|
|
184
243
|
unregisterPipeline();
|
|
185
244
|
} catch (error) {
|
|
@@ -196,7 +255,47 @@ async function createAudioPipeline(sourceTrack, config = {}) {
|
|
|
196
255
|
try {
|
|
197
256
|
if (newConfig.vad) {
|
|
198
257
|
vadStateMachine.updateConfig(newConfig.vad);
|
|
258
|
+
Object.assign(fullConfig.vad, newConfig.vad);
|
|
259
|
+
if (newConfig.vad.preRollMs !== void 0) {
|
|
260
|
+
const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
|
|
261
|
+
delayNode.delayTime.setValueAtTime(
|
|
262
|
+
preRollSeconds2,
|
|
263
|
+
context.currentTime
|
|
264
|
+
);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
if (newConfig.output) {
|
|
268
|
+
Object.assign(fullConfig.output, newConfig.output);
|
|
269
|
+
updateGain(lastVadState);
|
|
270
|
+
if (compressor && newConfig.output.compression) {
|
|
271
|
+
const comp = newConfig.output.compression;
|
|
272
|
+
if (comp.threshold !== void 0) {
|
|
273
|
+
compressor.threshold.setValueAtTime(
|
|
274
|
+
comp.threshold,
|
|
275
|
+
context.currentTime
|
|
276
|
+
);
|
|
277
|
+
}
|
|
278
|
+
if (comp.ratio !== void 0) {
|
|
279
|
+
compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
|
|
280
|
+
}
|
|
281
|
+
if (comp.attack !== void 0) {
|
|
282
|
+
compressor.attack.setValueAtTime(
|
|
283
|
+
comp.attack,
|
|
284
|
+
context.currentTime
|
|
285
|
+
);
|
|
286
|
+
}
|
|
287
|
+
if (comp.release !== void 0) {
|
|
288
|
+
compressor.release.setValueAtTime(
|
|
289
|
+
comp.release,
|
|
290
|
+
context.currentTime
|
|
291
|
+
);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
if (newConfig.livekit) {
|
|
296
|
+
Object.assign(fullConfig.livekit, newConfig.livekit);
|
|
199
297
|
}
|
|
298
|
+
console.log("Pipeline config updated:", newConfig);
|
|
200
299
|
} catch (error) {
|
|
201
300
|
const err = error instanceof Error ? error : new Error(String(error));
|
|
202
301
|
console.error("Failed to update config:", err);
|
|
@@ -102,13 +102,32 @@ To disable noise suppression, set noiseSuppression.enabled to false.`
|
|
|
102
102
|
};
|
|
103
103
|
|
|
104
104
|
// src/vad/vad-node.ts
|
|
105
|
-
var
|
|
105
|
+
var createEnergyVadWorkletCode = (vadConfig) => {
|
|
106
|
+
const energyParams = vadConfig?.energyVad || {};
|
|
107
|
+
const smoothing = energyParams.smoothing ?? 0.95;
|
|
108
|
+
const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
|
|
109
|
+
const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 0.01;
|
|
110
|
+
const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 1e-3;
|
|
111
|
+
const minSNR = energyParams.minSNR ?? 2;
|
|
112
|
+
const snrRange = energyParams.snrRange ?? 8;
|
|
113
|
+
return `
|
|
106
114
|
class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
107
115
|
constructor() {
|
|
108
116
|
super();
|
|
109
|
-
this.smoothing =
|
|
117
|
+
this.smoothing = ${smoothing};
|
|
110
118
|
this.energy = 0;
|
|
111
|
-
this.noiseFloor =
|
|
119
|
+
this.noiseFloor = ${initialNoiseFloor};
|
|
120
|
+
this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
|
|
121
|
+
this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
|
|
122
|
+
this.minSNR = ${minSNR};
|
|
123
|
+
this.snrRange = ${snrRange};
|
|
124
|
+
this.isSpeaking = false;
|
|
125
|
+
|
|
126
|
+
this.port.onmessage = (event) => {
|
|
127
|
+
if (event.data && event.data.isSpeaking !== undefined) {
|
|
128
|
+
this.isSpeaking = event.data.isSpeaking;
|
|
129
|
+
}
|
|
130
|
+
};
|
|
112
131
|
}
|
|
113
132
|
|
|
114
133
|
process(inputs, outputs, parameters) {
|
|
@@ -116,41 +135,54 @@ class EnergyVadProcessor extends AudioWorkletProcessor {
|
|
|
116
135
|
if (!input || !input.length) return true;
|
|
117
136
|
const channel = input[0];
|
|
118
137
|
|
|
119
|
-
// Calculate RMS
|
|
138
|
+
// Calculate RMS (Root Mean Square) energy
|
|
120
139
|
let sum = 0;
|
|
121
140
|
for (let i = 0; i < channel.length; i++) {
|
|
122
141
|
sum += channel[i] * channel[i];
|
|
123
142
|
}
|
|
124
143
|
const rms = Math.sqrt(sum / channel.length);
|
|
125
144
|
|
|
126
|
-
//
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
145
|
+
// Adaptive noise floor estimation - ONLY during silence
|
|
146
|
+
// This prevents the noise floor from rising during speech
|
|
147
|
+
if (!this.isSpeaking) {
|
|
148
|
+
if (rms < this.noiseFloor) {
|
|
149
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
|
|
150
|
+
} else {
|
|
151
|
+
// Even during silence, if we detect a loud signal, adapt very slowly
|
|
152
|
+
// This could be brief noise we haven't classified as speech yet
|
|
153
|
+
this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
|
|
154
|
+
}
|
|
131
155
|
}
|
|
156
|
+
// During speech, freeze the noise floor to maintain consistent detection
|
|
132
157
|
|
|
133
|
-
// Calculate
|
|
134
|
-
// This is a heuristic mapping from energy to 0-1
|
|
158
|
+
// Calculate Signal-to-Noise Ratio (SNR)
|
|
135
159
|
const snr = rms / (this.noiseFloor + 1e-6);
|
|
136
|
-
|
|
160
|
+
|
|
161
|
+
// Map SNR to probability (0-1)
|
|
162
|
+
// Probability is 0 when SNR <= minSNR
|
|
163
|
+
// Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
|
|
164
|
+
// Probability is 1 when SNR >= (minSNR + snrRange)
|
|
165
|
+
const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
|
|
137
166
|
|
|
138
|
-
this.port.postMessage({ probability });
|
|
167
|
+
this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
|
|
139
168
|
|
|
140
169
|
return true;
|
|
141
170
|
}
|
|
142
171
|
}
|
|
143
172
|
registerProcessor('energy-vad-processor', EnergyVadProcessor);
|
|
144
173
|
`;
|
|
174
|
+
};
|
|
145
175
|
var EnergyVADPlugin = class {
|
|
146
176
|
name = "energy-vad";
|
|
177
|
+
workletNode = null;
|
|
147
178
|
async createNode(context, config, onDecision) {
|
|
148
179
|
if (!config?.enabled) {
|
|
149
180
|
console.log("VAD disabled, using passthrough node");
|
|
150
181
|
const pass = context.createGain();
|
|
151
182
|
return pass;
|
|
152
183
|
}
|
|
153
|
-
const
|
|
184
|
+
const workletCode = createEnergyVadWorkletCode(config);
|
|
185
|
+
const blob = new Blob([workletCode], {
|
|
154
186
|
type: "application/javascript"
|
|
155
187
|
});
|
|
156
188
|
const url = URL.createObjectURL(blob);
|
|
@@ -169,6 +201,7 @@ var EnergyVADPlugin = class {
|
|
|
169
201
|
let node;
|
|
170
202
|
try {
|
|
171
203
|
node = new AudioWorkletNode(context, "energy-vad-processor");
|
|
204
|
+
this.workletNode = node;
|
|
172
205
|
console.log("Energy VAD node created successfully");
|
|
173
206
|
} catch (e) {
|
|
174
207
|
const error = new Error(
|
|
@@ -194,6 +227,11 @@ var EnergyVADPlugin = class {
|
|
|
194
227
|
};
|
|
195
228
|
return node;
|
|
196
229
|
}
|
|
230
|
+
updateSpeakingState(isSpeaking) {
|
|
231
|
+
if (this.workletNode) {
|
|
232
|
+
this.workletNode.port.postMessage({ isSpeaking });
|
|
233
|
+
}
|
|
234
|
+
}
|
|
197
235
|
};
|
|
198
236
|
|
|
199
237
|
// src/extensibility/plugins.ts
|
|
@@ -3,9 +3,9 @@ import {
|
|
|
3
3
|
getVADPlugin,
|
|
4
4
|
registerNoiseSuppressionPlugin,
|
|
5
5
|
registerVADPlugin
|
|
6
|
-
} from "../chunk-
|
|
6
|
+
} from "../chunk-H5UKZU2Y.mjs";
|
|
7
7
|
import "../chunk-XO6B3D4A.mjs";
|
|
8
|
-
import "../chunk-
|
|
8
|
+
import "../chunk-VEJXAEMM.mjs";
|
|
9
9
|
export {
|
|
10
10
|
getNoiseSuppressionPlugin,
|
|
11
11
|
getVADPlugin,
|