@tensamin/audio 0.1.14 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -231
- package/dist/chunk-6BJ4XGSA.mjs +80 -0
- package/dist/chunk-AQ5RVY33.mjs +74 -0
- package/dist/chunk-IS37FHDN.mjs +33 -0
- package/dist/chunk-K4J3UUOR.mjs +178 -0
- package/dist/chunk-QNQK6QFB.mjs +71 -0
- package/dist/context/audio-context.d.mts +0 -24
- package/dist/context/audio-context.d.ts +0 -24
- package/dist/index.d.mts +2 -8
- package/dist/index.d.ts +2 -8
- package/dist/index.js +285 -680
- package/dist/index.mjs +8 -43
- package/dist/livekit/integration.d.mts +3 -7
- package/dist/livekit/integration.d.ts +3 -7
- package/dist/livekit/integration.js +280 -626
- package/dist/livekit/integration.mjs +7 -8
- package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
- package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
- package/dist/noise-suppression/deepfilternet-node.js +57 -0
- package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +2 -2
- package/dist/pipeline/audio-pipeline.d.ts +2 -2
- package/dist/pipeline/audio-pipeline.js +219 -554
- package/dist/pipeline/audio-pipeline.mjs +4 -5
- package/dist/types.d.mts +42 -257
- package/dist/types.d.ts +42 -257
- package/dist/vad/vad-node.d.mts +7 -9
- package/dist/vad/vad-node.d.ts +7 -9
- package/dist/vad/vad-node.js +47 -156
- package/dist/vad/vad-node.mjs +3 -3
- package/dist/vad/vad-state.d.mts +9 -11
- package/dist/vad/vad-state.d.ts +9 -11
- package/dist/vad/vad-state.js +50 -79
- package/dist/vad/vad-state.mjs +3 -3
- package/package.json +21 -21
- package/dist/chunk-2G2JFHJY.mjs +0 -180
- package/dist/chunk-6F2HZUYO.mjs +0 -91
- package/dist/chunk-K4YLH73B.mjs +0 -103
- package/dist/chunk-R5M2DGAQ.mjs +0 -311
- package/dist/chunk-UFKIAMG3.mjs +0 -47
- package/dist/chunk-XO6B3D4A.mjs +0 -67
- package/dist/extensibility/plugins.d.mts +0 -9
- package/dist/extensibility/plugins.d.ts +0 -9
- package/dist/extensibility/plugins.js +0 -320
- package/dist/extensibility/plugins.mjs +0 -14
- package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
- package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
- package/dist/noise-suppression/rnnoise-node.js +0 -101
- package/dist/noise-suppression/rnnoise-node.mjs +0 -6
package/dist/chunk-2G2JFHJY.mjs
DELETED
|
@@ -1,180 +0,0 @@
|
|
|
1
|
-
// src/vad/vad-node.ts
|
|
2
|
-
/**
 * Builds the source text of the `energy-vad-processor` AudioWorklet module.
 *
 * The tuning values from `vadConfig.energyVad` are baked into the generated
 * source as numeric literals, so each one is coerced to a finite number first:
 * anything else (arbitrary strings, NaN, Infinity) falls back to the default
 * instead of being spliced into executable code.
 *
 * @param {object} [vadConfig] - VAD configuration; only `energyVad` is read.
 * @returns {string} JavaScript source that registers `energy-vad-processor`.
 */
const createEnergyVadWorkletCode = (vadConfig) => {
  const energyParams = vadConfig?.energyVad || {};

  // Accepts numbers and numeric strings; everything else yields the fallback.
  const numberOr = (value, fallback) => {
    if (value == null || value === "") return fallback;
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : fallback;
  };

  const smoothing = numberOr(energyParams.smoothing, 0.95);
  const initialNoiseFloor = numberOr(energyParams.initialNoiseFloor, 1e-3);
  const noiseFloorAdaptRateQuiet = numberOr(energyParams.noiseFloorAdaptRateQuiet, 2e-3);
  const noiseFloorAdaptRateLoud = numberOr(energyParams.noiseFloorAdaptRateLoud, 0.02);
  const minSNR = numberOr(energyParams.minSNR, 12);
  // The processor divides by snrRange; a non-positive range would turn the
  // probability into NaN/Infinity, so it is floored to a tiny positive value
  // (which makes the mapping an effective step at minSNR).
  const snrRange = Math.max(numberOr(energyParams.snrRange, 10), 1e-6);
  const minEnergy = numberOr(energyParams.minEnergy, 3e-3);

  return `
class EnergyVadProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.smoothing = ${smoothing};
    this.energy = 0;
    this.noiseFloor = ${initialNoiseFloor};
    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
    this.minSNR = ${minSNR};
    this.snrRange = ${snrRange};
    this.minEnergy = ${minEnergy};
    this.isSpeaking = false;

    this.port.onmessage = (event) => {
      if (event.data && event.data.isSpeaking !== undefined) {
        this.isSpeaking = event.data.isSpeaking;
      }
    };
  }

  process(inputs, outputs, parameters) {
    const input = inputs[0];
    if (!input || !input.length) return true;
    const channel = input[0];
    // An empty block would give 0/0 = NaN and poison the smoothed energy forever
    if (!channel || channel.length === 0) return true;

    // Calculate instantaneous RMS (Root Mean Square) energy
    let sum = 0;
    let peak = 0;
    for (let i = 0; i < channel.length; i++) {
      const sample = Math.abs(channel[i]);
      sum += channel[i] * channel[i];
      peak = Math.max(peak, sample);
    }
    const instantRms = Math.sqrt(sum / channel.length);

    // Smooth the RMS energy to reduce jitter
    // this.energy acts as the smoothed RMS value
    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);

    // Calculate Crest Factor (peak-to-RMS ratio)
    // Voice typically has crest factor of 2-4 (6-12dB)
    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
    const crestFactor = peak / (instantRms + 1e-10);
    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));

    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
    // This prevents sharp transients from affecting the noise floor
    if (this.energy < this.noiseFloor) {
      // Signal is quieter than noise floor, adapt downwards slowly
      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
    } else {
      // Calculate SNR based on smoothed energy
      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));

      // Only adapt upwards if:
      // 1. SNR is low (< 10dB) - likely just background noise
      // 2. AND crest factor is low (< 15dB) - not a sharp transient
      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
        // This is persistent background noise, adapt upwards
        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
      } else {
        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
      }
    }

    // Ensure noise floor doesn't drop to absolute zero
    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);

    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
    const snr = this.energy / (this.noiseFloor + 1e-6);
    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));

    // Map SNR dB to probability (0-1)
    // Probability is 0 when snrDb <= minSNR
    // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));

    // Apply absolute energy threshold with soft knee
    if (this.energy < this.minEnergy) {
      const energyRatio = this.energy / (this.minEnergy + 1e-6);
      probability *= Math.pow(energyRatio, 2);
    }

    // Apply crest factor penalty
    // Reject signals with high crest factor (sharp transients like keyboard clicks)
    // Voice: 6-12dB, Keyboard: 20-30dB
    // We penalize anything above 14dB
    if (crestFactorDb > 14) {
      const excess = crestFactorDb - 14;
      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
      probability *= penalty;
    }

    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });

    return true;
  }
}
registerProcessor('energy-vad-processor', EnergyVadProcessor);
`;
};
|
|
116
|
-
/**
 * VAD plugin that runs the energy-based detector inside an AudioWorklet.
 *
 * `createNode` loads the generated worklet module from a temporary blob URL,
 * instantiates an `energy-vad-processor` node and forwards every valid
 * probability it posts to the `onDecision` callback.
 */
const EnergyVADPlugin = class {
  name = "energy-vad";
  // Last worklet node created by createNode(); null while VAD is disabled
  // or before the first successful createNode() call.
  workletNode = null;

  /**
   * Creates the audio node that performs voice activity detection.
   *
   * @param {BaseAudioContext} context - Audio context that owns the node.
   * @param {object} config - VAD configuration; a falsy `enabled` yields a plain gain node.
   * @param {(probability: number) => void} onDecision - Called for each numeric, non-NaN probability.
   * @returns {Promise<AudioNode>} The worklet node, or a passthrough gain node when disabled.
   * @throws {Error} When the worklet module cannot be loaded or the node cannot
   *   be constructed; the underlying failure is attached as `cause`.
   */
  async createNode(context, config, onDecision) {
    if (!config?.enabled) {
      console.log("VAD disabled, using passthrough node");
      return context.createGain();
    }

    const blob = new Blob([createEnergyVadWorkletCode(config)], {
      type: "application/javascript",
    });
    const url = URL.createObjectURL(blob);
    try {
      await context.audioWorklet.addModule(url);
      console.log("Energy VAD worklet loaded successfully");
    } catch (e) {
      const error = new Error(
        `Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`,
        { cause: e },
      );
      console.error(error.message);
      throw error;
    } finally {
      // The URL is only needed while addModule() runs; release it on every path.
      URL.revokeObjectURL(url);
    }

    let node;
    try {
      node = new AudioWorkletNode(context, "energy-vad-processor");
      this.workletNode = node;
      console.log("Energy VAD node created successfully");
    } catch (e) {
      const error = new Error(
        `Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`,
        { cause: e },
      );
      console.error(error.message);
      throw error;
    }

    node.port.onmessage = (event) => {
      try {
        const probability = event.data?.probability;
        if (typeof probability === "number" && !Number.isNaN(probability)) {
          onDecision(probability);
        } else {
          console.warn("Invalid VAD probability received:", event.data);
        }
      } catch (error) {
        // A throwing consumer must not break the message port handler.
        console.error("Error in VAD message handler:", error);
      }
    };
    node.port.onmessageerror = (event) => {
      console.error("VAD port message error:", event);
    };
    return node;
  }

  /**
   * Forwards the current speaking flag to the worklet processor.
   * Does nothing when no worklet node has been created.
   *
   * @param {boolean} isSpeaking - Latest decision from the VAD state machine.
   */
  updateSpeakingState(isSpeaking) {
    if (this.workletNode) {
      this.workletNode.port.postMessage({ isSpeaking });
    }
  }
};
|
|
177
|
-
|
|
178
|
-
export {
|
|
179
|
-
EnergyVADPlugin
|
|
180
|
-
};
|
package/dist/chunk-6F2HZUYO.mjs
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
createAudioPipeline
|
|
3
|
-
} from "./chunk-R5M2DGAQ.mjs";
|
|
4
|
-
|
|
5
|
-
// src/livekit/integration.ts
|
|
6
|
-
/**
 * Routes a LiveKit local audio track through the audio processing pipeline.
 *
 * The track's underlying MediaStreamTrack is fed into `createAudioPipeline`
 * and the processed track is swapped in with `track.replaceTrack`. When
 * `config.livekit.manageTrackMute` is set, the track is muted and unmuted to
 * follow the pipeline's `vadChange` events.
 *
 * The returned pipeline's `dispose()` is wrapped so that it additionally
 * restores the original track (if still live), undoes a mute applied by the
 * VAD handler, and detaches the listeners installed here. It runs the
 * underlying dispose exactly once and is safe to call repeatedly.
 *
 * @param {object} track - LiveKit LocalAudioTrack (uses `mediaStreamTrack`,
 *   `replaceTrack`, `mute`, `unmute`, `isMuted`).
 * @param {object} [config] - Pipeline configuration, passed through to `createAudioPipeline`.
 * @returns {Promise<object>} The pipeline handle with the wrapped `dispose`.
 * @throws {Error} If the track is missing or ended, the pipeline cannot be
 *   created, or the track cannot be replaced.
 */
async function attachProcessingToTrack(track, config = {}) {
  if (!track) {
    throw new Error("attachProcessingToTrack requires a valid LocalAudioTrack");
  }
  const originalTrack = track.mediaStreamTrack;
  if (!originalTrack) {
    throw new Error("LocalAudioTrack has no underlying MediaStreamTrack");
  }
  if (originalTrack.readyState === "ended") {
    throw new Error("Cannot attach processing to an ended MediaStreamTrack");
  }

  const describe = (error) => (error instanceof Error ? error.message : String(error));

  let pipeline;
  try {
    console.log("Creating audio processing pipeline...");
    pipeline = await createAudioPipeline(originalTrack, config);
    console.log("Audio processing pipeline created successfully");
  } catch (error) {
    const err = new Error(`Failed to create audio pipeline: ${describe(error)}`, { cause: error });
    console.error(err);
    throw err;
  }

  if (!pipeline.processedTrack) {
    // Release whatever the pipeline already allocated before bailing out.
    pipeline.dispose();
    throw new Error("Pipeline did not return a processed track");
  }

  try {
    console.log("Replacing LiveKit track with processed track...");
    await track.replaceTrack(pipeline.processedTrack);
    console.log("LiveKit track replaced successfully");
  } catch (error) {
    pipeline.dispose();
    const err = new Error(`Failed to replace LiveKit track: ${describe(error)}`, { cause: error });
    console.error(err);
    throw err;
  }

  let disposed = false;
  // True only while the track is muted because of a VAD decision, so that a
  // mute requested by the application is never undone here.
  let isVadMuted = false;

  const onVadChange = async (state) => {
    if (disposed) return;
    try {
      if (state.isSpeaking) {
        if (isVadMuted) {
          await track.unmute();
          isVadMuted = false;
        }
      } else if (!track.isMuted) {
        await track.mute();
        if (disposed) {
          // Disposal happened while the mute was in flight: undo it.
          await track.unmute();
        } else {
          isVadMuted = true;
        }
      }
    } catch (error) {
      console.error("Error handling VAD-based track muting:", error);
    }
  };
  const onPipelineError = (error) => {
    console.error("Audio pipeline error:", error);
  };

  const manageMute = Boolean(config.livekit?.manageTrackMute);
  if (manageMute) {
    pipeline.events.on("vadChange", onVadChange);
  }
  pipeline.events.on("error", onPipelineError);

  const originalDispose = pipeline.dispose.bind(pipeline);
  pipeline.dispose = () => {
    if (disposed) return;
    disposed = true;

    if (manageMute) {
      pipeline.events.off?.("vadChange", onVadChange);
    }

    try {
      if (originalTrack.readyState === "live") {
        console.log("Restoring original track...");
        Promise.resolve(track.replaceTrack(originalTrack)).catch((error) => {
          console.error("Failed to restore original track:", error);
        });
      }
    } catch (error) {
      console.error("Error during pipeline disposal:", error);
    }

    if (isVadMuted) {
      // Do not leave the restored track silenced by a stale VAD decision.
      isVadMuted = false;
      Promise.resolve()
        .then(() => track.unmute())
        .catch((error) => {
          console.error("Error handling VAD-based track muting:", error);
        });
    }

    try {
      originalDispose();
    } catch (disposeError) {
      console.error("Error calling original dispose:", disposeError);
    }
    pipeline.events.off?.("error", onPipelineError);
  };

  return pipeline;
}
|
|
88
|
-
|
|
89
|
-
export {
|
|
90
|
-
attachProcessingToTrack
|
|
91
|
-
};
|
package/dist/chunk-K4YLH73B.mjs
DELETED
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
// src/vad/vad-state.ts
|
|
2
|
-
/**
 * Hysteresis state machine that turns per-frame speech probabilities into a
 * stable speaking / not-speaking decision.
 *
 * States reported to callers: "silent" -> "speech_starting" -> "speaking" -> "silent".
 * All durations are measured with the timestamps passed to `processFrame`, so
 * callers may use any monotonic millisecond clock as long as they use the same
 * one for every frame.
 */
const VADStateMachine = class {
  config;
  currentState = "silent";
  lastSpeechTime = 0;
  speechStartTime = 0;
  // Anchored to the first frame's timestamp (see processFrame). It must live
  // in the caller's clock domain; seeding it from the wall clock would make
  // the first silence duration meaningless for any other time base.
  lastSilenceTime = null;
  frameDurationMs = 20;

  /**
   * @param {object} [config] - Partial VAD configuration; missing fields get defaults.
   */
  constructor(config) {
    const energy = config?.energyVad;
    this.config = {
      enabled: config?.enabled ?? true,
      pluginName: config?.pluginName ?? "energy-vad",
      // Probability needed to leave silence
      startThreshold: config?.startThreshold ?? 0.8,
      // Probability below which speech is considered to be fading
      stopThreshold: config?.stopThreshold ?? 0.3,
      // Time speech is held after the last frame above stopThreshold
      hangoverMs: config?.hangoverMs ?? 300,
      preRollMs: config?.preRollMs ?? 250,
      // Time a candidate must persist before it is reported as speaking
      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
      // Time since the last sub-threshold frame required before a new onset
      minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
      energyVad: {
        smoothing: energy?.smoothing ?? 0.95,
        initialNoiseFloor: energy?.initialNoiseFloor ?? 1e-3,
        noiseFloorAdaptRateQuiet: energy?.noiseFloorAdaptRateQuiet ?? 2e-3,
        noiseFloorAdaptRateLoud: energy?.noiseFloorAdaptRateLoud ?? 0.02,
        minSNR: energy?.minSNR ?? 12,
        snrRange: energy?.snrRange ?? 10,
        minEnergy: energy?.minEnergy ?? 3e-3,
      },
    };
  }

  /**
   * Merges a partial configuration into the current one. `energyVad` is
   * merged field by field so a partial update keeps the remaining tuning values.
   *
   * @param {object} [config] - Fields to override.
   */
  updateConfig(config) {
    if (!config) return;
    const { energyVad, ...topLevel } = config;
    this.config = {
      ...this.config,
      ...topLevel,
      energyVad: { ...this.config.energyVad, ...energyVad },
    };
  }

  /**
   * Advances the state machine by one frame.
   *
   * @param {number} probability - Speech probability for this frame (0..1).
   * @param {number} timestamp - Frame time in milliseconds, on the caller's clock.
   * @returns {{isSpeaking: boolean, probability: number, state: string}}
   *   The decision after this frame; `isSpeaking` is true only in "speaking".
   */
  processFrame(probability, timestamp) {
    const {
      startThreshold,
      stopThreshold,
      hangoverMs,
      minSpeechDurationMs,
      minSilenceDurationMs,
    } = this.config;

    if (this.lastSilenceTime === null) {
      this.lastSilenceTime = timestamp;
    }

    let newState = this.currentState;
    switch (this.currentState) {
      case "silent":
      case "speech_ending":
        if (probability < startThreshold) {
          newState = "silent";
          this.lastSilenceTime = timestamp;
        } else if (timestamp - this.lastSilenceTime >= minSilenceDurationMs) {
          newState = "speech_starting";
          this.speechStartTime = timestamp;
          this.lastSpeechTime = timestamp;
        } else {
          newState = "silent";
        }
        break;

      case "speech_starting":
        if (probability >= stopThreshold) {
          const speechDuration = timestamp - this.speechStartTime;
          newState = speechDuration >= minSpeechDurationMs ? "speaking" : "speech_starting";
          this.lastSpeechTime = timestamp;
        } else {
          // Candidate dropped out before it lasted long enough: treat as a transient.
          newState = "silent";
          this.lastSilenceTime = timestamp;
        }
        break;

      case "speaking":
        if (probability >= stopThreshold) {
          this.lastSpeechTime = timestamp;
        } else if (timestamp - this.lastSpeechTime >= hangoverMs) {
          newState = "speech_ending";
          this.lastSilenceTime = timestamp;
        }
        break;

      default:
        break;
    }

    // "speech_ending" is an internal marker only; callers observe "silent".
    if (newState === "speech_ending") newState = "silent";
    this.currentState = newState;
    return {
      isSpeaking: newState === "speaking",
      probability,
      state: newState,
    };
  }
};
|
|
100
|
-
|
|
101
|
-
export {
|
|
102
|
-
VADStateMachine
|
|
103
|
-
};
|
package/dist/chunk-R5M2DGAQ.mjs
DELETED
|
@@ -1,311 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
VADStateMachine
|
|
3
|
-
} from "./chunk-K4YLH73B.mjs";
|
|
4
|
-
import {
|
|
5
|
-
getAudioContext,
|
|
6
|
-
registerPipeline,
|
|
7
|
-
unregisterPipeline
|
|
8
|
-
} from "./chunk-OZ7KMC4S.mjs";
|
|
9
|
-
import {
|
|
10
|
-
getNoiseSuppressionPlugin,
|
|
11
|
-
getVADPlugin
|
|
12
|
-
} from "./chunk-UFKIAMG3.mjs";
|
|
13
|
-
|
|
14
|
-
// src/pipeline/audio-pipeline.ts
|
|
15
|
-
import mitt from "mitt";
|
|
16
|
-
/**
 * Builds the Web Audio processing graph for one audio track:
 *
 *   source -> noise suppression -> splitter -+-> VAD node (analysis only)
 *                                            +-> delay (pre-roll) -> gain gate -> [compressor] -> destination
 *
 * The VAD node reports speech probabilities; they are fed through a
 * `VADStateMachine` and the resulting decision drives the gain gate and the
 * `vadChange` event.
 *
 * Arguments are validated before the pipeline is registered, and every failure
 * after registration disconnects the nodes created so far and unregisters the
 * pipeline again. (`registerPipeline` / `unregisterPipeline` are defined
 * elsewhere; presumably they track users of the shared context - confirm.)
 *
 * @param {MediaStreamTrack} sourceTrack - Live audio track to process.
 * @param {object} [config] - Partial configuration (`noiseSuppression`, `vad`, `output`, `livekit`).
 * @returns {Promise<{processedTrack: MediaStreamTrack, events: object, state: object,
 *   setConfig: Function, dispose: Function}>} Pipeline handle.
 * @throws {Error} If the track is not a live audio track or a stage of the graph cannot be built.
 */
async function createAudioPipeline(sourceTrack, config = {}) {
  if (!sourceTrack || sourceTrack.kind !== "audio") {
    throw new Error(
      "createAudioPipeline requires a valid audio MediaStreamTrack"
    );
  }
  if (sourceTrack.readyState === "ended") {
    throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
  }

  const toError = (value) => (value instanceof Error ? value : new Error(String(value)));

  const context = getAudioContext();
  registerPipeline();

  const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
    config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
  );
  const vadEnabled = config.vad?.enabled !== false;
  const fullConfig = {
    noiseSuppression: {
      enabled: nsEnabled,
      ...config.noiseSuppression,
    },
    vad: {
      enabled: vadEnabled,
      // Voice-optimized defaults (overridden by config)
      startThreshold: 0.6,
      stopThreshold: 0.45,
      hangoverMs: 400,
      preRollMs: 250,
      minSpeechDurationMs: 100,
      minSilenceDurationMs: 150,
      ...config.vad,
      // Merged per field so a partial energyVad override keeps the other defaults
      energyVad: {
        smoothing: 0.95,
        initialNoiseFloor: 1e-3,
        noiseFloorAdaptRateQuiet: 0.01,
        noiseFloorAdaptRateLoud: 1e-3,
        minSNR: 2,
        snrRange: 8,
        ...config.vad?.energyVad,
      },
    },
    output: {
      speechGain: 1,
      // Full mute for voice-only
      silenceGain: 0,
      // Fast but smooth transitions
      gainRampTime: 0.015,
      smoothTransitions: true,
      maxGainDb: 6,
      enableCompression: false,
      ...config.output,
      // Merged per field so a partial compression override keeps the other defaults
      compression: {
        threshold: -24,
        ratio: 3,
        attack: 3e-3,
        release: 0.05,
        ...config.output?.compression,
      },
    },
    livekit: { manageTrackMute: false, ...config.livekit },
  };
  console.log("Audio pipeline config:", {
    noiseSuppression: fullConfig.noiseSuppression?.enabled,
    vad: fullConfig.vad?.enabled,
    output: fullConfig.output,
  });

  const emitter = mitt();
  // Declared before the VAD callback is created so the callback can never
  // observe it in its temporal dead zone.
  let lastVadState = {
    isSpeaking: false,
    probability: 0,
    state: "silent",
  };

  const graphNodes = [];
  let destination = null;
  let released = false;

  // Tears the graph down exactly once; each step is best-effort so that one
  // failing disconnect cannot prevent the pipeline from being unregistered.
  function releaseResources() {
    if (released) return;
    released = true;
    for (const node of graphNodes) {
      try {
        node.disconnect();
      } catch (error) {
        console.error("Error during pipeline disposal:", error);
      }
    }
    try {
      destination?.stream.getTracks().forEach((t) => t.stop());
    } catch (error) {
      console.error("Error during pipeline disposal:", error);
    }
    try {
      unregisterPipeline();
    } catch (error) {
      console.error("Error during pipeline disposal:", error);
    }
  }

  // Logs and emits a construction failure, returning the normalized error to throw.
  function reportFailure(label, error) {
    const err = toError(error);
    console.error(label, err);
    emitter.emit("error", err);
    return err;
  }

  let vadStateMachine;
  let delayNode;
  let gainNode;
  let compressor = null;
  let processedTrack;

  function updateGain(state) {
    try {
      const {
        speechGain = 1,
        silenceGain = 0,
        gainRampTime = 0.015,
        smoothTransitions = true,
        maxGainDb = 6,
      } = fullConfig.output;
      const maxGainLinear = Math.pow(10, maxGainDb / 20);
      const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
      const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
      const now = context.currentTime;
      if (smoothTransitions) {
        // Restart from the current value so overlapping ramps do not jump.
        gainNode.gain.cancelScheduledValues(now);
        gainNode.gain.setValueAtTime(gainNode.gain.value, now);
        gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
      } else {
        gainNode.gain.setValueAtTime(targetGain, now);
      }
    } catch (error) {
      reportFailure("Failed to update gain:", error);
    }
  }

  try {
    const sourceNode = context.createMediaStreamSource(new MediaStream([sourceTrack]));
    graphNodes.push(sourceNode);

    let nsNode;
    try {
      const nsPlugin = getNoiseSuppressionPlugin(
        fullConfig.noiseSuppression?.pluginName
      );
      nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
    } catch (error) {
      throw reportFailure("Failed to create noise suppression node:", error);
    }
    graphNodes.push(nsNode);

    vadStateMachine = new VADStateMachine(fullConfig.vad);
    let vadNode;
    try {
      const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
      vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
        if (released) return;
        try {
          const timestamp = context.currentTime * 1e3;
          const newState = vadStateMachine.processFrame(prob, timestamp);
          if (vadPlugin && typeof vadPlugin.updateSpeakingState === "function") {
            vadPlugin.updateSpeakingState(newState.isSpeaking);
          }
          // Only publish real changes: a state transition or a probability move above 0.1.
          if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
            emitter.emit("vadChange", newState);
            lastVadState = newState;
            updateGain(newState);
          }
        } catch (vadError) {
          reportFailure("Error in VAD callback:", vadError);
        }
      });
    } catch (error) {
      throw reportFailure("Failed to create VAD node:", error);
    }
    graphNodes.push(vadNode);

    const splitter = context.createGain();
    graphNodes.push(splitter);
    sourceNode.connect(nsNode);
    nsNode.connect(splitter);
    splitter.connect(vadNode);

    // The delay holds audio back so the gate has opened by the time the
    // start of an utterance reaches it.
    delayNode = context.createDelay(1);
    graphNodes.push(delayNode);
    delayNode.delayTime.value = (fullConfig.vad?.preRollMs ?? 250) / 1e3;

    gainNode = context.createGain();
    graphNodes.push(gainNode);
    gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;

    if (fullConfig.output?.enableCompression) {
      compressor = context.createDynamicsCompressor();
      graphNodes.push(compressor);
      const comp = fullConfig.output.compression;
      compressor.threshold.value = comp.threshold ?? -24;
      compressor.ratio.value = comp.ratio ?? 3;
      compressor.attack.value = comp.attack ?? 3e-3;
      compressor.release.value = comp.release ?? 0.05;
      compressor.knee.value = 10;
    }

    destination = context.createMediaStreamDestination();
    try {
      splitter.connect(delayNode);
      delayNode.connect(gainNode);
      if (compressor) {
        gainNode.connect(compressor);
        compressor.connect(destination);
        console.log("Compression enabled:", fullConfig.output?.compression);
      } else {
        gainNode.connect(destination);
      }
    } catch (error) {
      throw reportFailure("Failed to wire audio pipeline:", error);
    }

    const audioTracks = destination.stream.getAudioTracks();
    console.log("Destination stream tracks:", {
      count: audioTracks.length,
      tracks: audioTracks.map((t) => ({
        id: t.id,
        label: t.label,
        enabled: t.enabled,
        readyState: t.readyState,
      })),
    });
    if (audioTracks.length === 0) {
      const err = new Error(
        "Failed to create processed audio track: destination stream has no audio tracks. This may indicate an issue with the audio graph connection."
      );
      console.error(err);
      emitter.emit("error", err);
      throw err;
    }
    processedTrack = audioTracks[0];
    if (!processedTrack || processedTrack.readyState === "ended") {
      const err = new Error("Processed audio track is invalid or ended");
      console.error(err);
      emitter.emit("error", err);
      throw err;
    }
  } catch (error) {
    // Undo the registration and disconnect whatever was built before failing.
    releaseResources();
    throw error;
  }

  console.log("Audio pipeline created successfully:", {
    sourceTrack: {
      id: sourceTrack.id,
      label: sourceTrack.label,
      readyState: sourceTrack.readyState,
    },
    processedTrack: {
      id: processedTrack.id,
      label: processedTrack.label,
      readyState: processedTrack.readyState,
    },
    config: {
      noiseSuppression: fullConfig.noiseSuppression?.enabled,
      vad: fullConfig.vad?.enabled,
    },
  });

  function dispose() {
    releaseResources();
  }

  return {
    processedTrack,
    events: emitter,
    get state() {
      return lastVadState;
    },
    setConfig: (newConfig) => {
      try {
        if (newConfig.vad) {
          vadStateMachine.updateConfig(newConfig.vad);
          const { energyVad, ...vadRest } = newConfig.vad;
          Object.assign(fullConfig.vad, vadRest);
          if (energyVad) {
            fullConfig.vad.energyVad = { ...fullConfig.vad.energyVad, ...energyVad };
          }
          if (newConfig.vad.preRollMs !== void 0) {
            delayNode.delayTime.setValueAtTime(
              newConfig.vad.preRollMs / 1e3,
              context.currentTime
            );
          }
        }
        if (newConfig.output) {
          const { compression, ...outputRest } = newConfig.output;
          Object.assign(fullConfig.output, outputRest);
          if (compression) {
            fullConfig.output.compression = { ...fullConfig.output.compression, ...compression };
          }
          updateGain(lastVadState);
          if (compressor && compression) {
            const now = context.currentTime;
            for (const key of ["threshold", "ratio", "attack", "release"]) {
              if (compression[key] !== void 0) {
                compressor[key].setValueAtTime(compression[key], now);
              }
            }
          }
        }
        if (newConfig.livekit) {
          Object.assign(fullConfig.livekit, newConfig.livekit);
        }
        console.log("Pipeline config updated:", newConfig);
      } catch (error) {
        reportFailure("Failed to update config:", error);
      }
    },
    dispose,
  };
}
|
|
308
|
-
|
|
309
|
-
export {
|
|
310
|
-
createAudioPipeline
|
|
311
|
-
};
|