@tensamin/audio 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -3
- package/dist/chunk-6P2RDBW5.mjs +47 -0
- package/dist/chunk-EXH2PNUE.mjs +212 -0
- package/{src/vad/vad-state.ts → dist/chunk-JJASCVEW.mjs} +21 -33
- package/dist/chunk-OZ7KMC4S.mjs +46 -0
- package/dist/chunk-R5JVHKWA.mjs +98 -0
- package/dist/chunk-WBQAMGXK.mjs +0 -0
- package/dist/chunk-XMTQPMQ6.mjs +91 -0
- package/dist/chunk-XO6B3D4A.mjs +67 -0
- package/dist/context/audio-context.d.mts +32 -0
- package/dist/context/audio-context.d.ts +32 -0
- package/dist/context/audio-context.js +75 -0
- package/dist/context/audio-context.mjs +16 -0
- package/dist/extensibility/plugins.d.mts +9 -0
- package/dist/extensibility/plugins.d.ts +9 -0
- package/dist/extensibility/plugins.js +238 -0
- package/dist/extensibility/plugins.mjs +14 -0
- package/dist/index.d.mts +10 -216
- package/dist/index.d.ts +10 -216
- package/dist/index.js +298 -80
- package/dist/index.mjs +29 -352
- package/dist/livekit/integration.d.mts +11 -0
- package/dist/livekit/integration.d.ts +11 -0
- package/dist/livekit/integration.js +585 -0
- package/dist/livekit/integration.mjs +12 -0
- package/dist/noise-suppression/rnnoise-node.d.mts +10 -0
- package/dist/noise-suppression/rnnoise-node.d.ts +10 -0
- package/dist/noise-suppression/rnnoise-node.js +101 -0
- package/dist/noise-suppression/rnnoise-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +6 -0
- package/dist/pipeline/audio-pipeline.d.ts +6 -0
- package/dist/pipeline/audio-pipeline.js +499 -0
- package/dist/pipeline/audio-pipeline.mjs +11 -0
- package/dist/types.d.mts +155 -0
- package/dist/types.d.ts +155 -0
- package/dist/types.js +18 -0
- package/dist/types.mjs +1 -0
- package/dist/vad/vad-node.d.mts +9 -0
- package/dist/vad/vad-node.d.ts +9 -0
- package/dist/vad/vad-node.js +122 -0
- package/dist/vad/vad-node.mjs +6 -0
- package/dist/vad/vad-state.d.mts +15 -0
- package/dist/vad/vad-state.d.ts +15 -0
- package/dist/vad/vad-state.js +83 -0
- package/dist/vad/vad-state.mjs +6 -0
- package/package.json +8 -5
- package/.github/workflows/publish.yml +0 -29
- package/bun.lock +0 -258
- package/src/context/audio-context.ts +0 -69
- package/src/extensibility/plugins.ts +0 -45
- package/src/index.ts +0 -8
- package/src/livekit/integration.ts +0 -61
- package/src/noise-suppression/rnnoise-node.ts +0 -62
- package/src/pipeline/audio-pipeline.ts +0 -154
- package/src/types.ts +0 -167
- package/src/vad/vad-node.ts +0 -78
- package/tsconfig.json +0 -46
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@ An audio processing library for the web, featuring RNNoise-based noise suppressio
|
|
|
7
7
|
- **Noise Suppression**: Uses `@sapphi-red/web-noise-suppressor` (RNNoise) for high-quality noise reduction.
|
|
8
8
|
- **Robust VAD**: Energy-based VAD with hysteresis, hangover, and pre-roll buffering to prevent cutting off speech onset.
|
|
9
9
|
- **Intelligent Muting**: Automatically gates audio or mutes LiveKit tracks when silent.
|
|
10
|
-
- **LiveKit Integration**:
|
|
10
|
+
- **LiveKit Integration**: Good support for `LocalAudioTrack`.
|
|
11
11
|
- **Extensible**: Plugin system for custom WASM/Worklet processors.
|
|
12
12
|
|
|
13
13
|
## Installation
|
|
@@ -18,8 +18,45 @@ bun add @tensamin/audio livekit-client
|
|
|
18
18
|
pnpm install @tensamin/audio livekit-client
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
+
## Setup Assets
|
|
22
|
+
|
|
23
|
+
This library uses WASM and AudioWorklets for processing. **Asset setup is optional** - the pipeline can run in passthrough mode without them.
|
|
24
|
+
|
|
25
|
+
### For Noise Suppression (Optional)
|
|
26
|
+
|
|
27
|
+
If you want to enable noise suppression, download these files from `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`:
|
|
28
|
+
|
|
29
|
+
- `rnnoise.wasm`
|
|
30
|
+
- `rnnoise_simd.wasm`
|
|
31
|
+
- `noise-suppressor-worklet.min.js`
|
|
32
|
+
|
|
33
|
+
Place them in your project's public directory (e.g., `public/audio-processor/`).
|
|
34
|
+
|
|
35
|
+
**Note:** The pipeline will automatically disable noise suppression if these URLs are not provided, and will use passthrough mode instead.
|
|
36
|
+
|
|
21
37
|
## Usage
|
|
22
38
|
|
|
39
|
+
### Minimal Setup (Passthrough Mode)
|
|
40
|
+
|
|
41
|
+
If you want to use the pipeline without noise suppression or VAD (e.g., for testing or when features are not needed), you can disable them:
|
|
42
|
+
|
|
43
|
+
```ts
|
|
44
|
+
import { createAudioPipeline } from "@tensamin/audio";
|
|
45
|
+
|
|
46
|
+
// Get a stream
|
|
47
|
+
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
48
|
+
const track = stream.getAudioTracks()[0];
|
|
49
|
+
|
|
50
|
+
// Create pipeline
|
|
51
|
+
const pipeline = await createAudioPipeline(track, {
|
|
52
|
+
noiseSuppression: { enabled: false },
|
|
53
|
+
vad: { enabled: false },
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
// Use the processed track
|
|
57
|
+
const processedStream = new MediaStream([pipeline.processedTrack]);
|
|
58
|
+
```
|
|
59
|
+
|
|
23
60
|
### Basic Usage (Raw MediaStream)
|
|
24
61
|
|
|
25
62
|
```ts
|
|
@@ -31,7 +68,12 @@ const track = stream.getAudioTracks()[0];
|
|
|
31
68
|
|
|
32
69
|
// Create pipeline
|
|
33
70
|
const pipeline = await createAudioPipeline(track, {
|
|
34
|
-
noiseSuppression: {
|
|
71
|
+
noiseSuppression: {
|
|
72
|
+
enabled: true,
|
|
73
|
+
wasmUrl: "/audio-processor/rnnoise.wasm",
|
|
74
|
+
simdUrl: "/audio-processor/rnnoise_simd.wasm",
|
|
75
|
+
workletUrl: "/audio-processor/noise-suppressor-worklet.min.js",
|
|
76
|
+
},
|
|
35
77
|
vad: { enabled: true },
|
|
36
78
|
});
|
|
37
79
|
|
|
@@ -56,7 +98,12 @@ const localTrack = await LocalAudioTrack.create();
|
|
|
56
98
|
|
|
57
99
|
// Attach processing (replaces the underlying track)
|
|
58
100
|
const pipeline = await attachProcessingToTrack(localTrack, {
|
|
59
|
-
noiseSuppression: {
|
|
101
|
+
noiseSuppression: {
|
|
102
|
+
enabled: true,
|
|
103
|
+
wasmUrl: "/audio-processor/rnnoise.wasm",
|
|
104
|
+
simdUrl: "/audio-processor/rnnoise_simd.wasm",
|
|
105
|
+
workletUrl: "/audio-processor/noise-suppressor-worklet.min.js",
|
|
106
|
+
},
|
|
60
107
|
vad: { enabled: true },
|
|
61
108
|
livekit: { manageTrackMute: true }, // Optional: mute the track object itself
|
|
62
109
|
});
|
|
import {
  RNNoisePlugin
} from "./chunk-XO6B3D4A.mjs";
import {
  EnergyVADPlugin
} from "./chunk-R5JVHKWA.mjs";

// src/extensibility/plugins.ts

// Name -> instance registries for processor plugins, pre-seeded with the
// built-in defaults constructed below.
var nsPlugins = /* @__PURE__ */ new Map();
var vadPlugins = /* @__PURE__ */ new Map();
var defaultNs = new RNNoisePlugin();
var defaultVad = new EnergyVADPlugin();
nsPlugins.set(defaultNs.name, defaultNs);
vadPlugins.set(defaultVad.name, defaultVad);

// Register (or replace) a noise-suppression plugin under its own `name`.
function registerNoiseSuppressionPlugin(plugin) {
  nsPlugins.set(plugin.name, plugin);
}

// Register (or replace) a VAD plugin under its own `name`.
function registerVADPlugin(plugin) {
  vadPlugins.set(plugin.name, plugin);
}

// Resolve a noise-suppression plugin by name. A missing name yields the
// default silently; an unknown name warns and falls back to the default.
function getNoiseSuppressionPlugin(name) {
  if (!name) return defaultNs;
  const found = nsPlugins.get(name);
  if (found) return found;
  console.warn(
    `Noise suppression plugin '${name}' not found, falling back to default.`
  );
  return defaultNs;
}

// Resolve a VAD plugin by name, with the same fallback policy as above.
function getVADPlugin(name) {
  if (!name) return defaultVad;
  const found = vadPlugins.get(name);
  if (found) return found;
  console.warn(`VAD plugin '${name}' not found, falling back to default.`);
  return defaultVad;
}

export {
  registerNoiseSuppressionPlugin,
  registerVADPlugin,
  getNoiseSuppressionPlugin,
  getVADPlugin
};
import {
  VADStateMachine
} from "./chunk-JJASCVEW.mjs";
import {
  getAudioContext,
  registerPipeline,
  unregisterPipeline
} from "./chunk-OZ7KMC4S.mjs";
import {
  getNoiseSuppressionPlugin,
  getVADPlugin
} from "./chunk-6P2RDBW5.mjs";

// src/pipeline/audio-pipeline.ts
import mitt from "mitt";

/**
 * Builds the processing graph for one audio track:
 *
 *   source -> noise suppression -> splitter -> VAD node (analysis only)
 *                                      \-> delay (pre-roll) -> gain (VAD gate) -> destination
 *
 * @param {MediaStreamTrack} sourceTrack - live "audio" track to process.
 * @param {object} [config] - partial processing config; missing fields get defaults.
 * @returns {Promise<object>} pipeline handle: { processedTrack, events, state, setConfig, dispose }.
 * @throws {Error} if the track is missing/ended, a plugin node fails to build,
 *   or the graph cannot be wired.
 */
async function createAudioPipeline(sourceTrack, config = {}) {
  const context = getAudioContext();
  registerPipeline();
  // Noise suppression requires all three asset URLs; without them it is forced
  // off (passthrough), matching the README's documented fallback behavior.
  const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl);
  const vadEnabled = config.vad?.enabled !== false;
  const fullConfig = {
    // Fix: spread the user config BEFORE the computed `enabled` flags. The
    // previous order ({ enabled: nsEnabled, ...config.noiseSuppression })
    // let a user-supplied `enabled: true` re-enable noise suppression even
    // when the required asset URLs were missing.
    noiseSuppression: {
      ...config.noiseSuppression,
      enabled: nsEnabled
    },
    vad: {
      ...config.vad,
      enabled: vadEnabled
    },
    output: {
      speechGain: 1,
      silenceGain: vadEnabled ? 0 : 1,
      // If no VAD, always output audio
      gainRampTime: 0.02,
      ...config.output
    },
    livekit: { manageTrackMute: false, ...config.livekit }
  };
  console.log("Audio pipeline config:", {
    noiseSuppression: fullConfig.noiseSuppression?.enabled,
    vad: fullConfig.vad?.enabled,
    output: fullConfig.output
  });
  if (!sourceTrack || sourceTrack.kind !== "audio") {
    throw new Error("createAudioPipeline requires a valid audio MediaStreamTrack");
  }
  if (sourceTrack.readyState === "ended") {
    throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
  }
  const sourceStream = new MediaStream([sourceTrack]);
  const sourceNode = context.createMediaStreamSource(sourceStream);
  let nsNode;
  let vadNode;
  const emitter = mitt();
  // Fix: declare before the VAD decision callback is installed. The callback
  // previously closed over `lastVadState` ahead of its `let` declaration, so
  // a synchronously delivered first decision would have hit the temporal
  // dead zone.
  let lastVadState = {
    isSpeaking: false,
    probability: 0,
    state: "silent"
  };
  try {
    const nsPlugin = getNoiseSuppressionPlugin(
      fullConfig.noiseSuppression?.pluginName
    );
    nsNode = await nsPlugin.createNode(
      context,
      fullConfig.noiseSuppression
    );
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error("Failed to create noise suppression node:", err);
    emitter.emit("error", err);
    throw err;
  }
  const vadStateMachine = new VADStateMachine(fullConfig.vad);
  try {
    const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
    vadNode = await vadPlugin.createNode(
      context,
      fullConfig.vad,
      (prob) => {
        try {
          const timestamp = context.currentTime * 1e3;
          const newState = vadStateMachine.processFrame(prob, timestamp);
          // Emit only on a state flip or a noticeable probability move
          // (> 0.1) to keep the event stream sparse.
          if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
            emitter.emit("vadChange", newState);
            lastVadState = newState;
            updateGain(newState);
          }
        } catch (vadError) {
          const err = vadError instanceof Error ? vadError : new Error(String(vadError));
          console.error("Error in VAD callback:", err);
          emitter.emit("error", err);
        }
      }
    );
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error("Failed to create VAD node:", err);
    emitter.emit("error", err);
    throw err;
  }
  const splitter = context.createGain();
  sourceNode.connect(nsNode);
  nsNode.connect(splitter);
  splitter.connect(vadNode);
  // Delay the audible path by the pre-roll window so speech onsets detected
  // slightly late by the VAD are not clipped when the gate opens.
  const delayNode = context.createDelay(1);
  const preRollSeconds = (fullConfig.vad?.preRollMs ?? 200) / 1e3;
  delayNode.delayTime.value = preRollSeconds;
  const gainNode = context.createGain();
  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
  const destination = context.createMediaStreamDestination();
  try {
    splitter.connect(delayNode);
    delayNode.connect(gainNode);
    gainNode.connect(destination);
  } catch (error) {
    const err = error instanceof Error ? error : new Error(String(error));
    console.error("Failed to wire audio pipeline:", err);
    emitter.emit("error", err);
    throw err;
  }
  // Ramp the gate toward speechGain/silenceGain. Hoisted function declaration,
  // so the VAD callback above may reference it.
  function updateGain(state) {
    try {
      const { speechGain, silenceGain, gainRampTime } = fullConfig.output;
      const targetGain = state.isSpeaking ? speechGain ?? 1 : silenceGain ?? 0;
      const now = context.currentTime;
      gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime ?? 0.02);
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      console.error("Failed to update gain:", err);
      emitter.emit("error", err);
    }
  }
  const audioTracks = destination.stream.getAudioTracks();
  console.log("Destination stream tracks:", {
    count: audioTracks.length,
    tracks: audioTracks.map((t) => ({
      id: t.id,
      label: t.label,
      enabled: t.enabled,
      readyState: t.readyState
    }))
  });
  if (audioTracks.length === 0) {
    const err = new Error(
      "Failed to create processed audio track: destination stream has no audio tracks. This may indicate an issue with the audio graph connection."
    );
    console.error(err);
    emitter.emit("error", err);
    throw err;
  }
  const processedTrack = audioTracks[0];
  if (!processedTrack || processedTrack.readyState === "ended") {
    const err = new Error("Processed audio track is invalid or ended");
    console.error(err);
    emitter.emit("error", err);
    throw err;
  }
  console.log("Audio pipeline created successfully:", {
    sourceTrack: {
      id: sourceTrack.id,
      label: sourceTrack.label,
      readyState: sourceTrack.readyState
    },
    processedTrack: {
      id: processedTrack.id,
      label: processedTrack.label,
      readyState: processedTrack.readyState
    },
    config: {
      noiseSuppression: fullConfig.noiseSuppression?.enabled,
      vad: fullConfig.vad?.enabled
    }
  });
  // Tear down the graph and release destination tracks. Errors are logged,
  // not rethrown.
  function dispose() {
    try {
      sourceNode.disconnect();
      nsNode.disconnect();
      splitter.disconnect();
      vadNode.disconnect();
      delayNode.disconnect();
      gainNode.disconnect();
      destination.stream.getTracks().forEach((t) => t.stop());
      unregisterPipeline();
    } catch (error) {
      console.error("Error during pipeline disposal:", error);
    }
  }
  return {
    processedTrack,
    events: emitter,
    get state() {
      return lastVadState;
    },
    setConfig: (newConfig) => {
      try {
        if (newConfig.vad) {
          vadStateMachine.updateConfig(newConfig.vad);
        }
      } catch (error) {
        const err = error instanceof Error ? error : new Error(String(error));
        console.error("Failed to update config:", err);
        emitter.emit("error", err);
      }
    },
    dispose
  };
}

export {
  createAudioPipeline
};
@@ -1,36 +1,28 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
constructor(config: AudioProcessingConfig["vad"]) {
|
|
1
|
+
// src/vad/vad-state.ts
|
|
2
|
+
var VADStateMachine = class {
|
|
3
|
+
config;
|
|
4
|
+
currentState = "silent";
|
|
5
|
+
lastSpeechTime = 0;
|
|
6
|
+
speechStartTime = 0;
|
|
7
|
+
frameDurationMs = 20;
|
|
8
|
+
// Assumed frame duration, updated by calls
|
|
9
|
+
constructor(config) {
|
|
11
10
|
this.config = {
|
|
12
11
|
enabled: config?.enabled ?? true,
|
|
13
12
|
pluginName: config?.pluginName ?? "energy-vad",
|
|
14
13
|
startThreshold: config?.startThreshold ?? 0.5,
|
|
15
14
|
stopThreshold: config?.stopThreshold ?? 0.4,
|
|
16
15
|
hangoverMs: config?.hangoverMs ?? 300,
|
|
17
|
-
preRollMs: config?.preRollMs ?? 200
|
|
16
|
+
preRollMs: config?.preRollMs ?? 200
|
|
18
17
|
};
|
|
19
18
|
}
|
|
20
|
-
|
|
21
|
-
updateConfig(config: Partial<AudioProcessingConfig["vad"]>) {
|
|
19
|
+
updateConfig(config) {
|
|
22
20
|
this.config = { ...this.config, ...config };
|
|
23
21
|
}
|
|
24
|
-
|
|
25
|
-
processFrame(probability: number, timestamp: number): VADState {
|
|
22
|
+
processFrame(probability, timestamp) {
|
|
26
23
|
const { startThreshold, stopThreshold, hangoverMs } = this.config;
|
|
27
|
-
|
|
28
24
|
let newState = this.currentState;
|
|
29
|
-
|
|
30
|
-
if (
|
|
31
|
-
this.currentState === "silent" ||
|
|
32
|
-
this.currentState === "speech_ending"
|
|
33
|
-
) {
|
|
25
|
+
if (this.currentState === "silent" || this.currentState === "speech_ending") {
|
|
34
26
|
if (probability >= startThreshold) {
|
|
35
27
|
newState = "speech_starting";
|
|
36
28
|
this.speechStartTime = timestamp;
|
|
@@ -38,34 +30,30 @@ export class VADStateMachine {
|
|
|
38
30
|
} else {
|
|
39
31
|
newState = "silent";
|
|
40
32
|
}
|
|
41
|
-
} else if (
|
|
42
|
-
this.currentState === "speech_starting" ||
|
|
43
|
-
this.currentState === "speaking"
|
|
44
|
-
) {
|
|
33
|
+
} else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
|
|
45
34
|
if (probability >= stopThreshold) {
|
|
46
35
|
newState = "speaking";
|
|
47
36
|
this.lastSpeechTime = timestamp;
|
|
48
37
|
} else {
|
|
49
|
-
// Check hangover
|
|
50
38
|
const timeSinceSpeech = timestamp - this.lastSpeechTime;
|
|
51
39
|
if (timeSinceSpeech < hangoverMs) {
|
|
52
|
-
newState = "speaking";
|
|
40
|
+
newState = "speaking";
|
|
53
41
|
} else {
|
|
54
42
|
newState = "speech_ending";
|
|
55
43
|
}
|
|
56
44
|
}
|
|
57
45
|
}
|
|
58
|
-
|
|
59
|
-
// Transition from starting/ending to stable states
|
|
60
46
|
if (newState === "speech_starting") newState = "speaking";
|
|
61
47
|
if (newState === "speech_ending") newState = "silent";
|
|
62
|
-
|
|
63
48
|
this.currentState = newState;
|
|
64
|
-
|
|
65
49
|
return {
|
|
66
50
|
isSpeaking: newState === "speaking",
|
|
67
51
|
probability,
|
|
68
|
-
state: newState
|
|
52
|
+
state: newState
|
|
69
53
|
};
|
|
70
54
|
}
|
|
71
|
-
}
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
export {
|
|
58
|
+
VADStateMachine
|
|
59
|
+
};
|
|
// src/context/audio-context.ts

// Lazily-created AudioContext shared by every pipeline, plus a counter of
// pipelines currently using it.
var sharedContext = null;
var activePipelines = 0;

/**
 * Returns the shared AudioContext, creating it on demand (or re-creating it
 * after a close). Browser-only.
 * @throws {Error} outside a browser or without AudioContext support.
 */
function getAudioContext(options) {
  const supported = typeof window !== "undefined" && typeof AudioContext !== "undefined";
  if (!supported) {
    throw new Error(
      "AudioContext is not supported in this environment (browser only)."
    );
  }
  const reusable = sharedContext && sharedContext.state !== "closed";
  if (!reusable) {
    sharedContext = new AudioContext(options);
  }
  return sharedContext;
}

// Bump the pipeline refcount.
function registerPipeline() {
  activePipelines += 1;
}

// Drop the pipeline refcount, never going below zero.
function unregisterPipeline() {
  if (activePipelines > 0) {
    activePipelines -= 1;
  }
}

// Resume the shared context when it exists and is suspended; otherwise no-op.
async function resumeAudioContext() {
  if (sharedContext?.state === "suspended") {
    await sharedContext.resume();
  }
}

// Suspend the shared context when it is currently running; otherwise no-op.
async function suspendAudioContext() {
  if (sharedContext?.state === "running") {
    await sharedContext.suspend();
  }
}

// Close the shared context (if open) and reset all module state.
async function closeAudioContext() {
  if (sharedContext !== null && sharedContext.state !== "closed") {
    await sharedContext.close();
  }
  sharedContext = null;
  activePipelines = 0;
}

export {
  getAudioContext,
  registerPipeline,
  unregisterPipeline,
  resumeAudioContext,
  suspendAudioContext,
  closeAudioContext
};
// src/vad/vad-node.ts

// Inline AudioWorklet source: computes per-block RMS energy, tracks an
// adaptive noise floor, and posts a heuristic 0-1 speech probability.
var energyVadWorkletCode = `
class EnergyVadProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.smoothing = 0.95;
    this.energy = 0;
    this.noiseFloor = 0.001;
  }

  process(inputs, outputs, parameters) {
    const input = inputs[0];
    if (!input || !input.length) return true;
    const channel = input[0];

    // Calculate RMS
    let sum = 0;
    for (let i = 0; i < channel.length; i++) {
      sum += channel[i] * channel[i];
    }
    const rms = Math.sqrt(sum / channel.length);

    // Simple adaptive noise floor (very basic)
    if (rms < this.noiseFloor) {
      this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
    } else {
      this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
    }

    // Calculate "probability" based on SNR
    // This is a heuristic mapping from energy to 0-1
    const snr = rms / (this.noiseFloor + 1e-6);
    const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling

    this.port.postMessage({ probability });

    return true;
  }
}
registerProcessor('energy-vad-processor', EnergyVadProcessor);
`;

/**
 * Built-in energy-based VAD plugin. createNode() returns either a passthrough
 * GainNode (when VAD is disabled) or an AudioWorkletNode whose speech
 * probabilities are delivered through `onDecision`.
 */
var EnergyVADPlugin = class {
  name = "energy-vad";

  /**
   * @param context - BaseAudioContext to build the node in.
   * @param config - VAD section of the processing config (may be undefined).
   * @param onDecision - callback receiving each probability in [0, 1].
   * @returns {Promise<AudioNode>} passthrough gain or worklet node.
   * @throws {Error} when the worklet module or node cannot be created.
   */
  async createNode(context, config, onDecision) {
    if (!config?.enabled) {
      console.log("VAD disabled, using passthrough node");
      const pass = context.createGain();
      return pass;
    }
    const blob = new Blob([energyVadWorkletCode], {
      type: "application/javascript"
    });
    const url = URL.createObjectURL(blob);
    try {
      await context.audioWorklet.addModule(url);
      console.log("Energy VAD worklet loaded successfully");
    } catch (e) {
      const error = new Error(
        `Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
      );
      console.error(error.message);
      throw error;
    } finally {
      // Fix: single revocation point. The original revoked the blob URL in
      // the catch path and then again unconditionally after the try.
      URL.revokeObjectURL(url);
    }
    let node;
    try {
      node = new AudioWorkletNode(context, "energy-vad-processor");
      console.log("Energy VAD node created successfully");
    } catch (e) {
      const error = new Error(
        `Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`
      );
      console.error(error.message);
      throw error;
    }
    node.port.onmessage = (event) => {
      try {
        const { probability } = event.data;
        // Fix: Number.isNaN instead of the coercing global isNaN.
        if (typeof probability === "number" && !Number.isNaN(probability)) {
          onDecision(probability);
        } else {
          console.warn("Invalid VAD probability received:", event.data);
        }
      } catch (error) {
        console.error("Error in VAD message handler:", error);
      }
    };
    node.port.onmessageerror = (event) => {
      console.error("VAD port message error:", event);
    };
    return node;
  }
};

export {
  EnergyVADPlugin
};
File without changes
|
|
import {
  createAudioPipeline
} from "./chunk-EXH2PNUE.mjs";

// src/livekit/integration.ts

/**
 * Attaches the processing pipeline to a LiveKit LocalAudioTrack by replacing
 * its underlying MediaStreamTrack with the processed one. Optionally manages
 * track-level mute from VAD decisions. The returned pipeline's dispose()
 * restores the original track when it is still live.
 */
async function attachProcessingToTrack(track, config = {}) {
  if (!track) {
    throw new Error("attachProcessingToTrack requires a valid LocalAudioTrack");
  }
  const originalTrack = track.mediaStreamTrack;
  if (!originalTrack) {
    throw new Error("LocalAudioTrack has no underlying MediaStreamTrack");
  }
  if (originalTrack.readyState === "ended") {
    throw new Error("Cannot attach processing to an ended MediaStreamTrack");
  }
  let pipeline;
  try {
    console.log("Creating audio processing pipeline...");
    pipeline = await createAudioPipeline(originalTrack, config);
    console.log("Audio processing pipeline created successfully");
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    const err = new Error(`Failed to create audio pipeline: ${reason}`);
    console.error(err);
    throw err;
  }
  if (!pipeline.processedTrack) {
    throw new Error("Pipeline did not return a processed track");
  }
  try {
    console.log("Replacing LiveKit track with processed track...");
    await track.replaceTrack(pipeline.processedTrack);
    console.log("LiveKit track replaced successfully");
  } catch (error) {
    // The swap failed: release the graph before surfacing the failure.
    pipeline.dispose();
    const reason = error instanceof Error ? error.message : String(error);
    const err = new Error(`Failed to replace LiveKit track: ${reason}`);
    console.error(err);
    throw err;
  }
  if (config.livekit?.manageTrackMute) {
    // Track whether *we* muted, so a user-initiated mute is never overridden.
    let isVadMuted = false;
    pipeline.events.on("vadChange", async (state) => {
      try {
        if (state.isSpeaking && isVadMuted) {
          await track.unmute();
          isVadMuted = false;
        } else if (!state.isSpeaking && !track.isMuted) {
          await track.mute();
          isVadMuted = true;
        }
      } catch (error) {
        console.error("Error handling VAD-based track muting:", error);
      }
    });
  }
  pipeline.events.on("error", (error) => {
    console.error("Audio pipeline error:", error);
  });
  // Wrap dispose so the original track is swapped back in (best-effort)
  // before the graph is torn down.
  const originalDispose = pipeline.dispose;
  pipeline.dispose = () => {
    try {
      if (originalTrack.readyState === "live") {
        console.log("Restoring original track...");
        track.replaceTrack(originalTrack).catch((error) => {
          console.error("Failed to restore original track:", error);
        });
      }
      originalDispose();
    } catch (error) {
      console.error("Error during pipeline disposal:", error);
      try {
        originalDispose();
      } catch (disposeError) {
        console.error("Error calling original dispose:", disposeError);
      }
    }
  };
  return pipeline;
}

export {
  attachProcessingToTrack
};