@tensamin/audio 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +218 -30
- package/dist/chunk-AHBRT4RD.mjs +307 -0
- package/dist/chunk-ERJVV5JR.mjs +91 -0
- package/dist/chunk-N553RHTI.mjs +93 -0
- package/dist/chunk-NMHKX64G.mjs +118 -0
- package/dist/chunk-XO6B3D4A.mjs +67 -0
- package/dist/{chunk-FS635GMR.mjs → chunk-YOSTLLCS.mjs} +2 -2
- package/dist/extensibility/plugins.js +110 -32
- package/dist/extensibility/plugins.mjs +3 -3
- package/dist/index.js +463 -97
- package/dist/index.mjs +6 -6
- package/dist/livekit/integration.js +463 -97
- package/dist/livekit/integration.mjs +6 -6
- package/dist/noise-suppression/rnnoise-node.js +42 -14
- package/dist/noise-suppression/rnnoise-node.mjs +1 -1
- package/dist/pipeline/audio-pipeline.js +396 -83
- package/dist/pipeline/audio-pipeline.mjs +5 -5
- package/dist/types.d.mts +118 -10
- package/dist/types.d.ts +118 -10
- package/dist/vad/vad-node.js +68 -18
- package/dist/vad/vad-node.mjs +1 -1
- package/dist/vad/vad-state.d.mts +1 -0
- package/dist/vad/vad-state.d.ts +1 -0
- package/dist/vad/vad-state.js +42 -8
- package/dist/vad/vad-state.mjs +1 -1
- package/package.json +1 -1
- package/dist/chunk-HFSKQ33X.mjs +0 -38
- package/dist/chunk-JJASCVEW.mjs +0 -59
- package/dist/chunk-QU7E5HBA.mjs +0 -106
- package/dist/chunk-SDTOKWM2.mjs +0 -39
- package/dist/chunk-UMU2KIB6.mjs +0 -68
package/README.md
CHANGED
@@ -1,65 +1,56 @@
 # @tensamin/audio
 
-
+Audio processing library for the web with RNNoise-based noise suppression and Voice Activity Detection (VAD). Designed for voice communication applications with LiveKit integration support.
 
 ## Features
 
--
--
--
--
--
+- Configurable Voice Activity Detection with energy-based algorithm
+- RNNoise noise suppression via `@sapphi-red/web-noise-suppressor`
+- Automatic audio gating based on voice detection
+- Runtime configuration updates
+- LiveKit `LocalAudioTrack` integration
+- Plugin system for custom audio processors
+- Optional dynamic range compression
 
 ## Installation
 
 ```bash
 npm install @tensamin/audio livekit-client
-bun add @tensamin/audio livekit-client
-pnpm install @tensamin/audio livekit-client
 ```
 
-##
+## Requirements
 
-
-
-1. Download the following files from `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`:
+For noise suppression, the following files must be provided:
 
 - `rnnoise.wasm`
 - `rnnoise_simd.wasm`
-- `
+- `worklet.js`
+
+Available at: `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`
 
-
+Place these files in a publicly accessible directory (e.g., `public/audio-processor/`).
 
 ## Usage
 
-### Basic
+### Basic Example
 
 ```ts
 import { createAudioPipeline } from "@tensamin/audio";
 
-// Get a stream
 const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
 const track = stream.getAudioTracks()[0];
 
-// Create pipeline
 const pipeline = await createAudioPipeline(track, {
   noiseSuppression: {
     enabled: true,
     wasmUrl: "/audio-processor/rnnoise.wasm",
     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-    workletUrl: "/audio-processor/
+    workletUrl: "/audio-processor/worklet.js",
   },
   vad: { enabled: true },
 });
 
-// Use the processed track
 const processedStream = new MediaStream([pipeline.processedTrack]);
-// audioElement.srcObject = processedStream;
-
-// Listen to VAD events
-pipeline.events.on("vadChange", (state) => {
-  console.log("Is Speaking:", state.isSpeaking);
-});
 ```
 
 ### LiveKit Integration
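The new Requirements section above assumes the three RNNoise assets are served from a static path. A misconfigured deployment is easier to catch by probing those paths before building the pipeline; the following is an optional preflight sketch using the README's example layout, not part of the package itself:

```ts
// Optional preflight check (not part of @tensamin/audio): confirm the
// RNNoise assets from the Requirements section are actually reachable.
const assets = [
  "/audio-processor/rnnoise.wasm",
  "/audio-processor/rnnoise_simd.wasm",
  "/audio-processor/worklet.js",
];
await Promise.all(
  assets.map(async (url) => {
    const res = await fetch(url, { method: "HEAD" });
    if (!res.ok) throw new Error(`Missing audio asset: ${url} (${res.status})`);
  })
);
```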
@@ -68,21 +59,218 @@ pipeline.events.on("vadChange", (state) => {
 import { attachProcessingToTrack } from "@tensamin/audio";
 import { LocalAudioTrack } from "livekit-client";
 
-// Assume you have a LocalAudioTrack
 const localTrack = await LocalAudioTrack.create();
 
-// Attach processing (replaces the underlying track)
 const pipeline = await attachProcessingToTrack(localTrack, {
   noiseSuppression: {
     enabled: true,
     wasmUrl: "/audio-processor/rnnoise.wasm",
     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-    workletUrl: "/audio-processor/
+    workletUrl: "/audio-processor/worklet.js",
   },
   vad: { enabled: true },
-  livekit: { manageTrackMute: true },
+  livekit: { manageTrackMute: true },
 });
 
-// Publish the track
 await room.localParticipant.publishTrack(localTrack);
 ```
+
+### Monitoring VAD State
+
+```ts
+pipeline.events.on("vadChange", (state) => {
+  console.log("Speaking:", state.isSpeaking);
+  console.log("Probability:", state.probability);
+  console.log("State:", state.state);
+});
+```
+
+## Configuration
+
+### Voice Activity Detection
+
+```ts
+vad: {
+  enabled: boolean;
+  startThreshold: number; // Default: 0.6 (range: 0-1)
+  stopThreshold: number; // Default: 0.45 (range: 0-1)
+  hangoverMs: number; // Default: 400
+  preRollMs: number; // Default: 250
+  minSpeechDurationMs: number; // Default: 100
+  minSilenceDurationMs: number; // Default: 150
+  energyVad?: {
+    smoothing: number; // Default: 0.95
+    initialNoiseFloor: number; // Default: 0.001
+    noiseFloorAdaptRateQuiet: number; // Default: 0.01
+    noiseFloorAdaptRateLoud: number; // Default: 0.001
+    minSNR: number; // Default: 2.0
+    snrRange: number; // Default: 8.0
+  };
+}
+```
+
+**Threshold Parameters:**
+
+- `startThreshold`: Probability threshold to unmute audio
+- `stopThreshold`: Probability threshold to mute audio (after hangover)
+- `hangoverMs`: Delay before muting after speech stops
+- `preRollMs`: Audio buffer duration before speech onset
+- `minSpeechDurationMs`: Minimum duration to consider as valid speech
+- `minSilenceDurationMs`: Minimum silence duration between speech segments
+
+**Energy VAD Parameters:**
+
+- `smoothing`: Energy calculation smoothing factor (0-1)
+- `minSNR`: Minimum signal-to-noise ratio for speech detection
+- `snrRange`: Range for probability scaling from minSNR
+
+### Output Control
+
+```ts
+output: {
+  speechGain: number; // Default: 1.0
+  silenceGain: number; // Default: 0.0
+  gainRampTime: number; // Default: 0.015 (seconds)
+  smoothTransitions: boolean; // Default: true
+  maxGainDb: number; // Default: 6.0
+  enableCompression: boolean; // Default: false
+  compression?: {
+    threshold: number; // Default: -24.0 (dB)
+    ratio: number; // Default: 3.0
+    attack: number; // Default: 0.003 (seconds)
+    release: number; // Default: 0.05 (seconds)
+  };
+}
+```
+
+**Gain Parameters:**
+
+- `speechGain`: Gain multiplier when speaking (1.0 = unity)
+- `silenceGain`: Gain multiplier when silent (0.0 = mute)
+- `gainRampTime`: Transition duration for gain changes
+- `maxGainDb`: Maximum gain limit to prevent clipping
+
+**Compression Parameters:**
+
+- `threshold`: Level above which compression is applied
+- `ratio`: Compression ratio (e.g., 3.0 = 3:1)
+- `attack`: Time to reach full compression
+- `release`: Time to release compression
+
+### Runtime Configuration Updates
+
+```ts
+pipeline.setConfig({
+  vad: {
+    startThreshold: 0.7,
+    stopThreshold: 0.55,
+  },
+  output: {
+    speechGain: 1.3,
+  },
+});
+```
+
+## Configuration Examples
+
+### Noisy Environment
+
+```ts
+{
+  vad: {
+    startThreshold: 0.7,
+    stopThreshold: 0.55,
+    minSpeechDurationMs: 150,
+    energyVad: { minSNR: 3.0 }
+  }
+}
+```
+
+### Quiet Speaker
+
+```ts
+{
+  vad: {
+    startThreshold: 0.4,
+    stopThreshold: 0.25,
+    energyVad: { minSNR: 1.5 }
+  },
+  output: {
+    speechGain: 1.5
+  }
+}
+```
+
+### Natural Conversation
+
+```ts
+{
+  vad: {
+    startThreshold: 0.5,
+    stopThreshold: 0.3,
+    hangoverMs: 600,
+  },
+  output: {
+    silenceGain: 0.2
+  }
+}
+```
+
+## API Reference
+
+### `createAudioPipeline(track, config)`
+
+Creates an audio processing pipeline from a MediaStreamTrack.
+
+**Parameters:**
+
+- `track`: MediaStreamTrack - Source audio track
+- `config`: AudioProcessingConfig - Configuration object
+
+**Returns:** `Promise<AudioPipelineHandle>`
+
+### AudioPipelineHandle
+
+```ts
+interface AudioPipelineHandle {
+  processedTrack: MediaStreamTrack;
+  events: Emitter<AudioPipelineEvents>;
+  state: VADState;
+  setConfig(config: Partial<AudioProcessingConfig>): void;
+  dispose(): void;
+}
+```
+
+### AudioPipelineEvents
+
+```ts
+type AudioPipelineEvents = {
+  vadChange: VADState;
+  error: Error;
+};
+```
+
+### VADState
+
+```ts
+interface VADState {
+  isSpeaking: boolean;
+  probability: number;
+  state: "silent" | "speech_starting" | "speaking" | "speech_ending";
+}
+```
+
+## Default Values
+
+| Parameter | Default | Description |
+| ---------------------- | ------- | -------------------------------- |
+| `startThreshold` | 0.6 | Unmute at 60% confidence |
+| `stopThreshold` | 0.45 | Mute below 45% confidence |
+| `hangoverMs` | 400 | Wait 400ms before muting |
+| `preRollMs` | 250 | Buffer 250ms before speech |
+| `minSpeechDurationMs` | 100 | Minimum valid speech duration |
+| `minSilenceDurationMs` | 150 | Minimum silence between speech |
+| `silenceGain` | 0.0 | Complete mute when silent |
+| `speechGain` | 1.0 | Unity gain when speaking |
+| `minSNR` | 2.0 | Voice must be 2x noise floor |
+| `snrRange` | 8.0 | Probability scales over SNR 2-10 |
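The README's threshold parameters describe a two-threshold (hysteresis) gate: audio unmutes once the speech probability crosses `startThreshold`, and mutes only after it has stayed below `stopThreshold` for `hangoverMs`. The sketch below illustrates that behavior with the documented defaults; `makeGate` is a hypothetical helper, not the package's actual `VADStateMachine`:

```ts
// Illustrative hysteresis gate, built from the README's documented
// defaults. Not the package's VADStateMachine.
function makeGate(startThreshold = 0.6, stopThreshold = 0.45, hangoverMs = 400) {
  let speaking = false;
  let belowSince: number | null = null; // when probability first dropped below stopThreshold
  return (probability: number, nowMs: number): boolean => {
    if (probability >= startThreshold) {
      speaking = true;
      belowSince = null; // a strong frame cancels any pending mute
    } else if (speaking && probability < stopThreshold) {
      belowSince ??= nowMs;
      if (nowMs - belowSince >= hangoverMs) speaking = false; // mute after hangover
    }
    return speaking;
  };
}

const gate = makeGate();
gate(0.7, 0);   // true: crossed startThreshold
gate(0.3, 100); // true: below stopThreshold, but still inside the hangover
gate(0.3, 600); // false: 500 ms below stopThreshold exceeds the 400 ms hangover
```

Note that a probability between the two thresholds (e.g., 0.5) keeps an open gate open; the gap between 0.45 and 0.6 is what prevents flicker at the boundary.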
package/dist/chunk-AHBRT4RD.mjs
ADDED
@@ -0,0 +1,307 @@
+import {
+  VADStateMachine
+} from "./chunk-N553RHTI.mjs";
+import {
+  getAudioContext,
+  registerPipeline,
+  unregisterPipeline
+} from "./chunk-OZ7KMC4S.mjs";
+import {
+  getNoiseSuppressionPlugin,
+  getVADPlugin
+} from "./chunk-YOSTLLCS.mjs";
+
+// src/pipeline/audio-pipeline.ts
+import mitt from "mitt";
+async function createAudioPipeline(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const nsEnabled = config.noiseSuppression?.enabled !== false && Boolean(
+    config.noiseSuppression?.wasmUrl && config.noiseSuppression?.simdUrl && config.noiseSuppression?.workletUrl
+  );
+  const vadEnabled = config.vad?.enabled !== false;
+  const fullConfig = {
+    noiseSuppression: {
+      enabled: nsEnabled,
+      ...config.noiseSuppression
+    },
+    vad: {
+      enabled: vadEnabled,
+      // Voice-optimized defaults (will be overridden by config)
+      startThreshold: 0.6,
+      stopThreshold: 0.45,
+      hangoverMs: 400,
+      preRollMs: 250,
+      minSpeechDurationMs: 100,
+      minSilenceDurationMs: 150,
+      energyVad: {
+        smoothing: 0.95,
+        initialNoiseFloor: 1e-3,
+        noiseFloorAdaptRateQuiet: 0.01,
+        noiseFloorAdaptRateLoud: 1e-3,
+        minSNR: 2,
+        snrRange: 8
+      },
+      ...config.vad
+    },
+    output: {
+      speechGain: 1,
+      silenceGain: 0,
+      // Full mute for voice-only
+      gainRampTime: 0.015,
+      // Fast but smooth transitions
+      smoothTransitions: true,
+      maxGainDb: 6,
+      enableCompression: false,
+      compression: {
+        threshold: -24,
+        ratio: 3,
+        attack: 3e-3,
+        release: 0.05
+      },
+      ...config.output
+    },
+    livekit: { manageTrackMute: false, ...config.livekit }
+  };
+  console.log("Audio pipeline config:", {
+    noiseSuppression: fullConfig.noiseSuppression?.enabled,
+    vad: fullConfig.vad?.enabled,
+    output: fullConfig.output
+  });
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createAudioPipeline requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  let nsNode;
+  let vadNode;
+  const emitter = mitt();
+  try {
+    const nsPlugin = getNoiseSuppressionPlugin(
+      fullConfig.noiseSuppression?.pluginName
+    );
+    nsNode = await nsPlugin.createNode(context, fullConfig.noiseSuppression);
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to create noise suppression node:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  const vadStateMachine = new VADStateMachine(fullConfig.vad);
+  try {
+    const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
+    vadNode = await vadPlugin.createNode(context, fullConfig.vad, (prob) => {
+      try {
+        const timestamp = context.currentTime * 1e3;
+        const newState = vadStateMachine.processFrame(prob, timestamp);
+        if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
+          emitter.emit("vadChange", newState);
+          lastVadState = newState;
+          updateGain(newState);
+        }
+      } catch (vadError) {
+        const err = vadError instanceof Error ? vadError : new Error(String(vadError));
+        console.error("Error in VAD callback:", err);
+        emitter.emit("error", err);
+      }
+    });
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to create VAD node:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  let lastVadState = {
+    isSpeaking: false,
+    probability: 0,
+    state: "silent"
+  };
+  const splitter = context.createGain();
+  sourceNode.connect(nsNode);
+  nsNode.connect(splitter);
+  splitter.connect(vadNode);
+  const delayNode = context.createDelay(1);
+  const preRollSeconds = (fullConfig.vad?.preRollMs ?? 250) / 1e3;
+  delayNode.delayTime.value = preRollSeconds;
+  const gainNode = context.createGain();
+  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
+  let compressor = null;
+  if (fullConfig.output?.enableCompression) {
+    compressor = context.createDynamicsCompressor();
+    const comp = fullConfig.output.compression;
+    compressor.threshold.value = comp.threshold ?? -24;
+    compressor.ratio.value = comp.ratio ?? 3;
+    compressor.attack.value = comp.attack ?? 3e-3;
+    compressor.release.value = comp.release ?? 0.05;
+    compressor.knee.value = 10;
+  }
+  const destination = context.createMediaStreamDestination();
+  try {
+    splitter.connect(delayNode);
+    delayNode.connect(gainNode);
+    if (compressor) {
+      gainNode.connect(compressor);
+      compressor.connect(destination);
+      console.log("Compression enabled:", fullConfig.output?.compression);
+    } else {
+      gainNode.connect(destination);
+    }
+  } catch (error) {
+    const err = error instanceof Error ? error : new Error(String(error));
+    console.error("Failed to wire audio pipeline:", err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  function updateGain(state) {
+    try {
+      const {
+        speechGain = 1,
+        silenceGain = 0,
+        gainRampTime = 0.015,
+        smoothTransitions = true,
+        maxGainDb = 6
+      } = fullConfig.output;
+      const maxGainLinear = Math.pow(10, maxGainDb / 20);
+      const limitedSpeechGain = Math.min(speechGain, maxGainLinear);
+      const targetGain = state.isSpeaking ? limitedSpeechGain : silenceGain;
+      const now = context.currentTime;
+      if (smoothTransitions) {
+        gainNode.gain.cancelScheduledValues(now);
+        gainNode.gain.setValueAtTime(gainNode.gain.value, now);
+        gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime / 3);
+      } else {
+        gainNode.gain.setValueAtTime(targetGain, now);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      console.error("Failed to update gain:", err);
+      emitter.emit("error", err);
+    }
+  }
+  const audioTracks = destination.stream.getAudioTracks();
+  console.log("Destination stream tracks:", {
+    count: audioTracks.length,
+    tracks: audioTracks.map((t) => ({
+      id: t.id,
+      label: t.label,
+      enabled: t.enabled,
+      readyState: t.readyState
+    }))
+  });
+  if (audioTracks.length === 0) {
+    const err = new Error(
+      "Failed to create processed audio track: destination stream has no audio tracks. This may indicate an issue with the audio graph connection."
+    );
+    console.error(err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  const processedTrack = audioTracks[0];
+  if (!processedTrack || processedTrack.readyState === "ended") {
+    const err = new Error("Processed audio track is invalid or ended");
+    console.error(err);
+    emitter.emit("error", err);
+    throw err;
+  }
+  console.log("Audio pipeline created successfully:", {
+    sourceTrack: {
+      id: sourceTrack.id,
+      label: sourceTrack.label,
+      readyState: sourceTrack.readyState
+    },
+    processedTrack: {
+      id: processedTrack.id,
+      label: processedTrack.label,
+      readyState: processedTrack.readyState
+    },
+    config: {
+      noiseSuppression: fullConfig.noiseSuppression?.enabled,
+      vad: fullConfig.vad?.enabled
+    }
+  });
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      nsNode.disconnect();
+      splitter.disconnect();
+      vadNode.disconnect();
+      delayNode.disconnect();
+      gainNode.disconnect();
+      if (compressor) {
+        compressor.disconnect();
+      }
+      destination.stream.getTracks().forEach((t) => t.stop());
+      unregisterPipeline();
+    } catch (error) {
+      console.error("Error during pipeline disposal:", error);
+    }
+  }
+  return {
+    processedTrack,
+    events: emitter,
+    get state() {
+      return lastVadState;
+    },
+    setConfig: (newConfig) => {
+      try {
+        if (newConfig.vad) {
+          vadStateMachine.updateConfig(newConfig.vad);
+          Object.assign(fullConfig.vad, newConfig.vad);
+          if (newConfig.vad.preRollMs !== void 0) {
+            const preRollSeconds2 = newConfig.vad.preRollMs / 1e3;
+            delayNode.delayTime.setValueAtTime(
+              preRollSeconds2,
+              context.currentTime
+            );
+          }
+        }
+        if (newConfig.output) {
+          Object.assign(fullConfig.output, newConfig.output);
+          updateGain(lastVadState);
+          if (compressor && newConfig.output.compression) {
+            const comp = newConfig.output.compression;
+            if (comp.threshold !== void 0) {
+              compressor.threshold.setValueAtTime(
+                comp.threshold,
+                context.currentTime
+              );
+            }
+            if (comp.ratio !== void 0) {
+              compressor.ratio.setValueAtTime(comp.ratio, context.currentTime);
+            }
+            if (comp.attack !== void 0) {
+              compressor.attack.setValueAtTime(
+                comp.attack,
+                context.currentTime
+              );
+            }
+            if (comp.release !== void 0) {
+              compressor.release.setValueAtTime(
+                comp.release,
+                context.currentTime
+              );
+            }
+          }
+        }
+        if (newConfig.livekit) {
+          Object.assign(fullConfig.livekit, newConfig.livekit);
+        }
+        console.log("Pipeline config updated:", newConfig);
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        console.error("Failed to update config:", err);
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+}
+
+export {
+  createAudioPipeline
+};
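In `updateGain` above, the ramp uses `AudioParam.setTargetAtTime`, whose third argument is an exponential time constant τ, not a duration: the value covers a fraction 1 − e^(−t/τ) of the distance to the target after t seconds. Passing `gainRampTime / 3` therefore lands the gain within about 95% of its target by `gainRampTime` (since e^(−3) ≈ 0.05). A standalone sketch of the same scheduling against a bare `AudioContext`:

```ts
// Standalone sketch of the chunk's gain-ramp scheduling.
// setTargetAtTime(target, startTime, tau) follows
//   v(t) = target + (v0 - target) * exp(-(t - startTime) / tau),
// so tau = rampTime / 3 reaches ~95% of the target after rampTime seconds.
const ctx = new AudioContext();
const gain = ctx.createGain();
gain.gain.value = 0; // start muted, as the pipeline does with silenceGain

function rampTo(target: number, rampTime = 0.015): void {
  const now = ctx.currentTime;
  gain.gain.cancelScheduledValues(now);           // drop any pending ramp
  gain.gain.setValueAtTime(gain.gain.value, now); // anchor at the current value
  gain.gain.setTargetAtTime(target, now, rampTime / 3);
}

rampTo(1.0); // unmute: ~95% of unity gain after 15 ms
```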
package/dist/chunk-ERJVV5JR.mjs
ADDED
@@ -0,0 +1,91 @@
+import {
+  createAudioPipeline
+} from "./chunk-AHBRT4RD.mjs";
+
+// src/livekit/integration.ts
+async function attachProcessingToTrack(track, config = {}) {
+  if (!track) {
+    throw new Error("attachProcessingToTrack requires a valid LocalAudioTrack");
+  }
+  const originalTrack = track.mediaStreamTrack;
+  if (!originalTrack) {
+    throw new Error("LocalAudioTrack has no underlying MediaStreamTrack");
+  }
+  if (originalTrack.readyState === "ended") {
+    throw new Error("Cannot attach processing to an ended MediaStreamTrack");
+  }
+  let pipeline;
+  try {
+    console.log("Creating audio processing pipeline...");
+    pipeline = await createAudioPipeline(originalTrack, config);
+    console.log("Audio processing pipeline created successfully");
+  } catch (error) {
+    const err = new Error(
+      `Failed to create audio pipeline: ${error instanceof Error ? error.message : String(error)}`
+    );
+    console.error(err);
+    throw err;
+  }
+  if (!pipeline.processedTrack) {
+    throw new Error("Pipeline did not return a processed track");
+  }
+  try {
+    console.log("Replacing LiveKit track with processed track...");
+    await track.replaceTrack(pipeline.processedTrack);
+    console.log("LiveKit track replaced successfully");
+  } catch (error) {
+    pipeline.dispose();
+    const err = new Error(
+      `Failed to replace LiveKit track: ${error instanceof Error ? error.message : String(error)}`
+    );
+    console.error(err);
+    throw err;
+  }
+  if (config.livekit?.manageTrackMute) {
+    let isVadMuted = false;
+    pipeline.events.on("vadChange", async (state) => {
+      try {
+        if (state.isSpeaking) {
+          if (isVadMuted) {
+            await track.unmute();
+            isVadMuted = false;
+          }
+        } else {
+          if (!track.isMuted) {
+            await track.mute();
+            isVadMuted = true;
+          }
+        }
+      } catch (error) {
+        console.error("Error handling VAD-based track muting:", error);
+      }
+    });
+  }
+  pipeline.events.on("error", (error) => {
+    console.error("Audio pipeline error:", error);
+  });
+  const originalDispose = pipeline.dispose;
+  pipeline.dispose = () => {
+    try {
+      if (originalTrack.readyState === "live") {
+        console.log("Restoring original track...");
+        track.replaceTrack(originalTrack).catch((error) => {
+          console.error("Failed to restore original track:", error);
+        });
+      }
+      originalDispose();
+    } catch (error) {
+      console.error("Error during pipeline disposal:", error);
+    }
+  };
+  return pipeline;
+}
+
+export {
+  attachProcessingToTrack
+};
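As the integration chunk shows, `attachProcessingToTrack` swaps the processed track into the `LocalAudioTrack` and wraps `dispose` so the original track is restored on teardown. A sketch of the resulting lifecycle, following the README's own `LocalAudioTrack.create()` usage and assuming an already-connected LiveKit `room`:

```ts
import { attachProcessingToTrack } from "@tensamin/audio";
import { LocalAudioTrack, type Room } from "livekit-client";

declare const room: Room; // assumed: a room you have already connected elsewhere

const localTrack = await LocalAudioTrack.create();
const pipeline = await attachProcessingToTrack(localTrack, {
  vad: { enabled: true },
  livekit: { manageTrackMute: true }, // let VAD state drive track mute/unmute
});

// Surface pipeline failures (the chunk also logs them internally).
pipeline.events.on("error", (err) => console.error("pipeline error:", err));

await room.localParticipant.publishTrack(localTrack);

// Teardown: the wrapped dispose() first restores the original
// MediaStreamTrack (if it is still live), then tears down the audio graph.
pipeline.dispose();
```

Because disposal replaces the processed track with the original one, the published track keeps flowing unprocessed after teardown; unpublish or stop `localTrack` separately if you want it silenced.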