@tensamin/audio 0.1.14 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +48 -231
- package/dist/chunk-6BJ4XGSA.mjs +80 -0
- package/dist/chunk-AQ5RVY33.mjs +74 -0
- package/dist/chunk-IS37FHDN.mjs +33 -0
- package/dist/chunk-K4J3UUOR.mjs +178 -0
- package/dist/chunk-QNQK6QFB.mjs +71 -0
- package/dist/context/audio-context.d.mts +0 -24
- package/dist/context/audio-context.d.ts +0 -24
- package/dist/index.d.mts +2 -8
- package/dist/index.d.ts +2 -8
- package/dist/index.js +285 -680
- package/dist/index.mjs +8 -43
- package/dist/livekit/integration.d.mts +3 -7
- package/dist/livekit/integration.d.ts +3 -7
- package/dist/livekit/integration.js +280 -626
- package/dist/livekit/integration.mjs +7 -8
- package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
- package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
- package/dist/noise-suppression/deepfilternet-node.js +57 -0
- package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +2 -2
- package/dist/pipeline/audio-pipeline.d.ts +2 -2
- package/dist/pipeline/audio-pipeline.js +219 -554
- package/dist/pipeline/audio-pipeline.mjs +4 -5
- package/dist/types.d.mts +42 -257
- package/dist/types.d.ts +42 -257
- package/dist/vad/vad-node.d.mts +7 -9
- package/dist/vad/vad-node.d.ts +7 -9
- package/dist/vad/vad-node.js +47 -156
- package/dist/vad/vad-node.mjs +3 -3
- package/dist/vad/vad-state.d.mts +9 -11
- package/dist/vad/vad-state.d.ts +9 -11
- package/dist/vad/vad-state.js +50 -79
- package/dist/vad/vad-state.mjs +3 -3
- package/package.json +21 -21
- package/dist/chunk-2G2JFHJY.mjs +0 -180
- package/dist/chunk-6F2HZUYO.mjs +0 -91
- package/dist/chunk-K4YLH73B.mjs +0 -103
- package/dist/chunk-R5M2DGAQ.mjs +0 -311
- package/dist/chunk-UFKIAMG3.mjs +0 -47
- package/dist/chunk-XO6B3D4A.mjs +0 -67
- package/dist/extensibility/plugins.d.mts +0 -9
- package/dist/extensibility/plugins.d.ts +0 -9
- package/dist/extensibility/plugins.js +0 -320
- package/dist/extensibility/plugins.mjs +0 -14
- package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
- package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
- package/dist/noise-suppression/rnnoise-node.js +0 -101
- package/dist/noise-suppression/rnnoise-node.mjs +0 -6
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
2
|
createAudioPipeline
|
|
3
|
-
} from "../chunk-
|
|
4
|
-
import "../chunk-K4YLH73B.mjs";
|
|
3
|
+
} from "../chunk-K4J3UUOR.mjs";
|
|
5
4
|
import "../chunk-OZ7KMC4S.mjs";
|
|
6
|
-
import "../chunk-
|
|
7
|
-
import "../chunk-
|
|
8
|
-
import "../chunk-
|
|
5
|
+
import "../chunk-IS37FHDN.mjs";
|
|
6
|
+
import "../chunk-QNQK6QFB.mjs";
|
|
7
|
+
import "../chunk-AQ5RVY33.mjs";
|
|
9
8
|
export {
|
|
10
9
|
createAudioPipeline
|
|
11
10
|
};
|
package/dist/types.d.mts
CHANGED
|
@@ -1,270 +1,55 @@
|
|
|
1
1
|
import { Emitter } from 'mitt';
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
* Noise suppression configuration.
|
|
9
|
-
*/
|
|
10
|
-
noiseSuppression?: {
|
|
11
|
-
enabled: boolean;
|
|
12
|
-
/**
|
|
13
|
-
* Path or URL to the RNNoise WASM binary.
|
|
14
|
-
* REQUIRED if enabled.
|
|
15
|
-
*/
|
|
16
|
-
wasmUrl?: string;
|
|
17
|
-
/**
|
|
18
|
-
* Path or URL to the RNNoise SIMD WASM binary.
|
|
19
|
-
* REQUIRED if enabled.
|
|
20
|
-
*/
|
|
21
|
-
simdUrl?: string;
|
|
22
|
-
/**
|
|
23
|
-
* Path or URL to the RNNoise worklet script.
|
|
24
|
-
* REQUIRED if enabled.
|
|
25
|
-
*/
|
|
26
|
-
workletUrl?: string;
|
|
27
|
-
/**
|
|
28
|
-
* Plugin name to use. Defaults to 'rnnoise-ns'.
|
|
29
|
-
*/
|
|
30
|
-
pluginName?: string;
|
|
31
|
-
};
|
|
32
|
-
/**
|
|
33
|
-
* Voice Activity Detection (VAD) configuration.
|
|
34
|
-
*/
|
|
35
|
-
vad?: {
|
|
36
|
-
enabled: boolean;
|
|
37
|
-
/**
|
|
38
|
-
* Plugin name to use. Defaults to 'energy-vad'.
|
|
39
|
-
*/
|
|
40
|
-
pluginName?: string;
|
|
41
|
-
/**
|
|
42
|
-
* Probability threshold for speech onset (0-1).
|
|
43
|
-
* When VAD probability rises above this, audio is unmuted.
|
|
44
|
-
* Lower = more sensitive (catches quiet speech, may include noise)
|
|
45
|
-
* Higher = less sensitive (only confident speech, may clip quiet parts)
|
|
46
|
-
* Default: 0.8 (aggressive noise rejection)
|
|
47
|
-
*/
|
|
48
|
-
startThreshold?: number;
|
|
49
|
-
/**
|
|
50
|
-
* Probability threshold for speech offset (0-1).
|
|
51
|
-
* When VAD probability drops below this (after hangover), audio is muted.
|
|
52
|
-
* Lower = keeps audio on longer (less aggressive gating)
|
|
53
|
-
* Higher = mutes faster (more aggressive noise suppression)
|
|
54
|
-
* Default: 0.3 (wide hysteresis for stability)
|
|
55
|
-
*/
|
|
56
|
-
stopThreshold?: number;
|
|
57
|
-
/**
|
|
58
|
-
* Time in ms to wait after speech stops before muting.
|
|
59
|
-
* Prevents rapid on/off toggling during pauses.
|
|
60
|
-
* Lower = more aggressive gating, may clip between words
|
|
61
|
-
* Higher = smoother but may let trailing noise through
|
|
62
|
-
* Default: 300ms
|
|
63
|
-
*/
|
|
64
|
-
hangoverMs?: number;
|
|
65
|
-
/**
|
|
66
|
-
* Time in ms of audio to buffer before speech onset.
|
|
67
|
-
* Prevents cutting off the beginning of speech.
|
|
68
|
-
* Default: 250ms (generous pre-roll for voice)
|
|
69
|
-
*/
|
|
70
|
-
preRollMs?: number;
|
|
71
|
-
/**
|
|
72
|
-
* Minimum speech duration in ms to consider it valid speech.
|
|
73
|
-
* Filters out brief transients like keyboard clicks.
|
|
74
|
-
* Default: 250ms (aggressive transient rejection)
|
|
75
|
-
*/
|
|
76
|
-
minSpeechDurationMs?: number;
|
|
77
|
-
/**
|
|
78
|
-
* Minimum silence duration in ms before allowing another speech segment.
|
|
79
|
-
* Prevents false positives from quick noise bursts.
|
|
80
|
-
* Default: 150ms
|
|
81
|
-
*/
|
|
82
|
-
minSilenceDurationMs?: number;
|
|
83
|
-
/**
|
|
84
|
-
* Advanced: Energy VAD specific parameters
|
|
85
|
-
*/
|
|
86
|
-
energyVad?: {
|
|
87
|
-
/**
|
|
88
|
-
* Smoothing factor for energy calculation (0-1).
|
|
89
|
-
* Higher = more smoothing, slower to react
|
|
90
|
-
* Default: 0.95
|
|
91
|
-
*/
|
|
92
|
-
smoothing?: number;
|
|
93
|
-
/**
|
|
94
|
-
* Initial noise floor estimate.
|
|
95
|
-
* Default: 0.001
|
|
96
|
-
*/
|
|
97
|
-
initialNoiseFloor?: number;
|
|
98
|
-
/**
|
|
99
|
-
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.002 (very slow downward drift)
|
|
101
|
-
*/
|
|
102
|
-
noiseFloorAdaptRateQuiet?: number;
|
|
103
|
-
/**
|
|
104
|
-
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Applied to low-energy, low-crest-factor signals (background noise).
|
|
106
|
-
* Default: 0.02
|
|
107
|
-
*/
|
|
108
|
-
noiseFloorAdaptRateLoud?: number;
|
|
109
|
-
/**
|
|
110
|
-
* Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
|
|
111
|
-
* Default: 12.0 (aggressive noise rejection)
|
|
112
|
-
*/
|
|
113
|
-
minSNR?: number;
|
|
114
|
-
/**
|
|
115
|
-
* SNR range in dB for probability scaling.
|
|
116
|
-
* Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
|
|
117
|
-
*/
|
|
118
|
-
snrRange?: number;
|
|
119
|
-
/**
|
|
120
|
-
* Minimum absolute RMS energy to consider as speech.
|
|
121
|
-
* Prevents triggering on very quiet background noise.
|
|
122
|
-
* Default: 0.003 (approx -50dB, voice-appropriate level)
|
|
123
|
-
*/
|
|
124
|
-
minEnergy?: number;
|
|
125
|
-
};
|
|
126
|
-
};
|
|
127
|
-
/**
|
|
128
|
-
* Output gain and muting configuration.
|
|
129
|
-
*/
|
|
130
|
-
output?: {
|
|
131
|
-
/**
|
|
132
|
-
* Gain to apply when speaking (0-infinity).
|
|
133
|
-
* Values > 1.0 will amplify the voice.
|
|
134
|
-
* Default: 1.0 (unity gain)
|
|
135
|
-
*/
|
|
136
|
-
speechGain?: number;
|
|
137
|
-
/**
|
|
138
|
-
* Gain to apply when silent (0-1).
|
|
139
|
-
* 0.0 = complete mute (recommended for voice-only)
|
|
140
|
-
* 0.1-0.3 = allow some background ambience
|
|
141
|
-
* Default: 0.0 (full mute for voice-only)
|
|
142
|
-
*/
|
|
143
|
-
silenceGain?: number;
|
|
144
|
-
/**
|
|
145
|
-
* Time in seconds to ramp gain changes.
|
|
146
|
-
* Lower = faster transitions (may cause clicks)
|
|
147
|
-
* Higher = smoother transitions (may sound sluggish)
|
|
148
|
-
* Default: 0.015 (fast but smooth for voice)
|
|
149
|
-
*/
|
|
150
|
-
gainRampTime?: number;
|
|
151
|
-
/**
|
|
152
|
-
* Apply additional gain reduction during the transition to silence.
|
|
153
|
-
* Helps create cleaner cutoffs without abrupt clicks.
|
|
154
|
-
* Default: true
|
|
155
|
-
*/
|
|
156
|
-
smoothTransitions?: boolean;
|
|
157
|
-
/**
|
|
158
|
-
* Maximum gain in dB to apply (prevents clipping).
|
|
159
|
-
* Default: 6.0 dB (roughly 2x amplitude)
|
|
160
|
-
*/
|
|
161
|
-
maxGainDb?: number;
|
|
162
|
-
/**
|
|
163
|
-
* Apply dynamic range compression when speaking.
|
|
164
|
-
* Makes quiet parts louder and loud parts quieter.
|
|
165
|
-
* Default: false (transparent audio)
|
|
166
|
-
*/
|
|
167
|
-
enableCompression?: boolean;
|
|
168
|
-
/**
|
|
169
|
-
* Compression settings (when enabled)
|
|
170
|
-
*/
|
|
171
|
-
compression?: {
|
|
172
|
-
/**
|
|
173
|
-
* Threshold in dB above which compression starts.
|
|
174
|
-
* Default: -24.0 dB
|
|
175
|
-
*/
|
|
176
|
-
threshold?: number;
|
|
177
|
-
/**
|
|
178
|
-
* Compression ratio (1:N).
|
|
179
|
-
* Default: 3.0 (3:1 ratio)
|
|
180
|
-
*/
|
|
181
|
-
ratio?: number;
|
|
182
|
-
/**
|
|
183
|
-
* Attack time in seconds.
|
|
184
|
-
* Default: 0.003 (3ms)
|
|
185
|
-
*/
|
|
186
|
-
attack?: number;
|
|
187
|
-
/**
|
|
188
|
-
* Release time in seconds.
|
|
189
|
-
* Default: 0.05 (50ms)
|
|
190
|
-
*/
|
|
191
|
-
release?: number;
|
|
192
|
-
};
|
|
193
|
-
};
|
|
194
|
-
/**
|
|
195
|
-
* LiveKit integration configuration.
|
|
196
|
-
*/
|
|
197
|
-
livekit?: {
|
|
198
|
-
/**
|
|
199
|
-
* Whether to call track.mute()/unmute() on the LocalAudioTrack based on VAD.
|
|
200
|
-
* This saves bandwidth but has more signaling overhead.
|
|
201
|
-
* Default: false (uses gain gating only)
|
|
202
|
-
*/
|
|
203
|
-
manageTrackMute?: boolean;
|
|
3
|
+
interface NoiseSuppressionConfig {
|
|
4
|
+
enabled?: boolean;
|
|
5
|
+
noiseReductionLevel?: number;
|
|
6
|
+
assetConfig?: {
|
|
7
|
+
cdnUrl?: string;
|
|
204
8
|
};
|
|
205
9
|
}
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
state: "silent" | "speech_starting" | "speaking" | "speech_ending";
|
|
10
|
+
interface SpeakingDetectionConfig {
|
|
11
|
+
minDb: number;
|
|
12
|
+
maxDb: number;
|
|
13
|
+
speakOnRatio?: number;
|
|
14
|
+
speakOffRatio?: number;
|
|
15
|
+
hangoverMs?: number;
|
|
16
|
+
attackMs?: number;
|
|
17
|
+
releaseMs?: number;
|
|
18
|
+
}
|
|
19
|
+
interface OutputGainConfig {
|
|
20
|
+
speechGain?: number;
|
|
21
|
+
silenceGain?: number;
|
|
22
|
+
gainRampTime?: number;
|
|
23
|
+
maxGainDb?: number;
|
|
24
|
+
smoothTransitions?: boolean;
|
|
222
25
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
26
|
+
interface LivekitSpeakingOptions {
|
|
27
|
+
noiseSuppression?: NoiseSuppressionConfig;
|
|
28
|
+
speaking?: SpeakingDetectionConfig;
|
|
29
|
+
output?: OutputGainConfig;
|
|
30
|
+
muteWhenSilent?: boolean;
|
|
31
|
+
}
|
|
32
|
+
interface SpeakingState {
|
|
33
|
+
speaking: boolean;
|
|
34
|
+
levelDb: number;
|
|
35
|
+
}
|
|
36
|
+
type SpeakingEvents = {
|
|
37
|
+
speakingChange: SpeakingState;
|
|
228
38
|
error: Error;
|
|
229
39
|
};
|
|
230
|
-
/**
|
|
231
|
-
* Handle to a running audio processing pipeline.
|
|
232
|
-
*/
|
|
233
40
|
interface AudioPipelineHandle {
|
|
234
|
-
/**
|
|
235
|
-
* The processed MediaStreamTrack.
|
|
236
|
-
*/
|
|
237
41
|
readonly processedTrack: MediaStreamTrack;
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
readonly events: Emitter<AudioPipelineEvents>;
|
|
242
|
-
/**
|
|
243
|
-
* Current VAD state.
|
|
244
|
-
*/
|
|
245
|
-
readonly state: VADState;
|
|
246
|
-
/**
|
|
247
|
-
* Update configuration at runtime.
|
|
248
|
-
*/
|
|
249
|
-
setConfig(config: Partial<AudioProcessingConfig>): void;
|
|
250
|
-
/**
|
|
251
|
-
* Stop processing and release resources.
|
|
252
|
-
*/
|
|
42
|
+
readonly events: Emitter<SpeakingEvents>;
|
|
43
|
+
readonly state: SpeakingState;
|
|
44
|
+
setConfig(config: Partial<LivekitSpeakingOptions>): void;
|
|
253
45
|
dispose(): void;
|
|
254
46
|
}
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
}
|
|
262
|
-
/**
|
|
263
|
-
* Interface for a VAD Plugin.
|
|
264
|
-
*/
|
|
265
|
-
interface VADPlugin {
|
|
266
|
-
name: string;
|
|
267
|
-
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
47
|
+
interface SpeakingController {
|
|
48
|
+
readonly speaking: boolean;
|
|
49
|
+
readonly levelDb: number;
|
|
50
|
+
onChange(listener: (state: SpeakingState) => void): () => void;
|
|
51
|
+
setConfig(config: Partial<LivekitSpeakingOptions>): void;
|
|
52
|
+
dispose(): void;
|
|
268
53
|
}
|
|
269
54
|
|
|
270
|
-
export type {
|
|
55
|
+
export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
|
package/dist/types.d.ts
CHANGED
|
@@ -1,270 +1,55 @@
|
|
|
1
1
|
import { Emitter } from 'mitt';
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
* Noise suppression configuration.
|
|
9
|
-
*/
|
|
10
|
-
noiseSuppression?: {
|
|
11
|
-
enabled: boolean;
|
|
12
|
-
/**
|
|
13
|
-
* Path or URL to the RNNoise WASM binary.
|
|
14
|
-
* REQUIRED if enabled.
|
|
15
|
-
*/
|
|
16
|
-
wasmUrl?: string;
|
|
17
|
-
/**
|
|
18
|
-
* Path or URL to the RNNoise SIMD WASM binary.
|
|
19
|
-
* REQUIRED if enabled.
|
|
20
|
-
*/
|
|
21
|
-
simdUrl?: string;
|
|
22
|
-
/**
|
|
23
|
-
* Path or URL to the RNNoise worklet script.
|
|
24
|
-
* REQUIRED if enabled.
|
|
25
|
-
*/
|
|
26
|
-
workletUrl?: string;
|
|
27
|
-
/**
|
|
28
|
-
* Plugin name to use. Defaults to 'rnnoise-ns'.
|
|
29
|
-
*/
|
|
30
|
-
pluginName?: string;
|
|
31
|
-
};
|
|
32
|
-
/**
|
|
33
|
-
* Voice Activity Detection (VAD) configuration.
|
|
34
|
-
*/
|
|
35
|
-
vad?: {
|
|
36
|
-
enabled: boolean;
|
|
37
|
-
/**
|
|
38
|
-
* Plugin name to use. Defaults to 'energy-vad'.
|
|
39
|
-
*/
|
|
40
|
-
pluginName?: string;
|
|
41
|
-
/**
|
|
42
|
-
* Probability threshold for speech onset (0-1).
|
|
43
|
-
* When VAD probability rises above this, audio is unmuted.
|
|
44
|
-
* Lower = more sensitive (catches quiet speech, may include noise)
|
|
45
|
-
* Higher = less sensitive (only confident speech, may clip quiet parts)
|
|
46
|
-
* Default: 0.8 (aggressive noise rejection)
|
|
47
|
-
*/
|
|
48
|
-
startThreshold?: number;
|
|
49
|
-
/**
|
|
50
|
-
* Probability threshold for speech offset (0-1).
|
|
51
|
-
* When VAD probability drops below this (after hangover), audio is muted.
|
|
52
|
-
* Lower = keeps audio on longer (less aggressive gating)
|
|
53
|
-
* Higher = mutes faster (more aggressive noise suppression)
|
|
54
|
-
* Default: 0.3 (wide hysteresis for stability)
|
|
55
|
-
*/
|
|
56
|
-
stopThreshold?: number;
|
|
57
|
-
/**
|
|
58
|
-
* Time in ms to wait after speech stops before muting.
|
|
59
|
-
* Prevents rapid on/off toggling during pauses.
|
|
60
|
-
* Lower = more aggressive gating, may clip between words
|
|
61
|
-
* Higher = smoother but may let trailing noise through
|
|
62
|
-
* Default: 300ms
|
|
63
|
-
*/
|
|
64
|
-
hangoverMs?: number;
|
|
65
|
-
/**
|
|
66
|
-
* Time in ms of audio to buffer before speech onset.
|
|
67
|
-
* Prevents cutting off the beginning of speech.
|
|
68
|
-
* Default: 250ms (generous pre-roll for voice)
|
|
69
|
-
*/
|
|
70
|
-
preRollMs?: number;
|
|
71
|
-
/**
|
|
72
|
-
* Minimum speech duration in ms to consider it valid speech.
|
|
73
|
-
* Filters out brief transients like keyboard clicks.
|
|
74
|
-
* Default: 250ms (aggressive transient rejection)
|
|
75
|
-
*/
|
|
76
|
-
minSpeechDurationMs?: number;
|
|
77
|
-
/**
|
|
78
|
-
* Minimum silence duration in ms before allowing another speech segment.
|
|
79
|
-
* Prevents false positives from quick noise bursts.
|
|
80
|
-
* Default: 150ms
|
|
81
|
-
*/
|
|
82
|
-
minSilenceDurationMs?: number;
|
|
83
|
-
/**
|
|
84
|
-
* Advanced: Energy VAD specific parameters
|
|
85
|
-
*/
|
|
86
|
-
energyVad?: {
|
|
87
|
-
/**
|
|
88
|
-
* Smoothing factor for energy calculation (0-1).
|
|
89
|
-
* Higher = more smoothing, slower to react
|
|
90
|
-
* Default: 0.95
|
|
91
|
-
*/
|
|
92
|
-
smoothing?: number;
|
|
93
|
-
/**
|
|
94
|
-
* Initial noise floor estimate.
|
|
95
|
-
* Default: 0.001
|
|
96
|
-
*/
|
|
97
|
-
initialNoiseFloor?: number;
|
|
98
|
-
/**
|
|
99
|
-
* Rate at which noise floor adapts to quiet signals (0-1).
|
|
100
|
-
* Default: 0.002 (very slow downward drift)
|
|
101
|
-
*/
|
|
102
|
-
noiseFloorAdaptRateQuiet?: number;
|
|
103
|
-
/**
|
|
104
|
-
* Rate at which noise floor adapts to loud signals (0-1).
|
|
105
|
-
* Applied to low-energy, low-crest-factor signals (background noise).
|
|
106
|
-
* Default: 0.02
|
|
107
|
-
*/
|
|
108
|
-
noiseFloorAdaptRateLoud?: number;
|
|
109
|
-
/**
|
|
110
|
-
* Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
|
|
111
|
-
* Default: 12.0 (aggressive noise rejection)
|
|
112
|
-
*/
|
|
113
|
-
minSNR?: number;
|
|
114
|
-
/**
|
|
115
|
-
* SNR range in dB for probability scaling.
|
|
116
|
-
* Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
|
|
117
|
-
*/
|
|
118
|
-
snrRange?: number;
|
|
119
|
-
/**
|
|
120
|
-
* Minimum absolute RMS energy to consider as speech.
|
|
121
|
-
* Prevents triggering on very quiet background noise.
|
|
122
|
-
* Default: 0.003 (approx -50dB, voice-appropriate level)
|
|
123
|
-
*/
|
|
124
|
-
minEnergy?: number;
|
|
125
|
-
};
|
|
126
|
-
};
|
|
127
|
-
/**
|
|
128
|
-
* Output gain and muting configuration.
|
|
129
|
-
*/
|
|
130
|
-
output?: {
|
|
131
|
-
/**
|
|
132
|
-
* Gain to apply when speaking (0-infinity).
|
|
133
|
-
* Values > 1.0 will amplify the voice.
|
|
134
|
-
* Default: 1.0 (unity gain)
|
|
135
|
-
*/
|
|
136
|
-
speechGain?: number;
|
|
137
|
-
/**
|
|
138
|
-
* Gain to apply when silent (0-1).
|
|
139
|
-
* 0.0 = complete mute (recommended for voice-only)
|
|
140
|
-
* 0.1-0.3 = allow some background ambience
|
|
141
|
-
* Default: 0.0 (full mute for voice-only)
|
|
142
|
-
*/
|
|
143
|
-
silenceGain?: number;
|
|
144
|
-
/**
|
|
145
|
-
* Time in seconds to ramp gain changes.
|
|
146
|
-
* Lower = faster transitions (may cause clicks)
|
|
147
|
-
* Higher = smoother transitions (may sound sluggish)
|
|
148
|
-
* Default: 0.015 (fast but smooth for voice)
|
|
149
|
-
*/
|
|
150
|
-
gainRampTime?: number;
|
|
151
|
-
/**
|
|
152
|
-
* Apply additional gain reduction during the transition to silence.
|
|
153
|
-
* Helps create cleaner cutoffs without abrupt clicks.
|
|
154
|
-
* Default: true
|
|
155
|
-
*/
|
|
156
|
-
smoothTransitions?: boolean;
|
|
157
|
-
/**
|
|
158
|
-
* Maximum gain in dB to apply (prevents clipping).
|
|
159
|
-
* Default: 6.0 dB (roughly 2x amplitude)
|
|
160
|
-
*/
|
|
161
|
-
maxGainDb?: number;
|
|
162
|
-
/**
|
|
163
|
-
* Apply dynamic range compression when speaking.
|
|
164
|
-
* Makes quiet parts louder and loud parts quieter.
|
|
165
|
-
* Default: false (transparent audio)
|
|
166
|
-
*/
|
|
167
|
-
enableCompression?: boolean;
|
|
168
|
-
/**
|
|
169
|
-
* Compression settings (when enabled)
|
|
170
|
-
*/
|
|
171
|
-
compression?: {
|
|
172
|
-
/**
|
|
173
|
-
* Threshold in dB above which compression starts.
|
|
174
|
-
* Default: -24.0 dB
|
|
175
|
-
*/
|
|
176
|
-
threshold?: number;
|
|
177
|
-
/**
|
|
178
|
-
* Compression ratio (1:N).
|
|
179
|
-
* Default: 3.0 (3:1 ratio)
|
|
180
|
-
*/
|
|
181
|
-
ratio?: number;
|
|
182
|
-
/**
|
|
183
|
-
* Attack time in seconds.
|
|
184
|
-
* Default: 0.003 (3ms)
|
|
185
|
-
*/
|
|
186
|
-
attack?: number;
|
|
187
|
-
/**
|
|
188
|
-
* Release time in seconds.
|
|
189
|
-
* Default: 0.05 (50ms)
|
|
190
|
-
*/
|
|
191
|
-
release?: number;
|
|
192
|
-
};
|
|
193
|
-
};
|
|
194
|
-
/**
|
|
195
|
-
* LiveKit integration configuration.
|
|
196
|
-
*/
|
|
197
|
-
livekit?: {
|
|
198
|
-
/**
|
|
199
|
-
* Whether to call track.mute()/unmute() on the LocalAudioTrack based on VAD.
|
|
200
|
-
* This saves bandwidth but has more signaling overhead.
|
|
201
|
-
* Default: false (uses gain gating only)
|
|
202
|
-
*/
|
|
203
|
-
manageTrackMute?: boolean;
|
|
3
|
+
interface NoiseSuppressionConfig {
|
|
4
|
+
enabled?: boolean;
|
|
5
|
+
noiseReductionLevel?: number;
|
|
6
|
+
assetConfig?: {
|
|
7
|
+
cdnUrl?: string;
|
|
204
8
|
};
|
|
205
9
|
}
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
state: "silent" | "speech_starting" | "speaking" | "speech_ending";
|
|
10
|
+
interface SpeakingDetectionConfig {
|
|
11
|
+
minDb: number;
|
|
12
|
+
maxDb: number;
|
|
13
|
+
speakOnRatio?: number;
|
|
14
|
+
speakOffRatio?: number;
|
|
15
|
+
hangoverMs?: number;
|
|
16
|
+
attackMs?: number;
|
|
17
|
+
releaseMs?: number;
|
|
18
|
+
}
|
|
19
|
+
interface OutputGainConfig {
|
|
20
|
+
speechGain?: number;
|
|
21
|
+
silenceGain?: number;
|
|
22
|
+
gainRampTime?: number;
|
|
23
|
+
maxGainDb?: number;
|
|
24
|
+
smoothTransitions?: boolean;
|
|
222
25
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
26
|
+
interface LivekitSpeakingOptions {
|
|
27
|
+
noiseSuppression?: NoiseSuppressionConfig;
|
|
28
|
+
speaking?: SpeakingDetectionConfig;
|
|
29
|
+
output?: OutputGainConfig;
|
|
30
|
+
muteWhenSilent?: boolean;
|
|
31
|
+
}
|
|
32
|
+
interface SpeakingState {
|
|
33
|
+
speaking: boolean;
|
|
34
|
+
levelDb: number;
|
|
35
|
+
}
|
|
36
|
+
type SpeakingEvents = {
|
|
37
|
+
speakingChange: SpeakingState;
|
|
228
38
|
error: Error;
|
|
229
39
|
};
|
|
230
|
-
/**
|
|
231
|
-
* Handle to a running audio processing pipeline.
|
|
232
|
-
*/
|
|
233
40
|
interface AudioPipelineHandle {
|
|
234
|
-
/**
|
|
235
|
-
* The processed MediaStreamTrack.
|
|
236
|
-
*/
|
|
237
41
|
readonly processedTrack: MediaStreamTrack;
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
readonly events: Emitter<AudioPipelineEvents>;
|
|
242
|
-
/**
|
|
243
|
-
* Current VAD state.
|
|
244
|
-
*/
|
|
245
|
-
readonly state: VADState;
|
|
246
|
-
/**
|
|
247
|
-
* Update configuration at runtime.
|
|
248
|
-
*/
|
|
249
|
-
setConfig(config: Partial<AudioProcessingConfig>): void;
|
|
250
|
-
/**
|
|
251
|
-
* Stop processing and release resources.
|
|
252
|
-
*/
|
|
42
|
+
readonly events: Emitter<SpeakingEvents>;
|
|
43
|
+
readonly state: SpeakingState;
|
|
44
|
+
setConfig(config: Partial<LivekitSpeakingOptions>): void;
|
|
253
45
|
dispose(): void;
|
|
254
46
|
}
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
}
|
|
262
|
-
/**
|
|
263
|
-
* Interface for a VAD Plugin.
|
|
264
|
-
*/
|
|
265
|
-
interface VADPlugin {
|
|
266
|
-
name: string;
|
|
267
|
-
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
47
|
+
interface SpeakingController {
|
|
48
|
+
readonly speaking: boolean;
|
|
49
|
+
readonly levelDb: number;
|
|
50
|
+
onChange(listener: (state: SpeakingState) => void): () => void;
|
|
51
|
+
setConfig(config: Partial<LivekitSpeakingOptions>): void;
|
|
52
|
+
dispose(): void;
|
|
268
53
|
}
|
|
269
54
|
|
|
270
|
-
export type {
|
|
55
|
+
export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
|
package/dist/vad/vad-node.d.mts
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
|
-
name: string;
|
|
6
|
-
private workletNode;
|
|
7
|
-
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
8
|
-
updateSpeakingState(isSpeaking: boolean): void;
|
|
1
|
+
interface LevelDetectorNode {
|
|
2
|
+
node: AudioWorkletNode;
|
|
3
|
+
dispose: () => void;
|
|
9
4
|
}
|
|
5
|
+
declare function createLevelDetectorNode(context: AudioContext, onLevel: (levelDb: number) => void, options?: {
|
|
6
|
+
smoothing?: number;
|
|
7
|
+
}): Promise<LevelDetectorNode>;
|
|
10
8
|
|
|
11
|
-
export {
|
|
9
|
+
export { type LevelDetectorNode, createLevelDetectorNode };
|
package/dist/vad/vad-node.d.ts
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
declare class EnergyVADPlugin implements VADPlugin {
|
|
5
|
-
name: string;
|
|
6
|
-
private workletNode;
|
|
7
|
-
createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
|
|
8
|
-
updateSpeakingState(isSpeaking: boolean): void;
|
|
1
|
+
interface LevelDetectorNode {
|
|
2
|
+
node: AudioWorkletNode;
|
|
3
|
+
dispose: () => void;
|
|
9
4
|
}
|
|
5
|
+
declare function createLevelDetectorNode(context: AudioContext, onLevel: (levelDb: number) => void, options?: {
|
|
6
|
+
smoothing?: number;
|
|
7
|
+
}): Promise<LevelDetectorNode>;
|
|
10
8
|
|
|
11
|
-
export {
|
|
9
|
+
export { type LevelDetectorNode, createLevelDetectorNode };
|