@tensamin/audio 0.1.14 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +48 -231
  2. package/dist/chunk-6BJ4XGSA.mjs +80 -0
  3. package/dist/chunk-AQ5RVY33.mjs +74 -0
  4. package/dist/chunk-IS37FHDN.mjs +33 -0
  5. package/dist/chunk-K4J3UUOR.mjs +178 -0
  6. package/dist/chunk-QNQK6QFB.mjs +71 -0
  7. package/dist/context/audio-context.d.mts +0 -24
  8. package/dist/context/audio-context.d.ts +0 -24
  9. package/dist/index.d.mts +2 -8
  10. package/dist/index.d.ts +2 -8
  11. package/dist/index.js +285 -680
  12. package/dist/index.mjs +8 -43
  13. package/dist/livekit/integration.d.mts +3 -7
  14. package/dist/livekit/integration.d.ts +3 -7
  15. package/dist/livekit/integration.js +280 -626
  16. package/dist/livekit/integration.mjs +7 -8
  17. package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
  18. package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
  19. package/dist/noise-suppression/deepfilternet-node.js +57 -0
  20. package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
  21. package/dist/pipeline/audio-pipeline.d.mts +2 -2
  22. package/dist/pipeline/audio-pipeline.d.ts +2 -2
  23. package/dist/pipeline/audio-pipeline.js +219 -554
  24. package/dist/pipeline/audio-pipeline.mjs +4 -5
  25. package/dist/types.d.mts +42 -257
  26. package/dist/types.d.ts +42 -257
  27. package/dist/vad/vad-node.d.mts +7 -9
  28. package/dist/vad/vad-node.d.ts +7 -9
  29. package/dist/vad/vad-node.js +47 -156
  30. package/dist/vad/vad-node.mjs +3 -3
  31. package/dist/vad/vad-state.d.mts +9 -11
  32. package/dist/vad/vad-state.d.ts +9 -11
  33. package/dist/vad/vad-state.js +50 -79
  34. package/dist/vad/vad-state.mjs +3 -3
  35. package/package.json +21 -21
  36. package/dist/chunk-2G2JFHJY.mjs +0 -180
  37. package/dist/chunk-6F2HZUYO.mjs +0 -91
  38. package/dist/chunk-K4YLH73B.mjs +0 -103
  39. package/dist/chunk-R5M2DGAQ.mjs +0 -311
  40. package/dist/chunk-UFKIAMG3.mjs +0 -47
  41. package/dist/chunk-XO6B3D4A.mjs +0 -67
  42. package/dist/extensibility/plugins.d.mts +0 -9
  43. package/dist/extensibility/plugins.d.ts +0 -9
  44. package/dist/extensibility/plugins.js +0 -320
  45. package/dist/extensibility/plugins.mjs +0 -14
  46. package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
  47. package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
  48. package/dist/noise-suppression/rnnoise-node.js +0 -101
  49. package/dist/noise-suppression/rnnoise-node.mjs +0 -6
@@ -1,11 +1,10 @@
1
1
  import {
2
2
  createAudioPipeline
3
- } from "../chunk-R5M2DGAQ.mjs";
4
- import "../chunk-K4YLH73B.mjs";
3
+ } from "../chunk-K4J3UUOR.mjs";
5
4
  import "../chunk-OZ7KMC4S.mjs";
6
- import "../chunk-UFKIAMG3.mjs";
7
- import "../chunk-XO6B3D4A.mjs";
8
- import "../chunk-2G2JFHJY.mjs";
5
+ import "../chunk-IS37FHDN.mjs";
6
+ import "../chunk-QNQK6QFB.mjs";
7
+ import "../chunk-AQ5RVY33.mjs";
9
8
  export {
10
9
  createAudioPipeline
11
10
  };
package/dist/types.d.mts CHANGED
@@ -1,270 +1,55 @@
1
1
  import { Emitter } from 'mitt';
2
2
 
3
- /**
4
- * Configuration for the audio processing pipeline.
5
- */
6
- interface AudioProcessingConfig {
7
- /**
8
- * Noise suppression configuration.
9
- */
10
- noiseSuppression?: {
11
- enabled: boolean;
12
- /**
13
- * Path or URL to the RNNoise WASM binary.
14
- * REQUIRED if enabled.
15
- */
16
- wasmUrl?: string;
17
- /**
18
- * Path or URL to the RNNoise SIMD WASM binary.
19
- * REQUIRED if enabled.
20
- */
21
- simdUrl?: string;
22
- /**
23
- * Path or URL to the RNNoise worklet script.
24
- * REQUIRED if enabled.
25
- */
26
- workletUrl?: string;
27
- /**
28
- * Plugin name to use. Defaults to 'rnnoise-ns'.
29
- */
30
- pluginName?: string;
31
- };
32
- /**
33
- * Voice Activity Detection (VAD) configuration.
34
- */
35
- vad?: {
36
- enabled: boolean;
37
- /**
38
- * Plugin name to use. Defaults to 'energy-vad'.
39
- */
40
- pluginName?: string;
41
- /**
42
- * Probability threshold for speech onset (0-1).
43
- * When VAD probability rises above this, audio is unmuted.
44
- * Lower = more sensitive (catches quiet speech, may include noise)
45
- * Higher = less sensitive (only confident speech, may clip quiet parts)
46
- * Default: 0.8 (aggressive noise rejection)
47
- */
48
- startThreshold?: number;
49
- /**
50
- * Probability threshold for speech offset (0-1).
51
- * When VAD probability drops below this (after hangover), audio is muted.
52
- * Lower = keeps audio on longer (less aggressive gating)
53
- * Higher = mutes faster (more aggressive noise suppression)
54
- * Default: 0.3 (wide hysteresis for stability)
55
- */
56
- stopThreshold?: number;
57
- /**
58
- * Time in ms to wait after speech stops before muting.
59
- * Prevents rapid on/off toggling during pauses.
60
- * Lower = more aggressive gating, may clip between words
61
- * Higher = smoother but may let trailing noise through
62
- * Default: 300ms
63
- */
64
- hangoverMs?: number;
65
- /**
66
- * Time in ms of audio to buffer before speech onset.
67
- * Prevents cutting off the beginning of speech.
68
- * Default: 250ms (generous pre-roll for voice)
69
- */
70
- preRollMs?: number;
71
- /**
72
- * Minimum speech duration in ms to consider it valid speech.
73
- * Filters out brief transients like keyboard clicks.
74
- * Default: 250ms (aggressive transient rejection)
75
- */
76
- minSpeechDurationMs?: number;
77
- /**
78
- * Minimum silence duration in ms before allowing another speech segment.
79
- * Prevents false positives from quick noise bursts.
80
- * Default: 150ms
81
- */
82
- minSilenceDurationMs?: number;
83
- /**
84
- * Advanced: Energy VAD specific parameters
85
- */
86
- energyVad?: {
87
- /**
88
- * Smoothing factor for energy calculation (0-1).
89
- * Higher = more smoothing, slower to react
90
- * Default: 0.95
91
- */
92
- smoothing?: number;
93
- /**
94
- * Initial noise floor estimate.
95
- * Default: 0.001
96
- */
97
- initialNoiseFloor?: number;
98
- /**
99
- * Rate at which noise floor adapts to quiet signals (0-1).
100
- * Default: 0.002 (very slow downward drift)
101
- */
102
- noiseFloorAdaptRateQuiet?: number;
103
- /**
104
- * Rate at which noise floor adapts to loud signals (0-1).
105
- * Applied to low-energy, low-crest-factor signals (background noise).
106
- * Default: 0.02
107
- */
108
- noiseFloorAdaptRateLoud?: number;
109
- /**
110
- * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
111
- * Default: 12.0 (aggressive noise rejection)
112
- */
113
- minSNR?: number;
114
- /**
115
- * SNR range in dB for probability scaling.
116
- * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
117
- */
118
- snrRange?: number;
119
- /**
120
- * Minimum absolute RMS energy to consider as speech.
121
- * Prevents triggering on very quiet background noise.
122
- * Default: 0.003 (approx -50dB, voice-appropriate level)
123
- */
124
- minEnergy?: number;
125
- };
126
- };
127
- /**
128
- * Output gain and muting configuration.
129
- */
130
- output?: {
131
- /**
132
- * Gain to apply when speaking (0-infinity).
133
- * Values > 1.0 will amplify the voice.
134
- * Default: 1.0 (unity gain)
135
- */
136
- speechGain?: number;
137
- /**
138
- * Gain to apply when silent (0-1).
139
- * 0.0 = complete mute (recommended for voice-only)
140
- * 0.1-0.3 = allow some background ambience
141
- * Default: 0.0 (full mute for voice-only)
142
- */
143
- silenceGain?: number;
144
- /**
145
- * Time in seconds to ramp gain changes.
146
- * Lower = faster transitions (may cause clicks)
147
- * Higher = smoother transitions (may sound sluggish)
148
- * Default: 0.015 (fast but smooth for voice)
149
- */
150
- gainRampTime?: number;
151
- /**
152
- * Apply additional gain reduction during the transition to silence.
153
- * Helps create cleaner cutoffs without abrupt clicks.
154
- * Default: true
155
- */
156
- smoothTransitions?: boolean;
157
- /**
158
- * Maximum gain in dB to apply (prevents clipping).
159
- * Default: 6.0 dB (roughly 2x amplitude)
160
- */
161
- maxGainDb?: number;
162
- /**
163
- * Apply dynamic range compression when speaking.
164
- * Makes quiet parts louder and loud parts quieter.
165
- * Default: false (transparent audio)
166
- */
167
- enableCompression?: boolean;
168
- /**
169
- * Compression settings (when enabled)
170
- */
171
- compression?: {
172
- /**
173
- * Threshold in dB above which compression starts.
174
- * Default: -24.0 dB
175
- */
176
- threshold?: number;
177
- /**
178
- * Compression ratio (1:N).
179
- * Default: 3.0 (3:1 ratio)
180
- */
181
- ratio?: number;
182
- /**
183
- * Attack time in seconds.
184
- * Default: 0.003 (3ms)
185
- */
186
- attack?: number;
187
- /**
188
- * Release time in seconds.
189
- * Default: 0.05 (50ms)
190
- */
191
- release?: number;
192
- };
193
- };
194
- /**
195
- * LiveKit integration configuration.
196
- */
197
- livekit?: {
198
- /**
199
- * Whether to call track.mute()/unmute() on the LocalAudioTrack based on VAD.
200
- * This saves bandwidth but has more signaling overhead.
201
- * Default: false (uses gain gating only)
202
- */
203
- manageTrackMute?: boolean;
3
+ interface NoiseSuppressionConfig {
4
+ enabled?: boolean;
5
+ noiseReductionLevel?: number;
6
+ assetConfig?: {
7
+ cdnUrl?: string;
204
8
  };
205
9
  }
206
- /**
207
- * Represents the state of Voice Activity Detection.
208
- */
209
- interface VADState {
210
- /**
211
- * Whether speech is currently detected (after hysteresis).
212
- */
213
- isSpeaking: boolean;
214
- /**
215
- * Raw probability of speech from the VAD model (0-1).
216
- */
217
- probability: number;
218
- /**
219
- * Current state enum.
220
- */
221
- state: "silent" | "speech_starting" | "speaking" | "speech_ending";
10
+ interface SpeakingDetectionConfig {
11
+ minDb: number;
12
+ maxDb: number;
13
+ speakOnRatio?: number;
14
+ speakOffRatio?: number;
15
+ hangoverMs?: number;
16
+ attackMs?: number;
17
+ releaseMs?: number;
18
+ }
19
+ interface OutputGainConfig {
20
+ speechGain?: number;
21
+ silenceGain?: number;
22
+ gainRampTime?: number;
23
+ maxGainDb?: number;
24
+ smoothTransitions?: boolean;
222
25
  }
223
- /**
224
- * Events emitted by the audio pipeline.
225
- */
226
- type AudioPipelineEvents = {
227
- vadChange: VADState;
26
+ interface LivekitSpeakingOptions {
27
+ noiseSuppression?: NoiseSuppressionConfig;
28
+ speaking?: SpeakingDetectionConfig;
29
+ output?: OutputGainConfig;
30
+ muteWhenSilent?: boolean;
31
+ }
32
+ interface SpeakingState {
33
+ speaking: boolean;
34
+ levelDb: number;
35
+ }
36
+ type SpeakingEvents = {
37
+ speakingChange: SpeakingState;
228
38
  error: Error;
229
39
  };
230
- /**
231
- * Handle to a running audio processing pipeline.
232
- */
233
40
  interface AudioPipelineHandle {
234
- /**
235
- * The processed MediaStreamTrack.
236
- */
237
41
  readonly processedTrack: MediaStreamTrack;
238
- /**
239
- * Event emitter for VAD state and errors.
240
- */
241
- readonly events: Emitter<AudioPipelineEvents>;
242
- /**
243
- * Current VAD state.
244
- */
245
- readonly state: VADState;
246
- /**
247
- * Update configuration at runtime.
248
- */
249
- setConfig(config: Partial<AudioProcessingConfig>): void;
250
- /**
251
- * Stop processing and release resources.
252
- */
42
+ readonly events: Emitter<SpeakingEvents>;
43
+ readonly state: SpeakingState;
44
+ setConfig(config: Partial<LivekitSpeakingOptions>): void;
253
45
  dispose(): void;
254
46
  }
255
- /**
256
- * Interface for a Noise Suppression Plugin.
257
- */
258
- interface NoiseSuppressionPlugin {
259
- name: string;
260
- createNode(context: AudioContext, config: AudioProcessingConfig["noiseSuppression"]): Promise<AudioNode>;
261
- }
262
- /**
263
- * Interface for a VAD Plugin.
264
- */
265
- interface VADPlugin {
266
- name: string;
267
- createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
47
+ interface SpeakingController {
48
+ readonly speaking: boolean;
49
+ readonly levelDb: number;
50
+ onChange(listener: (state: SpeakingState) => void): () => void;
51
+ setConfig(config: Partial<LivekitSpeakingOptions>): void;
52
+ dispose(): void;
268
53
  }
269
54
 
270
- export type { AudioPipelineEvents, AudioPipelineHandle, AudioProcessingConfig, NoiseSuppressionPlugin, VADPlugin, VADState };
55
+ export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
package/dist/types.d.ts CHANGED
@@ -1,270 +1,55 @@
1
1
  import { Emitter } from 'mitt';
2
2
 
3
- /**
4
- * Configuration for the audio processing pipeline.
5
- */
6
- interface AudioProcessingConfig {
7
- /**
8
- * Noise suppression configuration.
9
- */
10
- noiseSuppression?: {
11
- enabled: boolean;
12
- /**
13
- * Path or URL to the RNNoise WASM binary.
14
- * REQUIRED if enabled.
15
- */
16
- wasmUrl?: string;
17
- /**
18
- * Path or URL to the RNNoise SIMD WASM binary.
19
- * REQUIRED if enabled.
20
- */
21
- simdUrl?: string;
22
- /**
23
- * Path or URL to the RNNoise worklet script.
24
- * REQUIRED if enabled.
25
- */
26
- workletUrl?: string;
27
- /**
28
- * Plugin name to use. Defaults to 'rnnoise-ns'.
29
- */
30
- pluginName?: string;
31
- };
32
- /**
33
- * Voice Activity Detection (VAD) configuration.
34
- */
35
- vad?: {
36
- enabled: boolean;
37
- /**
38
- * Plugin name to use. Defaults to 'energy-vad'.
39
- */
40
- pluginName?: string;
41
- /**
42
- * Probability threshold for speech onset (0-1).
43
- * When VAD probability rises above this, audio is unmuted.
44
- * Lower = more sensitive (catches quiet speech, may include noise)
45
- * Higher = less sensitive (only confident speech, may clip quiet parts)
46
- * Default: 0.8 (aggressive noise rejection)
47
- */
48
- startThreshold?: number;
49
- /**
50
- * Probability threshold for speech offset (0-1).
51
- * When VAD probability drops below this (after hangover), audio is muted.
52
- * Lower = keeps audio on longer (less aggressive gating)
53
- * Higher = mutes faster (more aggressive noise suppression)
54
- * Default: 0.3 (wide hysteresis for stability)
55
- */
56
- stopThreshold?: number;
57
- /**
58
- * Time in ms to wait after speech stops before muting.
59
- * Prevents rapid on/off toggling during pauses.
60
- * Lower = more aggressive gating, may clip between words
61
- * Higher = smoother but may let trailing noise through
62
- * Default: 300ms
63
- */
64
- hangoverMs?: number;
65
- /**
66
- * Time in ms of audio to buffer before speech onset.
67
- * Prevents cutting off the beginning of speech.
68
- * Default: 250ms (generous pre-roll for voice)
69
- */
70
- preRollMs?: number;
71
- /**
72
- * Minimum speech duration in ms to consider it valid speech.
73
- * Filters out brief transients like keyboard clicks.
74
- * Default: 250ms (aggressive transient rejection)
75
- */
76
- minSpeechDurationMs?: number;
77
- /**
78
- * Minimum silence duration in ms before allowing another speech segment.
79
- * Prevents false positives from quick noise bursts.
80
- * Default: 150ms
81
- */
82
- minSilenceDurationMs?: number;
83
- /**
84
- * Advanced: Energy VAD specific parameters
85
- */
86
- energyVad?: {
87
- /**
88
- * Smoothing factor for energy calculation (0-1).
89
- * Higher = more smoothing, slower to react
90
- * Default: 0.95
91
- */
92
- smoothing?: number;
93
- /**
94
- * Initial noise floor estimate.
95
- * Default: 0.001
96
- */
97
- initialNoiseFloor?: number;
98
- /**
99
- * Rate at which noise floor adapts to quiet signals (0-1).
100
- * Default: 0.002 (very slow downward drift)
101
- */
102
- noiseFloorAdaptRateQuiet?: number;
103
- /**
104
- * Rate at which noise floor adapts to loud signals (0-1).
105
- * Applied to low-energy, low-crest-factor signals (background noise).
106
- * Default: 0.02
107
- */
108
- noiseFloorAdaptRateLoud?: number;
109
- /**
110
- * Minimum SNR (Signal-to-Noise Ratio) in dB for speech detection.
111
- * Default: 12.0 (aggressive noise rejection)
112
- */
113
- minSNR?: number;
114
- /**
115
- * SNR range in dB for probability scaling.
116
- * Default: 10.0 (probability scales from minSNR to minSNR+snrRange)
117
- */
118
- snrRange?: number;
119
- /**
120
- * Minimum absolute RMS energy to consider as speech.
121
- * Prevents triggering on very quiet background noise.
122
- * Default: 0.003 (approx -50dB, voice-appropriate level)
123
- */
124
- minEnergy?: number;
125
- };
126
- };
127
- /**
128
- * Output gain and muting configuration.
129
- */
130
- output?: {
131
- /**
132
- * Gain to apply when speaking (0-infinity).
133
- * Values > 1.0 will amplify the voice.
134
- * Default: 1.0 (unity gain)
135
- */
136
- speechGain?: number;
137
- /**
138
- * Gain to apply when silent (0-1).
139
- * 0.0 = complete mute (recommended for voice-only)
140
- * 0.1-0.3 = allow some background ambience
141
- * Default: 0.0 (full mute for voice-only)
142
- */
143
- silenceGain?: number;
144
- /**
145
- * Time in seconds to ramp gain changes.
146
- * Lower = faster transitions (may cause clicks)
147
- * Higher = smoother transitions (may sound sluggish)
148
- * Default: 0.015 (fast but smooth for voice)
149
- */
150
- gainRampTime?: number;
151
- /**
152
- * Apply additional gain reduction during the transition to silence.
153
- * Helps create cleaner cutoffs without abrupt clicks.
154
- * Default: true
155
- */
156
- smoothTransitions?: boolean;
157
- /**
158
- * Maximum gain in dB to apply (prevents clipping).
159
- * Default: 6.0 dB (roughly 2x amplitude)
160
- */
161
- maxGainDb?: number;
162
- /**
163
- * Apply dynamic range compression when speaking.
164
- * Makes quiet parts louder and loud parts quieter.
165
- * Default: false (transparent audio)
166
- */
167
- enableCompression?: boolean;
168
- /**
169
- * Compression settings (when enabled)
170
- */
171
- compression?: {
172
- /**
173
- * Threshold in dB above which compression starts.
174
- * Default: -24.0 dB
175
- */
176
- threshold?: number;
177
- /**
178
- * Compression ratio (1:N).
179
- * Default: 3.0 (3:1 ratio)
180
- */
181
- ratio?: number;
182
- /**
183
- * Attack time in seconds.
184
- * Default: 0.003 (3ms)
185
- */
186
- attack?: number;
187
- /**
188
- * Release time in seconds.
189
- * Default: 0.05 (50ms)
190
- */
191
- release?: number;
192
- };
193
- };
194
- /**
195
- * LiveKit integration configuration.
196
- */
197
- livekit?: {
198
- /**
199
- * Whether to call track.mute()/unmute() on the LocalAudioTrack based on VAD.
200
- * This saves bandwidth but has more signaling overhead.
201
- * Default: false (uses gain gating only)
202
- */
203
- manageTrackMute?: boolean;
3
+ interface NoiseSuppressionConfig {
4
+ enabled?: boolean;
5
+ noiseReductionLevel?: number;
6
+ assetConfig?: {
7
+ cdnUrl?: string;
204
8
  };
205
9
  }
206
- /**
207
- * Represents the state of Voice Activity Detection.
208
- */
209
- interface VADState {
210
- /**
211
- * Whether speech is currently detected (after hysteresis).
212
- */
213
- isSpeaking: boolean;
214
- /**
215
- * Raw probability of speech from the VAD model (0-1).
216
- */
217
- probability: number;
218
- /**
219
- * Current state enum.
220
- */
221
- state: "silent" | "speech_starting" | "speaking" | "speech_ending";
10
+ interface SpeakingDetectionConfig {
11
+ minDb: number;
12
+ maxDb: number;
13
+ speakOnRatio?: number;
14
+ speakOffRatio?: number;
15
+ hangoverMs?: number;
16
+ attackMs?: number;
17
+ releaseMs?: number;
18
+ }
19
+ interface OutputGainConfig {
20
+ speechGain?: number;
21
+ silenceGain?: number;
22
+ gainRampTime?: number;
23
+ maxGainDb?: number;
24
+ smoothTransitions?: boolean;
222
25
  }
223
- /**
224
- * Events emitted by the audio pipeline.
225
- */
226
- type AudioPipelineEvents = {
227
- vadChange: VADState;
26
+ interface LivekitSpeakingOptions {
27
+ noiseSuppression?: NoiseSuppressionConfig;
28
+ speaking?: SpeakingDetectionConfig;
29
+ output?: OutputGainConfig;
30
+ muteWhenSilent?: boolean;
31
+ }
32
+ interface SpeakingState {
33
+ speaking: boolean;
34
+ levelDb: number;
35
+ }
36
+ type SpeakingEvents = {
37
+ speakingChange: SpeakingState;
228
38
  error: Error;
229
39
  };
230
- /**
231
- * Handle to a running audio processing pipeline.
232
- */
233
40
  interface AudioPipelineHandle {
234
- /**
235
- * The processed MediaStreamTrack.
236
- */
237
41
  readonly processedTrack: MediaStreamTrack;
238
- /**
239
- * Event emitter for VAD state and errors.
240
- */
241
- readonly events: Emitter<AudioPipelineEvents>;
242
- /**
243
- * Current VAD state.
244
- */
245
- readonly state: VADState;
246
- /**
247
- * Update configuration at runtime.
248
- */
249
- setConfig(config: Partial<AudioProcessingConfig>): void;
250
- /**
251
- * Stop processing and release resources.
252
- */
42
+ readonly events: Emitter<SpeakingEvents>;
43
+ readonly state: SpeakingState;
44
+ setConfig(config: Partial<LivekitSpeakingOptions>): void;
253
45
  dispose(): void;
254
46
  }
255
- /**
256
- * Interface for a Noise Suppression Plugin.
257
- */
258
- interface NoiseSuppressionPlugin {
259
- name: string;
260
- createNode(context: AudioContext, config: AudioProcessingConfig["noiseSuppression"]): Promise<AudioNode>;
261
- }
262
- /**
263
- * Interface for a VAD Plugin.
264
- */
265
- interface VADPlugin {
266
- name: string;
267
- createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
47
+ interface SpeakingController {
48
+ readonly speaking: boolean;
49
+ readonly levelDb: number;
50
+ onChange(listener: (state: SpeakingState) => void): () => void;
51
+ setConfig(config: Partial<LivekitSpeakingOptions>): void;
52
+ dispose(): void;
268
53
  }
269
54
 
270
- export type { AudioPipelineEvents, AudioPipelineHandle, AudioProcessingConfig, NoiseSuppressionPlugin, VADPlugin, VADState };
55
+ export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
@@ -1,11 +1,9 @@
1
- import { VADPlugin, AudioProcessingConfig } from '../types.mjs';
2
- import 'mitt';
3
-
4
- declare class EnergyVADPlugin implements VADPlugin {
5
- name: string;
6
- private workletNode;
7
- createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
8
- updateSpeakingState(isSpeaking: boolean): void;
1
+ interface LevelDetectorNode {
2
+ node: AudioWorkletNode;
3
+ dispose: () => void;
9
4
  }
5
+ declare function createLevelDetectorNode(context: AudioContext, onLevel: (levelDb: number) => void, options?: {
6
+ smoothing?: number;
7
+ }): Promise<LevelDetectorNode>;
10
8
 
11
- export { EnergyVADPlugin };
9
+ export { type LevelDetectorNode, createLevelDetectorNode };
@@ -1,11 +1,9 @@
1
- import { VADPlugin, AudioProcessingConfig } from '../types.js';
2
- import 'mitt';
3
-
4
- declare class EnergyVADPlugin implements VADPlugin {
5
- name: string;
6
- private workletNode;
7
- createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
8
- updateSpeakingState(isSpeaking: boolean): void;
1
+ interface LevelDetectorNode {
2
+ node: AudioWorkletNode;
3
+ dispose: () => void;
9
4
  }
5
+ declare function createLevelDetectorNode(context: AudioContext, onLevel: (levelDb: number) => void, options?: {
6
+ smoothing?: number;
7
+ }): Promise<LevelDetectorNode>;
10
8
 
11
- export { EnergyVADPlugin };
9
+ export { type LevelDetectorNode, createLevelDetectorNode };