@tensamin/audio 0.1.15 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +48 -229
  2. package/dist/chunk-6BJ4XGSA.mjs +80 -0
  3. package/dist/chunk-AQ5RVY33.mjs +74 -0
  4. package/dist/chunk-IS37FHDN.mjs +33 -0
  5. package/dist/chunk-K4J3UUOR.mjs +178 -0
  6. package/dist/chunk-QNQK6QFB.mjs +71 -0
  7. package/dist/context/audio-context.d.mts +0 -24
  8. package/dist/context/audio-context.d.ts +0 -24
  9. package/dist/index.d.mts +2 -8
  10. package/dist/index.d.ts +2 -8
  11. package/dist/index.js +285 -655
  12. package/dist/index.mjs +8 -43
  13. package/dist/livekit/integration.d.mts +3 -7
  14. package/dist/livekit/integration.d.ts +3 -7
  15. package/dist/livekit/integration.js +280 -601
  16. package/dist/livekit/integration.mjs +7 -8
  17. package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
  18. package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
  19. package/dist/noise-suppression/deepfilternet-node.js +57 -0
  20. package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
  21. package/dist/pipeline/audio-pipeline.d.mts +2 -2
  22. package/dist/pipeline/audio-pipeline.d.ts +2 -2
  23. package/dist/pipeline/audio-pipeline.js +219 -529
  24. package/dist/pipeline/audio-pipeline.mjs +4 -5
  25. package/dist/types.d.mts +42 -246
  26. package/dist/types.d.ts +42 -246
  27. package/dist/vad/vad-node.d.mts +7 -9
  28. package/dist/vad/vad-node.d.ts +7 -9
  29. package/dist/vad/vad-node.js +47 -134
  30. package/dist/vad/vad-node.mjs +3 -3
  31. package/dist/vad/vad-state.d.mts +9 -11
  32. package/dist/vad/vad-state.d.ts +9 -11
  33. package/dist/vad/vad-state.js +50 -77
  34. package/dist/vad/vad-state.mjs +3 -3
  35. package/package.json +21 -21
  36. package/dist/chunk-GLKAWCEW.mjs +0 -158
  37. package/dist/chunk-KLBA2CPE.mjs +0 -101
  38. package/dist/chunk-QQFKHTCQ.mjs +0 -91
  39. package/dist/chunk-U26F3GJN.mjs +0 -47
  40. package/dist/chunk-WQVMSR7V.mjs +0 -310
  41. package/dist/chunk-XO6B3D4A.mjs +0 -67
  42. package/dist/extensibility/plugins.d.mts +0 -9
  43. package/dist/extensibility/plugins.d.ts +0 -9
  44. package/dist/extensibility/plugins.js +0 -298
  45. package/dist/extensibility/plugins.mjs +0 -14
  46. package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
  47. package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
  48. package/dist/noise-suppression/rnnoise-node.js +0 -101
  49. package/dist/noise-suppression/rnnoise-node.mjs +0 -6
package/README.md CHANGED
@@ -1,16 +1,13 @@
  # @tensamin/audio

- Audio processing library for the web with RNNoise-based noise suppression and Voice Activity Detection (VAD). Designed for voice communication applications with LiveKit integration support.
+ DeepFilterNet3-based noise suppression and realtime speaking detection for LiveKit.

  ## Features

- - Configurable Voice Activity Detection with energy-based algorithm
- - RNNoise noise suppression via `@sapphi-red/web-noise-suppressor`
- - Automatic audio gating based on voice detection
- - Runtime configuration updates
- - LiveKit `LocalAudioTrack` integration
- - Plugin system for custom audio processors
- - Optional dynamic range compression
+ - DeepFilterNet3 WASM noise suppression
+ - Realtime `speaking` boolean + dB level
+ - Automatic mute/unmute for LiveKit tracks
+ - Simple min/max dB speaking thresholds

  ## Installation

@@ -18,259 +15,81 @@ Audio processing library for the web with RNNoise-based noise suppression and Vo
  npm install @tensamin/audio livekit-client
  ```

- ## Requirements
-
- For noise suppression, the following files must be provided:
-
- - `rnnoise.wasm`
- - `rnnoise_simd.wasm`
- - `worklet.js`
-
- Available at: `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`
-
- Place these files in a publicly accessible directory (e.g., `public/audio-processor/`).
-
- ## Usage
-
- ### Basic Example
+ ## Quick Start (LiveKit)

  ```ts
- import { createAudioPipeline } from "@tensamin/audio";
-
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
- const track = stream.getAudioTracks()[0];
-
- const pipeline = await createAudioPipeline(track, {
-   noiseSuppression: {
-     enabled: true,
-     wasmUrl: "/audio-processor/rnnoise.wasm",
-     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-     workletUrl: "/audio-processor/worklet.js",
-   },
-   vad: { enabled: true },
- });
-
- const processedStream = new MediaStream([pipeline.processedTrack]);
- ```
-
- ### LiveKit Integration
-
- ```ts
- import { attachProcessingToTrack } from "@tensamin/audio";
  import { LocalAudioTrack } from "livekit-client";
+ import { attachSpeakingDetectionToTrack } from "@tensamin/audio";

  const localTrack = await LocalAudioTrack.create();

- const pipeline = await attachProcessingToTrack(localTrack, {
+ const controller = await attachSpeakingDetectionToTrack(localTrack, {
+   speaking: {
+     minDb: -60,
+     maxDb: -20,
+   },
    noiseSuppression: {
      enabled: true,
-     wasmUrl: "/audio-processor/rnnoise.wasm",
-     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-     workletUrl: "/audio-processor/worklet.js",
    },
-   vad: { enabled: true },
-   livekit: { manageTrackMute: true },
+   muteWhenSilent: true,
  });

- await room.localParticipant.publishTrack(localTrack);
- ```
-
- ### Monitoring VAD State
-
- ```ts
- pipeline.events.on("vadChange", (state) => {
-   console.log("Speaking:", state.isSpeaking);
-   console.log("Probability:", state.probability);
-   console.log("State:", state.state);
+ controller.onChange((state) => {
+   console.log("speaking", state.speaking);
+   console.log("levelDb", state.levelDb);
  });
- ```

- ## Configuration
-
- ### Voice Activity Detection
-
- ```ts
- vad: {
-   enabled: boolean;
-   startThreshold: number; // Default: 0.6 (range: 0-1)
-   stopThreshold: number; // Default: 0.45 (range: 0-1)
-   hangoverMs: number; // Default: 400
-   preRollMs: number; // Default: 250
-   minSpeechDurationMs: number; // Default: 100
-   minSilenceDurationMs: number; // Default: 150
-   energyVad?: {
-     smoothing: number; // Default: 0.95
-     initialNoiseFloor: number; // Default: 0.001
-     minSNR: number; // Default: 8.0 (dB)
-     snrRange: number; // Default: 12.0 (dB)
-     minEnergy: number; // Default: 0.01
-   };
- }
+ await room.localParticipant.publishTrack(localTrack);
  ```

- **Threshold Parameters:**
-
- - `startThreshold`: Probability threshold to unmute audio (Default: 0.8, ~18dB SNR)
- - `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
- - `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
- - `preRollMs`: Audio buffer duration before speech onset
- - `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 250ms)
- - `minSilenceDurationMs`: Minimum silence duration between speech segments
-
- **Energy VAD Parameters:**
+ ## Configuration

- - `smoothing`: Energy calculation smoothing factor (0-1)
- - `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
- - `snrRange`: Range in dB for probability scaling from minSNR
- - `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.01, ~-40dB)
+ All options are passed via `LivekitSpeakingOptions` to `attachSpeakingDetectionToTrack`.

- ### Output Control
+ ### Noise suppression (DeepFilterNet3)

  ```ts
- output: {
-   speechGain: number; // Default: 1.0
-   silenceGain: number; // Default: 0.0
-   gainRampTime: number; // Default: 0.015 (seconds)
-   smoothTransitions: boolean; // Default: true
-   maxGainDb: number; // Default: 6.0
-   enableCompression: boolean; // Default: false
-   compression?: {
-     threshold: number; // Default: -24.0 (dB)
-     ratio: number; // Default: 3.0
-     attack: number; // Default: 0.003 (seconds)
-     release: number; // Default: 0.05 (seconds)
+ noiseSuppression: {
+   enabled?: boolean; // default: true
+   noiseReductionLevel?: number; // 0-100, default: 60
+   assetConfig?: {
+     cdnUrl?: string;
    };
  }
  ```

- **Gain Parameters:**
-
- - `speechGain`: Gain multiplier when speaking (1.0 = unity)
- - `silenceGain`: Gain multiplier when silent (0.0 = mute)
- - `gainRampTime`: Transition duration for gain changes
- - `maxGainDb`: Maximum gain limit to prevent clipping
-
- **Compression Parameters:**
-
- - `threshold`: Level above which compression is applied
- - `ratio`: Compression ratio (e.g., 3.0 = 3:1)
- - `attack`: Time to reach full compression
- - `release`: Time to release compression
-
- ### Runtime Configuration Updates
-
- ```ts
- pipeline.setConfig({
-   vad: {
-     startThreshold: 0.7,
-     stopThreshold: 0.55,
-   },
-   output: {
-     speechGain: 1.3,
-   },
- });
- ```
-
- ## Configuration Examples
-
- ### Noisy Environment
-
- ```ts
- {
-   vad: {
-     startThreshold: 0.7,
-     stopThreshold: 0.55,
-     minSpeechDurationMs: 150,
-     energyVad: { minSNR: 3.0 }
-   }
- }
- ```
-
- ### Quiet Speaker
+ ### Speaking detection (dB-based)

  ```ts
- {
-   vad: {
-     startThreshold: 0.4,
-     stopThreshold: 0.25,
-     energyVad: { minSNR: 1.5 }
-   },
-   output: {
-     speechGain: 1.5
-   }
+ speaking: {
+   minDb: number; // e.g. -60
+   maxDb: number; // e.g. -20
+   speakOnRatio?: number; // default: 0.6
+   speakOffRatio?: number; // default: 0.3
+   hangoverMs?: number; // default: 350
+   attackMs?: number; // default: 50
+   releaseMs?: number; // default: 120
  }
  ```

- ### Natural Conversation
+ `minDb` / `maxDb` define the dynamic range used for level normalization. `speakOnRatio` and `speakOffRatio` (0–1) control when speech starts/stops within that range.

- ```ts
- {
-   vad: {
-     startThreshold: 0.5,
-     stopThreshold: 0.3,
-     hangoverMs: 600,
-   },
-   output: {
-     silenceGain: 0.2
-   }
- }
- ```
-
- ## API Reference
-
- ### `createAudioPipeline(track, config)`
-
- Creates an audio processing pipeline from a MediaStreamTrack.
-
- **Parameters:**
+ ### Output gain control

- - `track`: MediaStreamTrack - Source audio track
- - `config`: AudioProcessingConfig - Configuration object
-
- **Returns:** `Promise<AudioPipelineHandle>`
-
- ### AudioPipelineHandle
-
- ```ts
- interface AudioPipelineHandle {
-   processedTrack: MediaStreamTrack;
-   events: Emitter<AudioPipelineEvents>;
-   state: VADState;
-   setConfig(config: Partial<AudioProcessingConfig>): void;
-   dispose(): void;
+ ```ts
+ output: {
+   speechGain?: number; // default: 1.0
+   silenceGain?: number; // default: 0.0
+   gainRampTime?: number; // default: 0.015 (s)
+   maxGainDb?: number; // default: 6.0
+   smoothTransitions?: boolean; // default: true
  }
- ```
+ ```

- ### AudioPipelineEvents
+ ### LiveKit mute handling

  ```ts
- type AudioPipelineEvents = {
-   vadChange: VADState;
-   error: Error;
- };
- ```
-
- ### VADState
-
- ```ts
- interface VADState {
-   isSpeaking: boolean;
-   probability: number;
-   state: "silent" | "speech_starting" | "speaking" | "speech_ending";
- }
- ```
-
- ## Default Values
+ muteWhenSilent?: boolean; // default: false
+ ```

- | Parameter | Default | Description |
- | ---------------------- | ------- | -------------------------------- |
- | `startThreshold` | 0.6 | Unmute at 60% confidence |
- | `stopThreshold` | 0.45 | Mute below 45% confidence |
- | `hangoverMs` | 400 | Wait 400ms before muting |
- | `preRollMs` | 250 | Buffer 250ms before speech |
- | `minSpeechDurationMs` | 100 | Minimum valid speech duration |
- | `minSilenceDurationMs` | 150 | Minimum silence between speech |
- | `silenceGain` | 0.0 | Complete mute when silent |
- | `speechGain` | 1.0 | Unity gain when speaking |
- | `minSNR` | 2.0 | Voice must be 2x noise floor |
- | `snrRange` | 8.0 | Probability scales over SNR 2-10 |
+ When `muteWhenSilent` is `true`, the library automatically calls `track.mute()` when silence is detected and `track.unmute()` when speech resumes (only if it muted the track itself).
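To make the ratio thresholds in the speaking section concrete, here is a minimal sketch of the normalization they describe, mirroring the `LevelBasedVAD` math that appears later in this diff (the `normalize` helper is illustrative, not a package export):

```ts
// Sketch of the speaking-threshold arithmetic with the README's example values.
const minDb = -60;
const maxDb = -20;
const speakOnRatio = 0.6;
const speakOffRatio = 0.3;

// Levels are clamped to [minDb, maxDb], then mapped linearly onto 0..1.
function normalize(levelDb: number): number {
  const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
  return (clamped - minDb) / (maxDb - minDb);
}

console.log(normalize(-36)); // 0.6 -> reaches speakOnRatio: speech may start
console.log(normalize(-48)); // 0.3 -> reaches speakOffRatio: speech may stop
// So with minDb=-60 and maxDb=-20, speech starts once the level holds above
// -36 dB for attackMs, and stops after it stays below -48 dB for the
// release/hangover window.
```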
package/dist/chunk-6BJ4XGSA.mjs ADDED
@@ -0,0 +1,80 @@
+ import {
+   createAudioPipeline
+ } from "./chunk-K4J3UUOR.mjs";
+
+ // src/livekit/integration.ts
+ import "mitt";
+ async function attachSpeakingDetectionToTrack(track, options = {}) {
+   if (!track) {
+     throw new Error(
+       "attachSpeakingDetectionToTrack requires a valid LocalAudioTrack"
+     );
+   }
+   const originalTrack = track.mediaStreamTrack;
+   if (!originalTrack || originalTrack.readyState === "ended") {
+     throw new Error("LocalAudioTrack has no live MediaStreamTrack to process");
+   }
+   const pipeline = await createAudioPipeline(originalTrack, options);
+   await track.replaceTrack(pipeline.processedTrack);
+   const listeners = /* @__PURE__ */ new Set();
+   let mutedByController = false;
+   let currentState = pipeline.state;
+   const speakingHandler = (state) => {
+     currentState = state;
+     listeners.forEach((listener) => listener(state));
+     if (options.muteWhenSilent) {
+       if (!state.speaking && !track.isMuted) {
+         track.mute().catch((error) => console.error("mute failed", error));
+         mutedByController = true;
+       }
+       if (state.speaking && mutedByController) {
+         track.unmute().catch((error) => console.error("unmute failed", error));
+         mutedByController = false;
+       }
+     }
+   };
+   pipeline.events.on("speakingChange", speakingHandler);
+   const errorHandler = (error) => {
+     console.error("Audio pipeline error", error);
+   };
+   pipeline.events.on("error", errorHandler);
+   const controller = {
+     get speaking() {
+       return currentState.speaking;
+     },
+     get levelDb() {
+       return currentState.levelDb;
+     },
+     onChange: (listener) => {
+       listeners.add(listener);
+       listener(currentState);
+       return () => listeners.delete(listener);
+     },
+     setConfig: (config) => {
+       pipeline.setConfig(config);
+       if (typeof config.muteWhenSilent === "boolean") {
+         options.muteWhenSilent = config.muteWhenSilent;
+       }
+     },
+     dispose: () => {
+       pipeline.events.off("speakingChange", speakingHandler);
+       pipeline.events.off("error", errorHandler);
+       listeners.clear();
+       if (mutedByController && track.isMuted) {
+         track.unmute().catch((error) => console.error("unmute failed", error));
+         mutedByController = false;
+       }
+       pipeline.dispose();
+       if (originalTrack.readyState === "live") {
+         track.replaceTrack(originalTrack).catch((error) => {
+           console.error("Failed to restore original track", error);
+         });
+       }
+     }
+   };
+   return controller;
+ }
+
+ export {
+   attachSpeakingDetectionToTrack
+ };
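The controller above supports subscription and explicit teardown; a minimal lifecycle sketch, assuming a connected LiveKit `Room` and using the `LocalAudioTrack.create()` call from the README:

```ts
import { LocalAudioTrack, Room } from "livekit-client";
import { attachSpeakingDetectionToTrack } from "@tensamin/audio";

// Sketch: wire up detection, react to changes, and tear down cleanly.
async function publishWithDetection(room: Room) {
  const track = await LocalAudioTrack.create();
  const controller = await attachSpeakingDetectionToTrack(track, {
    speaking: { minDb: -60, maxDb: -20 },
    muteWhenSilent: true,
  });

  // onChange fires immediately with the current state, then on every change;
  // it returns an unsubscribe function.
  const unsubscribe = controller.onChange((state) => {
    console.log(state.speaking, state.levelDb);
  });

  await room.localParticipant.publishTrack(track);

  // dispose() detaches listeners, unmutes if the controller muted the track,
  // and restores the original MediaStreamTrack if it is still live.
  return () => {
    unsubscribe();
    controller.dispose();
  };
}
```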
package/dist/chunk-AQ5RVY33.mjs ADDED
@@ -0,0 +1,74 @@
+ // src/vad/vad-state.ts
+ var LevelBasedVAD = class {
+   config;
+   speaking = false;
+   pendingSpeechSince = null;
+   pendingSilenceSince = null;
+   constructor(config) {
+     this.config = {
+       minDb: config.minDb,
+       maxDb: config.maxDb,
+       speakOnRatio: config.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speakOffRatio ?? 0.3,
+       hangoverMs: config.hangoverMs ?? 350,
+       attackMs: config.attackMs ?? 50,
+       releaseMs: config.releaseMs ?? 120
+     };
+   }
+   updateConfig(config) {
+     this.config = {
+       ...this.config,
+       ...config,
+       speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
+       speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
+       hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
+       attackMs: config.attackMs ?? this.config.attackMs,
+       releaseMs: config.releaseMs ?? this.config.releaseMs
+     };
+   }
+   process(levelDb, timestampMs) {
+     const {
+       minDb,
+       maxDb,
+       speakOnRatio,
+       speakOffRatio,
+       hangoverMs,
+       attackMs,
+       releaseMs
+     } = this.config;
+     const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
+     const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
+     if (!this.speaking) {
+       if (norm >= speakOnRatio) {
+         this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
+         if (timestampMs - this.pendingSpeechSince >= attackMs) {
+           this.speaking = true;
+           this.pendingSpeechSince = null;
+           this.pendingSilenceSince = null;
+         }
+       } else {
+         this.pendingSpeechSince = null;
+       }
+     } else {
+       if (norm <= speakOffRatio) {
+         this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
+         const releaseWindow = Math.max(releaseMs, hangoverMs);
+         if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
+           this.speaking = false;
+           this.pendingSilenceSince = null;
+           this.pendingSpeechSince = null;
+         }
+       } else {
+         this.pendingSilenceSince = null;
+       }
+     }
+     return {
+       speaking: this.speaking,
+       levelDb: clamped
+     };
+   }
+ };
+
+ export {
+   LevelBasedVAD
+ };
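The class above implements simple hysteresis with attack and release windows; a quick illustration driving it with synthetic levels and millisecond timestamps (shown with a direct `LevelBasedVAD` construction for clarity, using the defaults from the source above):

```ts
// Illustration only: feed synthetic levels to see the attack/hangover timing.
const vad = new LevelBasedVAD({ minDb: -60, maxDb: -20 });

// Loud frame at t=0: above the on-threshold, but attackMs (50 ms default)
// has not elapsed yet, so speaking stays false.
console.log(vad.process(-30, 0).speaking);   // false

// Still loud 60 ms later: the pending-speech window now exceeds attackMs.
console.log(vad.process(-30, 60).speaking);  // true

// Quiet frames: speaking only drops after max(releaseMs, hangoverMs)
// (350 ms with the defaults) of sustained silence.
console.log(vad.process(-55, 100).speaking); // true (silence still pending)
console.log(vad.process(-55, 460).speaking); // false (360 ms >= 350 ms)
```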
package/dist/chunk-IS37FHDN.mjs ADDED
@@ -0,0 +1,33 @@
+ // src/noise-suppression/deepfilternet-node.ts
+ import { DeepFilterNet3Processor } from "deepfilternet3-noise-filter";
+ async function createDeepFilterNet3Node(context, config) {
+   const processorConfig = {
+     sampleRate: context.sampleRate,
+     noiseReductionLevel: config?.noiseReductionLevel ?? 60
+   };
+   if (config?.assetConfig) {
+     processorConfig.assetConfig = config.assetConfig;
+   }
+   const processor = new DeepFilterNet3Processor(processorConfig);
+   await processor.initialize();
+   const node = await processor.createAudioWorkletNode(context);
+   const enabled = config?.enabled ?? true;
+   if (!enabled) {
+     processor.setNoiseSuppressionEnabled(false);
+   }
+   return {
+     node,
+     processor,
+     dispose: () => {
+       try {
+         processor.destroy();
+       } catch (error) {
+         console.error("Failed to dispose DeepFilterNet3 processor", error);
+       }
+     }
+   };
+ }
+
+ export {
+   createDeepFilterNet3Node
+ };
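For reference, a sketch of using this factory on its own, outside the full pipeline. It assumes `createDeepFilterNet3Node` is re-exported at the package level (the file list includes `dist/noise-suppression/deepfilternet-node.mjs`); the import path is illustrative:

```ts
import { createDeepFilterNet3Node } from "@tensamin/audio/noise-suppression/deepfilternet-node";

// Sketch: run the DeepFilterNet3 stage standalone, mic -> worklet -> speakers.
async function denoiseOnly() {
  const context = new AudioContext();
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const source = context.createMediaStreamSource(stream);

  const ns = await createDeepFilterNet3Node(context, {
    enabled: true,
    noiseReductionLevel: 80, // 0-100
  });

  source.connect(ns.node);
  ns.node.connect(context.destination);

  // Strength is adjustable at runtime (see setSuppressionLevel in the
  // pipeline chunk below); dispose() destroys the WASM processor.
  ns.processor.setSuppressionLevel(40);
  return () => ns.dispose();
}
```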
package/dist/chunk-K4J3UUOR.mjs ADDED
@@ -0,0 +1,178 @@
+ import {
+   getAudioContext,
+   registerPipeline,
+   unregisterPipeline
+ } from "./chunk-OZ7KMC4S.mjs";
+ import {
+   createDeepFilterNet3Node
+ } from "./chunk-IS37FHDN.mjs";
+ import {
+   createLevelDetectorNode
+ } from "./chunk-QNQK6QFB.mjs";
+ import {
+   LevelBasedVAD
+ } from "./chunk-AQ5RVY33.mjs";
+
+ // src/pipeline/audio-pipeline.ts
+ import mitt from "mitt";
+ async function createAudioPipeline(sourceTrack, config = {}) {
+   const context = getAudioContext();
+   registerPipeline();
+   const nsConfig = {
+     enabled: config.noiseSuppression?.enabled ?? true,
+     noiseReductionLevel: config.noiseSuppression?.noiseReductionLevel ?? 60
+   };
+   if (config.noiseSuppression?.assetConfig) {
+     nsConfig.assetConfig = config.noiseSuppression.assetConfig;
+   }
+   const fullConfig = {
+     noiseSuppression: nsConfig,
+     speaking: {
+       minDb: config.speaking?.minDb ?? -60,
+       maxDb: config.speaking?.maxDb ?? -20,
+       speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+       hangoverMs: config.speaking?.hangoverMs ?? 350,
+       attackMs: config.speaking?.attackMs ?? 50,
+       releaseMs: config.speaking?.releaseMs ?? 120
+     },
+     output: {
+       speechGain: config.output?.speechGain ?? 1,
+       silenceGain: config.output?.silenceGain ?? 0,
+       gainRampTime: config.output?.gainRampTime ?? 0.015,
+       maxGainDb: config.output?.maxGainDb ?? 6,
+       smoothTransitions: config.output?.smoothTransitions ?? true
+     },
+     muteWhenSilent: config.muteWhenSilent ?? false
+   };
+   if (!sourceTrack || sourceTrack.kind !== "audio") {
+     throw new Error(
+       "createAudioPipeline requires a valid audio MediaStreamTrack"
+     );
+   }
+   if (sourceTrack.readyState === "ended") {
+     throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
+   }
+   const sourceStream = new MediaStream([sourceTrack]);
+   const sourceNode = context.createMediaStreamSource(sourceStream);
+   const emitter = mitt();
+   const vad = new LevelBasedVAD(fullConfig.speaking);
+   let lastState = { speaking: false, levelDb: -Infinity };
+   const nsHandle = await createDeepFilterNet3Node(
+     context,
+     fullConfig.noiseSuppression
+   );
+   const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+     try {
+       const timestamp = context.currentTime * 1e3;
+       const nextState = vad.process(levelDb, timestamp);
+       const speakingChanged = nextState.speaking !== lastState.speaking;
+       const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+       if (speakingChanged || levelChanged) {
+         lastState = nextState;
+         updateGain(nextState);
+         emitter.emit("speakingChange", nextState);
+       }
+     } catch (error) {
+       const err = error instanceof Error ? error : new Error(String(error));
+       emitter.emit("error", err);
+     }
+   });
+   const splitter = context.createGain();
+   sourceNode.connect(nsHandle.node);
+   nsHandle.node.connect(splitter);
+   splitter.connect(levelHandle.node);
+   const gainNode = context.createGain();
+   gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
+   splitter.connect(gainNode);
+   const destination = context.createMediaStreamDestination();
+   gainNode.connect(destination);
+   function updateGain(state) {
+     const {
+       speechGain = 1,
+       silenceGain = 0,
+       gainRampTime = 0.015,
+       smoothTransitions = true,
+       maxGainDb = 6
+     } = fullConfig.output ?? {};
+     const maxGainLinear = Math.pow(10, maxGainDb / 20);
+     const limitedSpeechGain = Math.min(speechGain ?? 1, maxGainLinear);
+     const target = state.speaking ? limitedSpeechGain : silenceGain ?? 0;
+     const now = context.currentTime;
+     gainNode.gain.cancelScheduledValues(now);
+     gainNode.gain.setValueAtTime(gainNode.gain.value, now);
+     if (smoothTransitions) {
+       gainNode.gain.setTargetAtTime(target, now, gainRampTime / 3);
+     } else {
+       gainNode.gain.setValueAtTime(target, now);
+     }
+   }
+   const audioTracks = destination.stream.getAudioTracks();
+   if (audioTracks.length === 0) {
+     nsHandle.dispose();
+     levelHandle.dispose();
+     unregisterPipeline();
+     throw new Error("Failed to create processed audio track");
+   }
+   const processedTrack = audioTracks[0];
+   function dispose() {
+     try {
+       sourceNode.disconnect();
+       nsHandle.node.disconnect();
+       splitter.disconnect();
+       levelHandle.node.disconnect();
+       gainNode.disconnect();
+       destination.stream.getTracks().forEach((t) => t.stop());
+       levelHandle.dispose();
+       nsHandle.dispose();
+     } catch (error) {
+       console.error("Error during pipeline disposal", error);
+     } finally {
+       unregisterPipeline();
+     }
+   }
+   const handle = {
+     processedTrack,
+     events: emitter,
+     get state() {
+       return lastState;
+     },
+     setConfig: (next) => {
+       try {
+         if (next.speaking) {
+           vad.updateConfig(next.speaking);
+           fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+         }
+         if (next.output) {
+           fullConfig.output = { ...fullConfig.output, ...next.output };
+           updateGain(lastState);
+         }
+         if (next.noiseSuppression) {
+           const ns = next.noiseSuppression;
+           fullConfig.noiseSuppression = {
+             ...fullConfig.noiseSuppression,
+             ...ns
+           };
+           if (typeof ns.noiseReductionLevel === "number") {
+             nsHandle.processor.setSuppressionLevel(ns.noiseReductionLevel);
+           }
+           if (typeof ns.enabled === "boolean") {
+             nsHandle.processor.setNoiseSuppressionEnabled(ns.enabled);
+           }
+         }
+         if (typeof next.muteWhenSilent === "boolean") {
+           fullConfig.muteWhenSilent = next.muteWhenSilent;
+         }
+       } catch (error) {
+         const err = error instanceof Error ? error : new Error(String(error));
+         emitter.emit("error", err);
+       }
+     },
+     dispose
+   };
+   return handle;
+ }
+
+ export {
+   createAudioPipeline
+ };
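The pipeline can also be used without LiveKit, analogous to the old README's basic example; a minimal sketch against the new API, assuming `createAudioPipeline` remains a package-level export as it was in 0.1.x:

```ts
import { createAudioPipeline } from "@tensamin/audio";

// Sketch: process a raw getUserMedia track and consume the result directly.
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const pipeline = await createAudioPipeline(stream.getAudioTracks()[0], {
  noiseSuppression: { enabled: true },
  speaking: { minDb: -60, maxDb: -20 },
});

// "speakingChange" carries { speaking, levelDb }, as emitted above.
pipeline.events.on("speakingChange", (state) => {
  console.log("speaking:", state.speaking, "level:", state.levelDb);
});

// The gated, denoised output is an ordinary MediaStreamTrack.
const processed = new MediaStream([pipeline.processedTrack]);

// Tear everything down when finished.
pipeline.dispose();
```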