@tensamin/audio 0.1.15 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/README.md +52 -229
  2. package/dist/chunk-AQ5RVY33.mjs +74 -0
  3. package/dist/chunk-BSYE2MWZ.mjs +178 -0
  4. package/dist/chunk-DTIMONGP.mjs +92 -0
  5. package/dist/chunk-IS37FHDN.mjs +33 -0
  6. package/dist/chunk-JBGGED5Q.mjs +129 -0
  7. package/dist/chunk-QNQK6QFB.mjs +71 -0
  8. package/dist/context/audio-context.d.mts +0 -24
  9. package/dist/context/audio-context.d.ts +0 -24
  10. package/dist/index.d.mts +2 -8
  11. package/dist/index.d.ts +2 -8
  12. package/dist/index.js +403 -651
  13. package/dist/index.mjs +11 -43
  14. package/dist/livekit/integration.d.mts +5 -8
  15. package/dist/livekit/integration.d.ts +5 -8
  16. package/dist/livekit/integration.js +401 -598
  17. package/dist/livekit/integration.mjs +10 -8
  18. package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
  19. package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
  20. package/dist/noise-suppression/deepfilternet-node.js +57 -0
  21. package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
  22. package/dist/pipeline/audio-pipeline.d.mts +2 -2
  23. package/dist/pipeline/audio-pipeline.d.ts +2 -2
  24. package/dist/pipeline/audio-pipeline.js +219 -529
  25. package/dist/pipeline/audio-pipeline.mjs +4 -5
  26. package/dist/pipeline/remote-audio-monitor.d.mts +12 -0
  27. package/dist/pipeline/remote-audio-monitor.d.ts +12 -0
  28. package/dist/pipeline/remote-audio-monitor.js +276 -0
  29. package/dist/pipeline/remote-audio-monitor.mjs +9 -0
  30. package/dist/types.d.mts +45 -246
  31. package/dist/types.d.ts +45 -246
  32. package/dist/vad/vad-node.d.mts +7 -9
  33. package/dist/vad/vad-node.d.ts +7 -9
  34. package/dist/vad/vad-node.js +47 -134
  35. package/dist/vad/vad-node.mjs +3 -3
  36. package/dist/vad/vad-state.d.mts +9 -11
  37. package/dist/vad/vad-state.d.ts +9 -11
  38. package/dist/vad/vad-state.js +50 -77
  39. package/dist/vad/vad-state.mjs +3 -3
  40. package/package.json +21 -21
  41. package/dist/chunk-GLKAWCEW.mjs +0 -158
  42. package/dist/chunk-KLBA2CPE.mjs +0 -101
  43. package/dist/chunk-QQFKHTCQ.mjs +0 -91
  44. package/dist/chunk-U26F3GJN.mjs +0 -47
  45. package/dist/chunk-WQVMSR7V.mjs +0 -310
  46. package/dist/chunk-XO6B3D4A.mjs +0 -67
  47. package/dist/extensibility/plugins.d.mts +0 -9
  48. package/dist/extensibility/plugins.d.ts +0 -9
  49. package/dist/extensibility/plugins.js +0 -298
  50. package/dist/extensibility/plugins.mjs +0 -14
  51. package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
  52. package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
  53. package/dist/noise-suppression/rnnoise-node.js +0 -101
  54. package/dist/noise-suppression/rnnoise-node.mjs +0 -6
package/README.md CHANGED
@@ -1,16 +1,17 @@
  # @tensamin/audio
 
- Audio processing library for the web with RNNoise-based noise suppression and Voice Activity Detection (VAD). Designed for voice communication applications with LiveKit integration support.
+ DeepFilterNet3-based noise suppression and realtime speaking detection for LiveKit.
 
  ## Features
 
- - Configurable Voice Activity Detection with energy-based algorithm
- - RNNoise noise suppression via `@sapphi-red/web-noise-suppressor`
- - Automatic audio gating based on voice detection
- - Runtime configuration updates
- - LiveKit `LocalAudioTrack` integration
- - Plugin system for custom audio processors
- - Optional dynamic range compression
+ - DeepFilterNet3 WASM noise suppression
+ - Realtime `speaking` boolean + dB level
+ - Automatic mute/unmute for LiveKit tracks
+ - Simple min/max dB speaking thresholds
+
+ > Noise suppression is provided via the [`deepfilternet3-noise-filter`](https://www.npmjs.com/package/deepfilternet3-noise-filter) package.
+ > That package is based on [DeepFilterNet](https://github.com/Rikorose/DeepFilterNet) by Rikorose.
+
 
  ## Installation
 
@@ -18,259 +19,81 @@ Audio processing library for the web with RNNoise-based noise suppression and Vo
  npm install @tensamin/audio livekit-client
  ```
 
- ## Requirements
-
- For noise suppression, the following files must be provided:
-
- - `rnnoise.wasm`
- - `rnnoise_simd.wasm`
- - `worklet.js`
-
- Available at: `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`
-
- Place these files in a publicly accessible directory (e.g., `public/audio-processor/`).
-
- ## Usage
-
- ### Basic Example
-
- ```ts
- import { createAudioPipeline } from "@tensamin/audio";
-
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
- const track = stream.getAudioTracks()[0];
-
- const pipeline = await createAudioPipeline(track, {
-   noiseSuppression: {
-     enabled: true,
-     wasmUrl: "/audio-processor/rnnoise.wasm",
-     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-     workletUrl: "/audio-processor/worklet.js",
-   },
-   vad: { enabled: true },
- });
-
- const processedStream = new MediaStream([pipeline.processedTrack]);
- ```
-
- ### LiveKit Integration
+ ## Quick Start (LiveKit)
 
  ```ts
- import { attachProcessingToTrack } from "@tensamin/audio";
  import { LocalAudioTrack } from "livekit-client";
+ import { attachSpeakingDetectionToTrack } from "@tensamin/audio";
 
  const localTrack = await LocalAudioTrack.create();
 
- const pipeline = await attachProcessingToTrack(localTrack, {
+ const controller = await attachSpeakingDetectionToTrack(localTrack, {
+   speaking: {
+     minDb: -60,
+     maxDb: -20,
+   },
    noiseSuppression: {
      enabled: true,
-     wasmUrl: "/audio-processor/rnnoise.wasm",
-     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-     workletUrl: "/audio-processor/worklet.js",
    },
-   vad: { enabled: true },
-   livekit: { manageTrackMute: true },
+   muteWhenSilent: true,
  });
 
- await room.localParticipant.publishTrack(localTrack);
- ```
-
- ### Monitoring VAD State
-
- ```ts
- pipeline.events.on("vadChange", (state) => {
-   console.log("Speaking:", state.isSpeaking);
-   console.log("Probability:", state.probability);
-   console.log("State:", state.state);
+ controller.onChange((state) => {
+   console.log("speaking", state.speaking);
+   console.log("levelDb", state.levelDb);
  });
- ```
-
- ## Configuration
-
- ### Voice Activity Detection
 
- ```ts
- vad: {
-   enabled: boolean;
-   startThreshold: number; // Default: 0.6 (range: 0-1)
-   stopThreshold: number; // Default: 0.45 (range: 0-1)
-   hangoverMs: number; // Default: 400
-   preRollMs: number; // Default: 250
-   minSpeechDurationMs: number; // Default: 100
-   minSilenceDurationMs: number; // Default: 150
-   energyVad?: {
-     smoothing: number; // Default: 0.95
-     initialNoiseFloor: number; // Default: 0.001
-     minSNR: number; // Default: 8.0 (dB)
-     snrRange: number; // Default: 12.0 (dB)
-     minEnergy: number; // Default: 0.01
-   };
- }
+ await room.localParticipant.publishTrack(localTrack);
  ```
 
- **Threshold Parameters:**
-
- - `startThreshold`: Probability threshold to unmute audio (Default: 0.8, ~18dB SNR)
- - `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
- - `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
- - `preRollMs`: Audio buffer duration before speech onset
- - `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 250ms)
- - `minSilenceDurationMs`: Minimum silence duration between speech segments
-
- **Energy VAD Parameters:**
+ ## Configuration
 
- - `smoothing`: Energy calculation smoothing factor (0-1)
- - `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
- - `snrRange`: Range in dB for probability scaling from minSNR
- - `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.01, ~-40dB)
+ All options are passed via `LivekitSpeakingOptions` to `attachSpeakingDetectionToTrack`.
 
- ### Output Control
+ ### Noise suppression (DeepFilterNet3)
 
  ```ts
- output: {
-   speechGain: number; // Default: 1.0
-   silenceGain: number; // Default: 0.0
-   gainRampTime: number; // Default: 0.015 (seconds)
-   smoothTransitions: boolean; // Default: true
-   maxGainDb: number; // Default: 6.0
-   enableCompression: boolean; // Default: false
-   compression?: {
-     threshold: number; // Default: -24.0 (dB)
-     ratio: number; // Default: 3.0
-     attack: number; // Default: 0.003 (seconds)
-     release: number; // Default: 0.05 (seconds)
+ noiseSuppression: {
+   enabled?: boolean; // default: true
+   noiseReductionLevel?: number; // 0-100, default: 60
+   assetConfig?: {
+     cdnUrl?: string;
    };
  }
  ```
 
- **Gain Parameters:**
-
- - `speechGain`: Gain multiplier when speaking (1.0 = unity)
- - `silenceGain`: Gain multiplier when silent (0.0 = mute)
- - `gainRampTime`: Transition duration for gain changes
- - `maxGainDb`: Maximum gain limit to prevent clipping
-
- **Compression Parameters:**
-
- - `threshold`: Level above which compression is applied
- - `ratio`: Compression ratio (e.g., 3.0 = 3:1)
- - `attack`: Time to reach full compression
- - `release`: Time to release compression
-
- ### Runtime Configuration Updates
-
- ```ts
- pipeline.setConfig({
-   vad: {
-     startThreshold: 0.7,
-     stopThreshold: 0.55,
-   },
-   output: {
-     speechGain: 1.3,
-   },
- });
- ```
-
- ## Configuration Examples
-
- ### Noisy Environment
-
- ```ts
- {
-   vad: {
-     startThreshold: 0.7,
-     stopThreshold: 0.55,
-     minSpeechDurationMs: 150,
-     energyVad: { minSNR: 3.0 }
-   }
- }
- ```
-
- ### Quiet Speaker
-
- ```ts
- {
-   vad: {
-     startThreshold: 0.4,
-     stopThreshold: 0.25,
-     energyVad: { minSNR: 1.5 }
-   },
-   output: {
-     speechGain: 1.5
-   }
- }
- ```
-
- ### Natural Conversation
+ ### Speaking detection (dB-based)
 
  ```ts
- {
-   vad: {
-     startThreshold: 0.5,
-     stopThreshold: 0.3,
-     hangoverMs: 600,
-   },
-   output: {
-     silenceGain: 0.2
-   }
+ speaking: {
+   minDb: number; // e.g. -60
+   maxDb: number; // e.g. -20
+   speakOnRatio?: number; // default: 0.6
+   speakOffRatio?: number; // default: 0.3
+   hangoverMs?: number; // default: 350
+   attackMs?: number; // default: 50
+   releaseMs?: number; // default: 120
  }
  ```
 
- ## API Reference
-
- ### `createAudioPipeline(track, config)`
+ `minDb` / `maxDb` define the dynamic range used for level normalization. `speakOnRatio` and `speakOffRatio` (0–1) control when speech starts/stops within that range.
 
- Creates an audio processing pipeline from a MediaStreamTrack.
+ ### Output gain control
 
- **Parameters:**
-
- - `track`: MediaStreamTrack - Source audio track
- - `config`: AudioProcessingConfig - Configuration object
-
- **Returns:** `Promise<AudioPipelineHandle>`
-
- ### AudioPipelineHandle
-
- ```ts
- interface AudioPipelineHandle {
-   processedTrack: MediaStreamTrack;
-   events: Emitter<AudioPipelineEvents>;
-   state: VADState;
-   setConfig(config: Partial<AudioProcessingConfig>): void;
-   dispose(): void;
+ ```ts
+ output: {
+   speechGain?: number; // default: 1.0
+   silenceGain?: number; // default: 0.0
+   gainRampTime?: number; // default: 0.015 (s)
+   maxGainDb?: number; // default: 6.0
+   smoothTransitions?: boolean; // default: true
  }
- ```
+ ```
 
- ### AudioPipelineEvents
+ ### LiveKit mute handling
 
  ```ts
- type AudioPipelineEvents = {
-   vadChange: VADState;
-   error: Error;
- };
- ```
-
- ### VADState
-
- ```ts
- interface VADState {
-   isSpeaking: boolean;
-   probability: number;
-   state: "silent" | "speech_starting" | "speaking" | "speech_ending";
- }
- ```
-
- ## Default Values
+ muteWhenSilent?: boolean; // default: false
+ ```
 
- | Parameter              | Default | Description                      |
- | ---------------------- | ------- | -------------------------------- |
- | `startThreshold`       | 0.6     | Unmute at 60% confidence         |
- | `stopThreshold`        | 0.45    | Mute below 45% confidence        |
- | `hangoverMs`           | 400     | Wait 400ms before muting         |
- | `preRollMs`            | 250     | Buffer 250ms before speech       |
- | `minSpeechDurationMs`  | 100     | Minimum valid speech duration    |
- | `minSilenceDurationMs` | 150     | Minimum silence between speech   |
- | `silenceGain`          | 0.0     | Complete mute when silent        |
- | `speechGain`           | 1.0     | Unity gain when speaking         |
- | `minSNR`               | 2.0     | Voice must be 2x noise floor     |
- | `snrRange`             | 8.0     | Probability scales over SNR 2-10 |
+ When `muteWhenSilent` is `true`, the library automatically calls `track.mute()` when silence is detected and `track.unmute()` when speech resumes (only if it muted the track itself).
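The dB-based speaking detection described in the new README maps a measured level into a 0–1 ratio before comparing it against `speakOnRatio` / `speakOffRatio`. A minimal sketch of that mapping, mirroring the math in `chunk-AQ5RVY33.mjs` below (`normalizeLevel` is an illustrative helper, not a package export):

```ts
// Clamp the level into [minDb, maxDb], then normalize to 0..1.
// With the README defaults (minDb = -60, maxDb = -20), -36 dB maps to 0.6,
// which is exactly the default speakOnRatio.
function normalizeLevel(levelDb: number, minDb = -60, maxDb = -20): number {
  const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
  return (clamped - minDb) / Math.max(1, maxDb - minDb);
}

normalizeLevel(-36); // 0.6 -> reaches the default speakOnRatio, speech can start
normalizeLevel(-48); // 0.3 -> at the default speakOffRatio, speech can stop
```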
package/dist/chunk-AQ5RVY33.mjs ADDED
@@ -0,0 +1,74 @@
+ // src/vad/vad-state.ts
+ var LevelBasedVAD = class {
+   config;
+   speaking = false;
+   pendingSpeechSince = null;
+   pendingSilenceSince = null;
+   constructor(config) {
+     this.config = {
+       minDb: config.minDb,
+       maxDb: config.maxDb,
+       speakOnRatio: config.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speakOffRatio ?? 0.3,
+       hangoverMs: config.hangoverMs ?? 350,
+       attackMs: config.attackMs ?? 50,
+       releaseMs: config.releaseMs ?? 120
+     };
+   }
+   updateConfig(config) {
+     this.config = {
+       ...this.config,
+       ...config,
+       speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
+       speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
+       hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
+       attackMs: config.attackMs ?? this.config.attackMs,
+       releaseMs: config.releaseMs ?? this.config.releaseMs
+     };
+   }
+   process(levelDb, timestampMs) {
+     const {
+       minDb,
+       maxDb,
+       speakOnRatio,
+       speakOffRatio,
+       hangoverMs,
+       attackMs,
+       releaseMs
+     } = this.config;
+     const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
+     const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
+     if (!this.speaking) {
+       if (norm >= speakOnRatio) {
+         this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
+         if (timestampMs - this.pendingSpeechSince >= attackMs) {
+           this.speaking = true;
+           this.pendingSpeechSince = null;
+           this.pendingSilenceSince = null;
+         }
+       } else {
+         this.pendingSpeechSince = null;
+       }
+     } else {
+       if (norm <= speakOffRatio) {
+         this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
+         const releaseWindow = Math.max(releaseMs, hangoverMs);
+         if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
+           this.speaking = false;
+           this.pendingSilenceSince = null;
+           this.pendingSpeechSince = null;
+         }
+       } else {
+         this.pendingSilenceSince = null;
+       }
+     }
+     return {
+       speaking: this.speaking,
+       levelDb: clamped
+     };
+   }
+ };
+
+ export {
+   LevelBasedVAD
+ };
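This class implements attack/release hysteresis: speech must be sustained for `attackMs` before `speaking` flips on, and silence must be sustained for `max(releaseMs, hangoverMs)` before it flips off. A minimal driver, assuming `LevelBasedVAD` from this chunk is in scope (it is not documented as a public export):

```ts
const vad = new LevelBasedVAD({ minDb: -60, maxDb: -20 });

// -30 dB normalizes to 0.75 >= speakOnRatio (0.6), but speaking only flips
// once the level has been sustained for attackMs (50 ms by default).
vad.process(-30, 0);   // { speaking: false, levelDb: -30 }
vad.process(-30, 60);  // { speaking: true,  levelDb: -30 }

// -55 dB normalizes to 0.125 <= speakOffRatio (0.3); speaking clears only
// after max(releaseMs, hangoverMs) = 350 ms of sustained silence.
vad.process(-55, 100); // { speaking: true,  levelDb: -55 }
vad.process(-55, 500); // { speaking: false, levelDb: -55 }
```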
package/dist/chunk-BSYE2MWZ.mjs ADDED
@@ -0,0 +1,178 @@
+ import {
+   createDeepFilterNet3Node
+ } from "./chunk-IS37FHDN.mjs";
+ import {
+   LevelBasedVAD
+ } from "./chunk-AQ5RVY33.mjs";
+ import {
+   getAudioContext,
+   registerPipeline,
+   unregisterPipeline
+ } from "./chunk-OZ7KMC4S.mjs";
+ import {
+   createLevelDetectorNode
+ } from "./chunk-QNQK6QFB.mjs";
+
+ // src/pipeline/audio-pipeline.ts
+ import mitt from "mitt";
+ async function createAudioPipeline(sourceTrack, config = {}) {
+   const context = getAudioContext();
+   registerPipeline();
+   const nsConfig = {
+     enabled: config.noiseSuppression?.enabled ?? true,
+     noiseReductionLevel: config.noiseSuppression?.noiseReductionLevel ?? 60
+   };
+   if (config.noiseSuppression?.assetConfig) {
+     nsConfig.assetConfig = config.noiseSuppression.assetConfig;
+   }
+   const fullConfig = {
+     noiseSuppression: nsConfig,
+     speaking: {
+       minDb: config.speaking?.minDb ?? -60,
+       maxDb: config.speaking?.maxDb ?? -20,
+       speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+       hangoverMs: config.speaking?.hangoverMs ?? 350,
+       attackMs: config.speaking?.attackMs ?? 50,
+       releaseMs: config.speaking?.releaseMs ?? 120
+     },
+     output: {
+       speechGain: config.output?.speechGain ?? 1,
+       silenceGain: config.output?.silenceGain ?? 0,
+       gainRampTime: config.output?.gainRampTime ?? 0.015,
+       maxGainDb: config.output?.maxGainDb ?? 6,
+       smoothTransitions: config.output?.smoothTransitions ?? true
+     },
+     muteWhenSilent: config.muteWhenSilent ?? false
+   };
+   if (!sourceTrack || sourceTrack.kind !== "audio") {
+     throw new Error(
+       "createAudioPipeline requires a valid audio MediaStreamTrack"
+     );
+   }
+   if (sourceTrack.readyState === "ended") {
+     throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
+   }
+   const sourceStream = new MediaStream([sourceTrack]);
+   const sourceNode = context.createMediaStreamSource(sourceStream);
+   const emitter = mitt();
+   const vad = new LevelBasedVAD(fullConfig.speaking);
+   let lastState = { speaking: false, levelDb: -Infinity };
+   const nsHandle = await createDeepFilterNet3Node(
+     context,
+     fullConfig.noiseSuppression
+   );
+   const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+     try {
+       const timestamp = context.currentTime * 1e3;
+       const nextState = vad.process(levelDb, timestamp);
+       const speakingChanged = nextState.speaking !== lastState.speaking;
+       const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+       if (speakingChanged || levelChanged) {
+         lastState = nextState;
+         updateGain(nextState);
+         emitter.emit("speakingChange", nextState);
+       }
+     } catch (error) {
+       const err = error instanceof Error ? error : new Error(String(error));
+       emitter.emit("error", err);
+     }
+   });
+   const splitter = context.createGain();
+   sourceNode.connect(nsHandle.node);
+   nsHandle.node.connect(splitter);
+   splitter.connect(levelHandle.node);
+   const gainNode = context.createGain();
+   gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
+   splitter.connect(gainNode);
+   const destination = context.createMediaStreamDestination();
+   gainNode.connect(destination);
+   function updateGain(state) {
+     const {
+       speechGain = 1,
+       silenceGain = 0,
+       gainRampTime = 0.015,
+       smoothTransitions = true,
+       maxGainDb = 6
+     } = fullConfig.output ?? {};
+     const maxGainLinear = Math.pow(10, maxGainDb / 20);
+     const limitedSpeechGain = Math.min(speechGain ?? 1, maxGainLinear);
+     const target = state.speaking ? limitedSpeechGain : silenceGain ?? 0;
+     const now = context.currentTime;
+     gainNode.gain.cancelScheduledValues(now);
+     gainNode.gain.setValueAtTime(gainNode.gain.value, now);
+     if (smoothTransitions) {
+       gainNode.gain.setTargetAtTime(target, now, gainRampTime / 3);
+     } else {
+       gainNode.gain.setValueAtTime(target, now);
+     }
+   }
+   const audioTracks = destination.stream.getAudioTracks();
+   if (audioTracks.length === 0) {
+     nsHandle.dispose();
+     levelHandle.dispose();
+     unregisterPipeline();
+     throw new Error("Failed to create processed audio track");
+   }
+   const processedTrack = audioTracks[0];
+   function dispose() {
+     try {
+       sourceNode.disconnect();
+       nsHandle.node.disconnect();
+       splitter.disconnect();
+       levelHandle.node.disconnect();
+       gainNode.disconnect();
+       destination.stream.getTracks().forEach((t) => t.stop());
+       levelHandle.dispose();
+       nsHandle.dispose();
+     } catch (error) {
+       console.error("Error during pipeline disposal", error);
+     } finally {
+       unregisterPipeline();
+     }
+   }
+   const handle = {
+     processedTrack,
+     events: emitter,
+     get state() {
+       return lastState;
+     },
+     setConfig: (next) => {
+       try {
+         if (next.speaking) {
+           vad.updateConfig(next.speaking);
+           fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+         }
+         if (next.output) {
+           fullConfig.output = { ...fullConfig.output, ...next.output };
+           updateGain(lastState);
+         }
+         if (next.noiseSuppression) {
+           const ns = next.noiseSuppression;
+           fullConfig.noiseSuppression = {
+             ...fullConfig.noiseSuppression,
+             ...ns
+           };
+           if (typeof ns.noiseReductionLevel === "number") {
+             nsHandle.processor.setSuppressionLevel(ns.noiseReductionLevel);
+           }
+           if (typeof ns.enabled === "boolean") {
+             nsHandle.processor.setNoiseSuppressionEnabled(ns.enabled);
+           }
+         }
+         if (typeof next.muteWhenSilent === "boolean") {
+           fullConfig.muteWhenSilent = next.muteWhenSilent;
+         }
+       } catch (error) {
+         const err = error instanceof Error ? error : new Error(String(error));
+         emitter.emit("error", err);
+       }
+     },
+     dispose
+   };
+   return handle;
+ }
+
+ export {
+   createAudioPipeline
+ };
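For context, a sketch of driving `createAudioPipeline` directly with a `getUserMedia` track, inside an async context. The option and event names come from the chunk above; the root import assumes the 0.2.x package still re-exports `createAudioPipeline` as the 0.1.x README did:

```ts
import { createAudioPipeline } from "@tensamin/audio";

const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const [track] = stream.getAudioTracks();

const pipeline = await createAudioPipeline(track, {
  noiseSuppression: { enabled: true, noiseReductionLevel: 60 },
  speaking: { minDb: -60, maxDb: -20 },
});

// mitt emitter: "speakingChange" carries { speaking, levelDb }.
pipeline.events.on("speakingChange", (state) => {
  console.log("speaking:", state.speaking, "level:", state.levelDb, "dB");
});
pipeline.events.on("error", console.error);

// Denoised, gated audio comes out on processedTrack.
const processed = new MediaStream([pipeline.processedTrack]);

// Runtime updates go through setConfig; dispose tears the graph down.
pipeline.setConfig({ output: { silenceGain: 0.1 } });
pipeline.dispose();
```

One detail of `updateGain` worth noting: `setTargetAtTime(target, now, gainRampTime / 3)` uses a time constant of one third of `gainRampTime`, so the exponential ramp covers roughly 95% of the transition within the configured 15 ms.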
package/dist/chunk-DTIMONGP.mjs ADDED
@@ -0,0 +1,92 @@
+ import {
+   LevelBasedVAD
+ } from "./chunk-AQ5RVY33.mjs";
+ import {
+   getAudioContext,
+   registerPipeline,
+   unregisterPipeline
+ } from "./chunk-OZ7KMC4S.mjs";
+ import {
+   createLevelDetectorNode
+ } from "./chunk-QNQK6QFB.mjs";
+
+ // src/pipeline/remote-audio-monitor.ts
+ import mitt from "mitt";
+ async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+   const context = getAudioContext();
+   registerPipeline();
+   const fullConfig = {
+     speaking: {
+       minDb: config.speaking?.minDb ?? -60,
+       maxDb: config.speaking?.maxDb ?? -20,
+       speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
+       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+       hangoverMs: config.speaking?.hangoverMs ?? 350,
+       attackMs: config.speaking?.attackMs ?? 50,
+       releaseMs: config.speaking?.releaseMs ?? 120
+     }
+   };
+   if (!sourceTrack || sourceTrack.kind !== "audio") {
+     throw new Error(
+       "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+     );
+   }
+   if (sourceTrack.readyState === "ended") {
+     throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+   }
+   const sourceStream = new MediaStream([sourceTrack]);
+   const sourceNode = context.createMediaStreamSource(sourceStream);
+   const emitter = mitt();
+   const vad = new LevelBasedVAD(fullConfig.speaking);
+   let lastState = { speaking: false, levelDb: -Infinity };
+   const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+     try {
+       const timestamp = context.currentTime * 1e3;
+       const nextState = vad.process(levelDb, timestamp);
+       const speakingChanged = nextState.speaking !== lastState.speaking;
+       const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+       if (speakingChanged || levelChanged) {
+         lastState = nextState;
+         emitter.emit("speakingChange", nextState);
+       }
+     } catch (error) {
+       const err = error instanceof Error ? error : new Error(String(error));
+       emitter.emit("error", err);
+     }
+   });
+   sourceNode.connect(levelHandle.node);
+   function dispose() {
+     try {
+       sourceNode.disconnect();
+       levelHandle.node.disconnect();
+       levelHandle.dispose();
+     } catch (error) {
+       console.error("Error during remote monitor disposal", error);
+     } finally {
+       unregisterPipeline();
+     }
+   }
+   const handle = {
+     events: emitter,
+     get state() {
+       return lastState;
+     },
+     setConfig: (next) => {
+       try {
+         if (next.speaking) {
+           vad.updateConfig(next.speaking);
+           fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+         }
+       } catch (error) {
+         const err = error instanceof Error ? error : new Error(String(error));
+         emitter.emit("error", err);
+       }
+     },
+     dispose
+   };
+   return handle;
+ }
+
+ export {
+   createRemoteAudioMonitor
+ };
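Unlike `createAudioPipeline`, the monitor adds no noise suppression or gain stage; it only watches levels on an existing track. A sketch of attaching it to a subscribed LiveKit track; the root re-export of `createRemoteAudioMonitor` and the exact LiveKit event wiring are assumptions here:

```ts
import { Room, RoomEvent } from "livekit-client";
import { createRemoteAudioMonitor } from "@tensamin/audio";

declare const room: Room; // an already-connected room

room.on(RoomEvent.TrackSubscribed, async (track) => {
  // Only audio tracks with a backing MediaStreamTrack can be monitored.
  if (track.kind !== "audio" || !track.mediaStreamTrack) return;

  const monitor = await createRemoteAudioMonitor(track.mediaStreamTrack, {
    speaking: { minDb: -60, maxDb: -20 },
  });

  monitor.events.on("speakingChange", ({ speaking, levelDb }) => {
    console.log("remote speaking:", speaking, "at", levelDb.toFixed(1), "dB");
  });
});
```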