@tensamin/audio 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +24 -2
  2. package/dist/chunk-FS635GMR.mjs +47 -0
  3. package/dist/chunk-HFSKQ33X.mjs +38 -0
  4. package/{src/vad/vad-state.ts → dist/chunk-JJASCVEW.mjs} +21 -33
  5. package/dist/chunk-OZ7KMC4S.mjs +46 -0
  6. package/dist/chunk-QU7E5HBA.mjs +106 -0
  7. package/dist/chunk-SDTOKWM2.mjs +39 -0
  8. package/{src/vad/vad-node.ts → dist/chunk-UMU2KIB6.mjs} +10 -20
  9. package/dist/chunk-WBQAMGXK.mjs +0 -0
  10. package/dist/context/audio-context.d.mts +32 -0
  11. package/dist/context/audio-context.d.ts +32 -0
  12. package/dist/context/audio-context.js +75 -0
  13. package/dist/context/audio-context.mjs +16 -0
  14. package/dist/extensibility/plugins.d.mts +9 -0
  15. package/dist/extensibility/plugins.d.ts +9 -0
  16. package/dist/extensibility/plugins.js +180 -0
  17. package/dist/extensibility/plugins.mjs +14 -0
  18. package/dist/index.d.mts +10 -0
  19. package/dist/index.d.ts +10 -0
  20. package/dist/index.js +419 -0
  21. package/dist/index.mjs +47 -0
  22. package/dist/livekit/integration.d.mts +11 -0
  23. package/dist/livekit/integration.d.ts +11 -0
  24. package/dist/livekit/integration.js +368 -0
  25. package/dist/livekit/integration.mjs +12 -0
  26. package/dist/noise-suppression/rnnoise-node.d.mts +10 -0
  27. package/dist/noise-suppression/rnnoise-node.d.ts +10 -0
  28. package/dist/noise-suppression/rnnoise-node.js +73 -0
  29. package/dist/noise-suppression/rnnoise-node.mjs +6 -0
  30. package/dist/pipeline/audio-pipeline.d.mts +6 -0
  31. package/dist/pipeline/audio-pipeline.d.ts +6 -0
  32. package/dist/pipeline/audio-pipeline.js +335 -0
  33. package/dist/pipeline/audio-pipeline.mjs +11 -0
  34. package/dist/types.d.mts +155 -0
  35. package/dist/types.d.ts +155 -0
  36. package/dist/types.js +18 -0
  37. package/dist/types.mjs +1 -0
  38. package/dist/vad/vad-node.d.mts +9 -0
  39. package/dist/vad/vad-node.d.ts +9 -0
  40. package/dist/vad/vad-node.js +92 -0
  41. package/dist/vad/vad-node.mjs +6 -0
  42. package/dist/vad/vad-state.d.mts +15 -0
  43. package/dist/vad/vad-state.d.ts +15 -0
  44. package/dist/vad/vad-state.js +83 -0
  45. package/dist/vad/vad-state.mjs +6 -0
  46. package/package.json +11 -14
  47. package/.github/workflows/publish.yml +0 -23
  48. package/src/context/audio-context.ts +0 -69
  49. package/src/extensibility/plugins.ts +0 -45
  50. package/src/index.ts +0 -8
  51. package/src/livekit/integration.ts +0 -61
  52. package/src/noise-suppression/rnnoise-node.ts +0 -62
  53. package/src/pipeline/audio-pipeline.ts +0 -154
  54. package/src/types.ts +0 -167
  55. package/tsconfig.json +0 -29
package/dist/pipeline/audio-pipeline.js ADDED
@@ -0,0 +1,335 @@
+ "use strict";
+ var __create = Object.create;
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __getProtoOf = Object.getPrototypeOf;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+ for (var name in all)
+ __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+ if (from && typeof from === "object" || typeof from === "function") {
+ for (let key of __getOwnPropNames(from))
+ if (!__hasOwnProp.call(to, key) && key !== except)
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+ }
+ return to;
+ };
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+ // If the importer is in node compatibility mode or this is not an ESM
+ // file that has been converted to a CommonJS file using a Babel-
+ // compatible transform (i.e. "__esModule" has not been set), then set
+ // "default" to the CommonJS "module.exports" for node compatibility.
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+ mod
+ ));
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/pipeline/audio-pipeline.ts
+ var audio_pipeline_exports = {};
+ __export(audio_pipeline_exports, {
+ createAudioPipeline: () => createAudioPipeline
+ });
+ module.exports = __toCommonJS(audio_pipeline_exports);
+ var import_mitt = __toESM(require("mitt"));
+
+ // src/context/audio-context.ts
+ var sharedContext = null;
+ var activePipelines = 0;
+ function getAudioContext(options) {
+ if (typeof window === "undefined" || typeof AudioContext === "undefined") {
+ throw new Error(
+ "AudioContext is not supported in this environment (browser only)."
+ );
+ }
+ if (!sharedContext || sharedContext.state === "closed") {
+ sharedContext = new AudioContext(options);
+ }
+ return sharedContext;
+ }
+ function registerPipeline() {
+ activePipelines++;
+ }
+ function unregisterPipeline() {
+ activePipelines = Math.max(0, activePipelines - 1);
+ }
+
+ // src/noise-suppression/rnnoise-node.ts
+ var RNNoisePlugin = class {
+ name = "rnnoise-ns";
+ wasmBuffer = null;
+ async createNode(context, config) {
+ const { loadRnnoise, RnnoiseWorkletNode } = await import("@sapphi-red/web-noise-suppressor");
+ if (!config?.enabled) {
+ const pass = context.createGain();
+ return pass;
+ }
+ if (!config?.wasmUrl || !config?.simdUrl || !config?.workletUrl) {
+ throw new Error(
+ "RNNoisePlugin requires 'wasmUrl', 'simdUrl', and 'workletUrl' to be configured. Please download the assets and provide the URLs."
+ );
+ }
+ if (!this.wasmBuffer) {
+ this.wasmBuffer = await loadRnnoise({
+ url: config.wasmUrl,
+ simdUrl: config.simdUrl
+ });
+ }
+ const workletUrl = config.workletUrl;
+ try {
+ await context.audioWorklet.addModule(workletUrl);
+ } catch (e) {
+ console.warn("Failed to add RNNoise worklet module:", e);
+ }
+ const node = new RnnoiseWorkletNode(context, {
+ wasmBinary: this.wasmBuffer,
+ maxChannels: 1
+ // Mono for now
+ });
+ return node;
+ }
+ };
+
+ // src/vad/vad-node.ts
+ var energyVadWorkletCode = `
+ class EnergyVadProcessor extends AudioWorkletProcessor {
+ constructor() {
+ super();
+ this.smoothing = 0.95;
+ this.energy = 0;
+ this.noiseFloor = 0.001;
+ }
+
+ process(inputs, outputs, parameters) {
+ const input = inputs[0];
+ if (!input || !input.length) return true;
+ const channel = input[0];
+
+ // Calculate RMS
+ let sum = 0;
+ for (let i = 0; i < channel.length; i++) {
+ sum += channel[i] * channel[i];
+ }
+ const rms = Math.sqrt(sum / channel.length);
+
+ // Simple adaptive noise floor (very basic)
+ if (rms < this.noiseFloor) {
+ this.noiseFloor = this.noiseFloor * 0.99 + rms * 0.01;
+ } else {
+ this.noiseFloor = this.noiseFloor * 0.999 + rms * 0.001;
+ }
+
+ // Calculate "probability" based on SNR
+ // This is a heuristic mapping from energy to 0-1
+ const snr = rms / (this.noiseFloor + 1e-6);
+ const probability = Math.min(1, Math.max(0, (snr - 1.5) / 10)); // Arbitrary scaling
+
+ this.port.postMessage({ probability });
+
+ return true;
+ }
+ }
+ registerProcessor('energy-vad-processor', EnergyVadProcessor);
+ `;
+ var EnergyVADPlugin = class {
+ name = "energy-vad";
+ async createNode(context, config, onDecision) {
+ const blob = new Blob([energyVadWorkletCode], {
+ type: "application/javascript"
+ });
+ const url = URL.createObjectURL(blob);
+ try {
+ await context.audioWorklet.addModule(url);
+ } catch (e) {
+ console.warn("Failed to add Energy VAD worklet:", e);
+ throw e;
+ } finally {
+ URL.revokeObjectURL(url);
+ }
+ const node = new AudioWorkletNode(context, "energy-vad-processor");
+ node.port.onmessage = (event) => {
+ const { probability } = event.data;
+ onDecision(probability);
+ };
+ return node;
+ }
+ };
+
+ // src/extensibility/plugins.ts
+ var nsPlugins = /* @__PURE__ */ new Map();
+ var vadPlugins = /* @__PURE__ */ new Map();
+ var defaultNs = new RNNoisePlugin();
+ nsPlugins.set(defaultNs.name, defaultNs);
+ var defaultVad = new EnergyVADPlugin();
+ vadPlugins.set(defaultVad.name, defaultVad);
+ function getNoiseSuppressionPlugin(name) {
+ if (!name) return defaultNs;
+ const plugin = nsPlugins.get(name);
+ if (!plugin) {
+ console.warn(
+ `Noise suppression plugin '${name}' not found, falling back to default.`
+ );
+ return defaultNs;
+ }
+ return plugin;
+ }
+ function getVADPlugin(name) {
+ if (!name) return defaultVad;
+ const plugin = vadPlugins.get(name);
+ if (!plugin) {
+ console.warn(`VAD plugin '${name}' not found, falling back to default.`);
+ return defaultVad;
+ }
+ return plugin;
+ }
+
+ // src/vad/vad-state.ts
+ var VADStateMachine = class {
+ config;
+ currentState = "silent";
+ lastSpeechTime = 0;
+ speechStartTime = 0;
+ frameDurationMs = 20;
+ // Assumed frame duration, updated by calls
+ constructor(config) {
+ this.config = {
+ enabled: config?.enabled ?? true,
+ pluginName: config?.pluginName ?? "energy-vad",
+ startThreshold: config?.startThreshold ?? 0.5,
+ stopThreshold: config?.stopThreshold ?? 0.4,
+ hangoverMs: config?.hangoverMs ?? 300,
+ preRollMs: config?.preRollMs ?? 200
+ };
+ }
+ updateConfig(config) {
+ this.config = { ...this.config, ...config };
+ }
+ processFrame(probability, timestamp) {
+ const { startThreshold, stopThreshold, hangoverMs } = this.config;
+ let newState = this.currentState;
+ if (this.currentState === "silent" || this.currentState === "speech_ending") {
+ if (probability >= startThreshold) {
+ newState = "speech_starting";
+ this.speechStartTime = timestamp;
+ this.lastSpeechTime = timestamp;
+ } else {
+ newState = "silent";
+ }
+ } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
+ if (probability >= stopThreshold) {
+ newState = "speaking";
+ this.lastSpeechTime = timestamp;
+ } else {
+ const timeSinceSpeech = timestamp - this.lastSpeechTime;
+ if (timeSinceSpeech < hangoverMs) {
+ newState = "speaking";
+ } else {
+ newState = "speech_ending";
+ }
+ }
+ }
+ if (newState === "speech_starting") newState = "speaking";
+ if (newState === "speech_ending") newState = "silent";
+ this.currentState = newState;
+ return {
+ isSpeaking: newState === "speaking",
+ probability,
+ state: newState
+ };
+ }
+ };
+
+ // src/pipeline/audio-pipeline.ts
+ async function createAudioPipeline(sourceTrack, config = {}) {
+ const context = getAudioContext();
+ registerPipeline();
+ const fullConfig = {
+ noiseSuppression: { enabled: true, ...config.noiseSuppression },
+ vad: { enabled: true, ...config.vad },
+ output: {
+ speechGain: 1,
+ silenceGain: 0,
+ gainRampTime: 0.02,
+ ...config.output
+ },
+ livekit: { manageTrackMute: false, ...config.livekit }
+ };
+ const sourceStream = new MediaStream([sourceTrack]);
+ const sourceNode = context.createMediaStreamSource(sourceStream);
+ const nsPlugin = getNoiseSuppressionPlugin(
+ fullConfig.noiseSuppression?.pluginName
+ );
+ const nsNode = await nsPlugin.createNode(
+ context,
+ fullConfig.noiseSuppression
+ );
+ const vadPlugin = getVADPlugin(fullConfig.vad?.pluginName);
+ const vadStateMachine = new VADStateMachine(fullConfig.vad);
+ const emitter = (0, import_mitt.default)();
+ const vadNode = await vadPlugin.createNode(
+ context,
+ fullConfig.vad,
+ (prob) => {
+ const timestamp = context.currentTime * 1e3;
+ const newState = vadStateMachine.processFrame(prob, timestamp);
+ if (newState.state !== lastVadState.state || Math.abs(newState.probability - lastVadState.probability) > 0.1) {
+ emitter.emit("vadChange", newState);
+ lastVadState = newState;
+ updateGain(newState);
+ }
+ }
+ );
+ let lastVadState = {
+ isSpeaking: false,
+ probability: 0,
+ state: "silent"
+ };
+ const splitter = context.createGain();
+ sourceNode.connect(nsNode);
+ nsNode.connect(splitter);
+ splitter.connect(vadNode);
+ const delayNode = context.createDelay(1);
+ const preRollSeconds = (fullConfig.vad?.preRollMs ?? 200) / 1e3;
+ delayNode.delayTime.value = preRollSeconds;
+ const gainNode = context.createGain();
+ gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
+ const destination = context.createMediaStreamDestination();
+ splitter.connect(delayNode);
+ delayNode.connect(gainNode);
+ gainNode.connect(destination);
+ function updateGain(state) {
+ const { speechGain, silenceGain, gainRampTime } = fullConfig.output;
+ const targetGain = state.isSpeaking ? speechGain ?? 1 : silenceGain ?? 0;
+ const now = context.currentTime;
+ gainNode.gain.setTargetAtTime(targetGain, now, gainRampTime ?? 0.02);
+ }
+ function dispose() {
+ sourceNode.disconnect();
+ nsNode.disconnect();
+ splitter.disconnect();
+ vadNode.disconnect();
+ delayNode.disconnect();
+ gainNode.disconnect();
+ destination.stream.getTracks().forEach((t) => t.stop());
+ unregisterPipeline();
+ }
+ return {
+ processedTrack: destination.stream.getAudioTracks()[0],
+ events: emitter,
+ get state() {
+ return lastVadState;
+ },
+ setConfig: (newConfig) => {
+ if (newConfig.vad) {
+ vadStateMachine.updateConfig(newConfig.vad);
+ }
+ },
+ dispose
+ };
+ }
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+ createAudioPipeline
+ });
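For orientation, a minimal usage sketch of the bundled createAudioPipeline shown above. This is an illustration, not part of the published package: the import path assumes the root entry re-exports createAudioPipeline, and the getUserMedia call and RNNoise asset URLs are placeholders you would host yourself. The option names, events, processedTrack, and dispose come from the source above.

// Sketch only: asset URLs and getUserMedia constraints are placeholder assumptions.
import { createAudioPipeline } from "@tensamin/audio";

async function startMicPipeline() {
  // Capture a raw microphone track (browser only, matching getAudioContext()).
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const [micTrack] = stream.getAudioTracks();

  const pipeline = await createAudioPipeline(micTrack, {
    noiseSuppression: {
      enabled: true,
      // Hypothetical asset locations for the @sapphi-red/web-noise-suppressor files.
      wasmUrl: "/assets/rnnoise.wasm",
      simdUrl: "/assets/rnnoise_simd.wasm",
      workletUrl: "/assets/rnnoise-worklet.js",
    },
    vad: { enabled: true, startThreshold: 0.5, stopThreshold: 0.4, hangoverMs: 300 },
  });

  // "vadChange" fires when the state changes or the probability moves by more than 0.1.
  pipeline.events.on("vadChange", (state) => {
    console.log("speaking:", state.isSpeaking, "p =", state.probability.toFixed(2));
  });

  // Publish or play pipeline.processedTrack; call pipeline.dispose() when finished.
  return pipeline;
}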
package/dist/pipeline/audio-pipeline.mjs ADDED
@@ -0,0 +1,11 @@
+ import {
+ createAudioPipeline
+ } from "../chunk-QU7E5HBA.mjs";
+ import "../chunk-JJASCVEW.mjs";
+ import "../chunk-OZ7KMC4S.mjs";
+ import "../chunk-FS635GMR.mjs";
+ import "../chunk-SDTOKWM2.mjs";
+ import "../chunk-UMU2KIB6.mjs";
+ export {
+ createAudioPipeline
+ };
package/dist/types.d.mts ADDED
@@ -0,0 +1,155 @@
+ import { Emitter } from 'mitt';
+
+ /**
+ * Configuration for the audio processing pipeline.
+ */
+ interface AudioProcessingConfig {
+ /**
+ * Noise suppression configuration.
+ */
+ noiseSuppression?: {
+ enabled: boolean;
+ /**
+ * Path or URL to the RNNoise WASM binary.
+ * REQUIRED if enabled.
+ */
+ wasmUrl?: string;
+ /**
+ * Path or URL to the RNNoise SIMD WASM binary.
+ * REQUIRED if enabled.
+ */
+ simdUrl?: string;
+ /**
+ * Path or URL to the RNNoise worklet script.
+ * REQUIRED if enabled.
+ */
+ workletUrl?: string;
+ /**
+ * Plugin name to use. Defaults to 'rnnoise-ns'.
+ */
+ pluginName?: string;
+ };
+ /**
+ * Voice Activity Detection (VAD) configuration.
+ */
+ vad?: {
+ enabled: boolean;
+ /**
+ * Plugin name to use. Defaults to 'rnnoise-vad' or 'energy-vad'.
+ */
+ pluginName?: string;
+ /**
+ * Probability threshold for speech onset (0-1).
+ * Default: 0.5
+ */
+ startThreshold?: number;
+ /**
+ * Probability threshold for speech offset (0-1).
+ * Default: 0.4
+ */
+ stopThreshold?: number;
+ /**
+ * Time in ms to wait after speech stops before considering it silent.
+ * Default: 300ms
+ */
+ hangoverMs?: number;
+ /**
+ * Time in ms of audio to buffer before speech onset to avoid cutting the start.
+ * Default: 200ms
+ */
+ preRollMs?: number;
+ };
+ /**
+ * Output gain and muting configuration.
+ */
+ output?: {
+ /**
+ * Gain to apply when speaking (0-1+). Default: 1.0
+ */
+ speechGain?: number;
+ /**
+ * Gain to apply when silent (0-1). Default: 0.0 (mute)
+ */
+ silenceGain?: number;
+ /**
+ * Time in seconds to ramp gain changes. Default: 0.02
+ */
+ gainRampTime?: number;
+ };
+ /**
+ * LiveKit integration configuration.
+ */
+ livekit?: {
+ /**
+ * Whether to call track.mute()/unmute() on the LocalAudioTrack based on VAD.
+ * This saves bandwidth but has more signaling overhead.
+ * Default: false (uses gain gating only)
+ */
+ manageTrackMute?: boolean;
+ };
+ }
+ /**
+ * Represents the state of Voice Activity Detection.
+ */
+ interface VADState {
+ /**
+ * Whether speech is currently detected (after hysteresis).
+ */
+ isSpeaking: boolean;
+ /**
+ * Raw probability of speech from the VAD model (0-1).
+ */
+ probability: number;
+ /**
+ * Current state enum.
+ */
+ state: "silent" | "speech_starting" | "speaking" | "speech_ending";
+ }
+ /**
+ * Events emitted by the audio pipeline.
+ */
+ type AudioPipelineEvents = {
+ vadChange: VADState;
+ error: Error;
+ };
+ /**
+ * Handle to a running audio processing pipeline.
+ */
+ interface AudioPipelineHandle {
+ /**
+ * The processed MediaStreamTrack.
+ */
+ readonly processedTrack: MediaStreamTrack;
+ /**
+ * Event emitter for VAD state and errors.
+ */
+ readonly events: Emitter<AudioPipelineEvents>;
+ /**
+ * Current VAD state.
+ */
+ readonly state: VADState;
+ /**
+ * Update configuration at runtime.
+ */
+ setConfig(config: Partial<AudioProcessingConfig>): void;
+ /**
+ * Stop processing and release resources.
+ */
+ dispose(): void;
+ }
+ /**
+ * Interface for a Noise Suppression Plugin.
+ */
+ interface NoiseSuppressionPlugin {
+ name: string;
+ createNode(context: AudioContext, config: AudioProcessingConfig["noiseSuppression"]): Promise<AudioNode>;
+ }
+ /**
+ * Interface for a VAD Plugin.
+ */
+ interface VADPlugin {
+ name: string;
+ createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
+ }
+
+ export type { AudioPipelineEvents, AudioPipelineHandle, AudioProcessingConfig, NoiseSuppressionPlugin, VADPlugin, VADState };
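The VADPlugin interface declared above is the extension point for custom detectors. Below is a hedged sketch of a trivial plugin shape; the interface members (name, createNode) come from the declarations, while the class name, the constant-probability loop, and any registration step are illustrative assumptions (this diff does not show a public register function).

import type { VADPlugin, AudioProcessingConfig } from "@tensamin/audio";

// Illustrative only: reports a fixed speech probability every ~100 ms.
class AlwaysOnVADPlugin implements VADPlugin {
  name = "always-on-vad";

  async createNode(
    context: AudioContext,
    _config: AudioProcessingConfig["vad"],
    onDecision: (probability: number) => void
  ): Promise<AudioNode> {
    // A pass-through gain node keeps the pipeline graph connected.
    const node = context.createGain();
    // A real plugin would analyse audio and clear this interval when the pipeline is disposed.
    setInterval(() => onDecision(1), 100);
    return node;
  }
}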
package/dist/types.d.ts ADDED
@@ -0,0 +1,155 @@
(declarations identical to package/dist/types.d.mts above)
package/dist/types.js ADDED
@@ -0,0 +1,18 @@
+ "use strict";
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __copyProps = (to, from, except, desc) => {
+ if (from && typeof from === "object" || typeof from === "function") {
+ for (let key of __getOwnPropNames(from))
+ if (!__hasOwnProp.call(to, key) && key !== except)
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+ }
+ return to;
+ };
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/types.ts
+ var types_exports = {};
+ module.exports = __toCommonJS(types_exports);
package/dist/types.mjs ADDED
@@ -0,0 +1 @@
+ import "./chunk-WBQAMGXK.mjs";
package/dist/vad/vad-node.d.mts ADDED
@@ -0,0 +1,9 @@
+ import { VADPlugin, AudioProcessingConfig } from '../types.mjs';
+ import 'mitt';
+
+ declare class EnergyVADPlugin implements VADPlugin {
+ name: string;
+ createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
+ }
+
+ export { EnergyVADPlugin };
package/dist/vad/vad-node.d.ts ADDED
@@ -0,0 +1,9 @@
+ import { VADPlugin, AudioProcessingConfig } from '../types.js';
+ import 'mitt';
+
+ declare class EnergyVADPlugin implements VADPlugin {
+ name: string;
+ createNode(context: AudioContext, config: AudioProcessingConfig["vad"], onDecision: (probability: number) => void): Promise<AudioNode>;
+ }
+
+ export { EnergyVADPlugin };