npm - @tensamin/audio - Versions diffs - 0.1.3 → 0.1.5 - Mend

@tensamin/audio 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +217 -54
package/dist/{chunk-XMTQPMQ6.mjs → chunk-GVKCBKW6.mjs} +1 -1
package/dist/{chunk-6P2RDBW5.mjs → chunk-H5UKZU2Y.mjs} +1 -1
package/dist/chunk-N553RHTI.mjs +93 -0
package/dist/chunk-VEJXAEMM.mjs +136 -0
package/dist/{chunk-EXH2PNUE.mjs → chunk-XXTNAUYX.mjs} +133 -34
package/dist/extensibility/plugins.js +52 -14
package/dist/extensibility/plugins.mjs +2 -2
package/dist/index.js +225 -54
package/dist/index.mjs +5 -5
package/dist/livekit/integration.js +225 -54
package/dist/livekit/integration.mjs +5 -5
package/dist/pipeline/audio-pipeline.js +225 -54
package/dist/pipeline/audio-pipeline.mjs +4 -4
package/dist/types.d.mts +118 -10
package/dist/types.d.ts +118 -10
package/dist/vad/vad-node.d.mts +2 -0
package/dist/vad/vad-node.d.ts +2 -0
package/dist/vad/vad-node.js +52 -14
package/dist/vad/vad-node.mjs +1 -1
package/dist/vad/vad-state.d.mts +1 -0
package/dist/vad/vad-state.d.ts +1 -0
package/dist/vad/vad-state.js +42 -8
package/dist/vad/vad-state.mjs +1 -1
package/package.json +1 -1
package/dist/chunk-JJASCVEW.mjs +0 -59
package/dist/chunk-R5JVHKWA.mjs +0 -98

package/README.md CHANGED Viewed

@@ -1,90 +1,56 @@
 # @tensamin/audio
-A audio processing library for the web, featuring RNNoise-based noise suppression and robust Voice Activity Detection (VAD). Designed for seamless integration with LiveKit.
+Audio processing library for the web with RNNoise-based noise suppression and Voice Activity Detection (VAD). Designed for voice communication applications with LiveKit integration support.
 ## Features
-- **Noise Suppression**: Uses `@sapphi-red/web-noise-suppressor` (RNNoise) for high-quality noise reduction.
-- **Robust VAD**: Energy-based VAD with hysteresis, hangover, and pre-roll buffering to prevent cutting off speech onset.
-- **Intelligent Muting**: Automatically gates audio or mutes LiveKit tracks when silent.
-- **LiveKit Integration**: Good support for `LocalAudioTrack`.
-- **Extensible**: Plugin system for custom WASM/Worklet processors.
+- Configurable Voice Activity Detection with energy-based algorithm
+- RNNoise noise suppression via `@sapphi-red/web-noise-suppressor`
+- Automatic audio gating based on voice detection
+- Runtime configuration updates
+- LiveKit `LocalAudioTrack` integration
+- Plugin system for custom audio processors
+- Optional dynamic range compression
 ## Installation
 ```bash
 npm install @tensamin/audio livekit-client
-bun add @tensamin/audio livekit-client
-pnpm install @tensamin/audio livekit-client
 ```
-## Setup Assets
+## Requirements
-This library uses WASM and AudioWorklets for processing. **Asset setup is optional** - the pipeline can run in passthrough mode without them.
-### For Noise Suppression (Optional)
-If you want to enable noise suppression, download these files from `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`:
+For noise suppression, the following files must be provided:
 - `rnnoise.wasm`
 - `rnnoise_simd.wasm`
-- `noise-suppressor-worklet.min.js`
+- `worklet.js`
-Place them in your project's public directory (e.g., `public/audio-processor/`).
+Available at: `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`
-**Note:** The pipeline will automatically disable noise suppression if these URLs are not provided, and will use passthrough mode instead.
+Place these files in a publicly accessible directory (e.g., `public/audio-processor/`).
 ## Usage
-### Minimal Setup (Passthrough Mode)
-If you want to use the pipeline without noise suppression or VAD (e.g., for testing or when features are not needed), you can disable them:
+### Basic Example
 ```ts
 import { createAudioPipeline } from "@tensamin/audio";
-// Get a stream
 const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
 const track = stream.getAudioTracks()[0];
-// Create pipeline
-const pipeline = await createAudioPipeline(track, {
-  noiseSuppression: { enabled: false },
-  vad: { enabled: false },
-});
-// Use the processed track
-const processedStream = new MediaStream([pipeline.processedTrack]);
-```
-### Basic Usage (Raw MediaStream)
-```ts
-import { createAudioPipeline } from "@tensamin/audio";
-// Get a stream
-const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-const track = stream.getAudioTracks()[0];
-// Create pipeline
 const pipeline = await createAudioPipeline(track, {
   noiseSuppression: {
     enabled: true,
     wasmUrl: "/audio-processor/rnnoise.wasm",
     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-    workletUrl: "/audio-processor/noise-suppressor-worklet.min.js",
+    workletUrl: "/audio-processor/worklet.js",
   },
   vad: { enabled: true },
 });
-// Use the processed track
 const processedStream = new MediaStream([pipeline.processedTrack]);
-// audioElement.srcObject = processedStream;
-// Listen to VAD events
-pipeline.events.on("vadChange", (state) => {
-  console.log("Is Speaking:", state.isSpeaking);
-});
 ```
 ### LiveKit Integration
@@ -93,21 +59,218 @@ pipeline.events.on("vadChange", (state) => {
 import { attachProcessingToTrack } from "@tensamin/audio";
 import { LocalAudioTrack } from "livekit-client";
-// Assume you have a LocalAudioTrack
 const localTrack = await LocalAudioTrack.create();
-// Attach processing (replaces the underlying track)
 const pipeline = await attachProcessingToTrack(localTrack, {
   noiseSuppression: {
     enabled: true,
     wasmUrl: "/audio-processor/rnnoise.wasm",
     simdUrl: "/audio-processor/rnnoise_simd.wasm",
-    workletUrl: "/audio-processor/noise-suppressor-worklet.min.js",
+    workletUrl: "/audio-processor/worklet.js",
   },
   vad: { enabled: true },
-  livekit: { manageTrackMute: true }, // Optional: mute the track object itself
+  livekit: { manageTrackMute: true },
 });
-// Publish the track
 await room.localParticipant.publishTrack(localTrack);
 ```
+### Monitoring VAD State
+```ts
+pipeline.events.on("vadChange", (state) => {
+  console.log("Speaking:", state.isSpeaking);
+  console.log("Probability:", state.probability);
+  console.log("State:", state.state);
+});
+```
+## Configuration
+### Voice Activity Detection
+```ts
+vad: {
+  enabled: boolean;
+  startThreshold: number;              // Default: 0.6 (range: 0-1)
+  stopThreshold: number;               // Default: 0.45 (range: 0-1)
+  hangoverMs: number;                  // Default: 400
+  preRollMs: number;                   // Default: 250
+  minSpeechDurationMs: number;         // Default: 100
+  minSilenceDurationMs: number;        // Default: 150
+  energyVad?: {
+    smoothing: number;                 // Default: 0.95
+    initialNoiseFloor: number;         // Default: 0.001
+    noiseFloorAdaptRateQuiet: number;  // Default: 0.01
+    noiseFloorAdaptRateLoud: number;   // Default: 0.001
+    minSNR: number;                    // Default: 2.0
+    snrRange: number;                  // Default: 8.0
+  };
+}
+```
+**Threshold Parameters:**
+- `startThreshold`: Probability threshold to unmute audio
+- `stopThreshold`: Probability threshold to mute audio (after hangover)
+- `hangoverMs`: Delay before muting after speech stops
+- `preRollMs`: Audio buffer duration before speech onset
+- `minSpeechDurationMs`: Minimum duration to consider as valid speech
+- `minSilenceDurationMs`: Minimum silence duration between speech segments
+**Energy VAD Parameters:**
+- `smoothing`: Energy calculation smoothing factor (0-1)
+- `minSNR`: Minimum signal-to-noise ratio for speech detection
+- `snrRange`: Range for probability scaling from minSNR
+### Output Control
+```ts
+output: {
+  speechGain: number;                  // Default: 1.0
+  silenceGain: number;                 // Default: 0.0
+  gainRampTime: number;                // Default: 0.015 (seconds)
+  smoothTransitions: boolean;          // Default: true
+  maxGainDb: number;                   // Default: 6.0
+  enableCompression: boolean;          // Default: false
+  compression?: {
+    threshold: number;                 // Default: -24.0 (dB)
+    ratio: number;                     // Default: 3.0
+    attack: number;                    // Default: 0.003 (seconds)
+    release: number;                   // Default: 0.05 (seconds)
+  };
+}
+```
+**Gain Parameters:**
+- `speechGain`: Gain multiplier when speaking (1.0 = unity)
+- `silenceGain`: Gain multiplier when silent (0.0 = mute)
+- `gainRampTime`: Transition duration for gain changes
+- `maxGainDb`: Maximum gain limit to prevent clipping
+**Compression Parameters:**
+- `threshold`: Level above which compression is applied
+- `ratio`: Compression ratio (e.g., 3.0 = 3:1)
+- `attack`: Time to reach full compression
+- `release`: Time to release compression
+### Runtime Configuration Updates
+```ts
+pipeline.setConfig({
+  vad: {
+    startThreshold: 0.7,
+    stopThreshold: 0.55,
+  },
+  output: {
+    speechGain: 1.3,
+  },
+});
+```
+## Configuration Examples
+### Noisy Environment
+```ts
+{
+  vad: {
+    startThreshold: 0.7,
+    stopThreshold: 0.55,
+    minSpeechDurationMs: 150,
+    energyVad: { minSNR: 3.0 }
+  }
+}
+```
+### Quiet Speaker
+```ts
+{
+  vad: {
+    startThreshold: 0.4,
+    stopThreshold: 0.25,
+    energyVad: { minSNR: 1.5 }
+  },
+  output: {
+    speechGain: 1.5
+  }
+}
+```
+### Natural Conversation
+```ts
+{
+  vad: {
+    startThreshold: 0.5,
+    stopThreshold: 0.3,
+    hangoverMs: 600,
+  },
+  output: {
+    silenceGain: 0.2
+  }
+}
+```
+## API Reference
+### `createAudioPipeline(track, config)`
+Creates an audio processing pipeline from a MediaStreamTrack.
+**Parameters:**
+- `track`: MediaStreamTrack - Source audio track
+- `config`: AudioProcessingConfig - Configuration object
+**Returns:** `Promise<AudioPipelineHandle>`
+### AudioPipelineHandle
+```ts
+interface AudioPipelineHandle {
+  processedTrack: MediaStreamTrack;
+  events: Emitter<AudioPipelineEvents>;
+  state: VADState;
+  setConfig(config: Partial<AudioProcessingConfig>): void;
+  dispose(): void;
+}
+```
+### AudioPipelineEvents
+```ts
+type AudioPipelineEvents = {
+  vadChange: VADState;
+  error: Error;
+};
+```
+### VADState
+```ts
+interface VADState {
+  isSpeaking: boolean;
+  probability: number;
+  state: "silent" | "speech_starting" | "speaking" | "speech_ending";
+}
+```
+## Default Values
+| Parameter              | Default | Description                      |
+| ---------------------- | ------- | -------------------------------- |
+| `startThreshold`       | 0.6     | Unmute at 60% confidence         |
+| `stopThreshold`        | 0.45    | Mute below 45% confidence        |
+| `hangoverMs`           | 400     | Wait 400ms before muting         |
+| `preRollMs`            | 250     | Buffer 250ms before speech       |
+| `minSpeechDurationMs`  | 100     | Minimum valid speech duration    |
+| `minSilenceDurationMs` | 150     | Minimum silence between speech   |
+| `silenceGain`          | 0.0     | Complete mute when silent        |
+| `speechGain`           | 1.0     | Unity gain when speaking         |
+| `minSNR`               | 2.0     | Voice must be 2x noise floor     |
+| `snrRange`             | 8.0     | Probability scales over SNR 2-10 |

package/dist/{chunk-XMTQPMQ6.mjs → chunk-GVKCBKW6.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   createAudioPipeline
-} from "./chunk-EXH2PNUE.mjs";
+} from "./chunk-XXTNAUYX.mjs";
 // src/livekit/integration.ts
 async function attachProcessingToTrack(track, config = {}) {

package/dist/{chunk-6P2RDBW5.mjs → chunk-H5UKZU2Y.mjs} RENAMED Viewed

@@ -3,7 +3,7 @@ import {
 } from "./chunk-XO6B3D4A.mjs";
 import {
   EnergyVADPlugin
-} from "./chunk-R5JVHKWA.mjs";
+} from "./chunk-VEJXAEMM.mjs";
 // src/extensibility/plugins.ts
 var nsPlugins = /* @__PURE__ */ new Map();

package/dist/chunk-N553RHTI.mjs ADDED Viewed

@@ -0,0 +1,93 @@
// src/vad/vad-state.ts

/**
 * Hysteresis state machine that turns a per-frame speech probability into a
 * speaking / silent decision.
 *
 * Internally the transitions "speech_starting" and "speech_ending" are
 * computed, but processFrame() collapses them to "speaking" and "silent"
 * before storing or returning the state, so callers only ever observe those
 * two values.
 */
var VADStateMachine = class {
  config;
  currentState = "silent";
  lastSpeechTime = 0;
  speechStartTime = 0;
  lastSilenceTime = 0;
  // Assumed frame duration; no method of this class reads or updates it.
  frameDurationMs = 20;
  // False until the first processFrame() call anchors lastSilenceTime to the
  // clock the caller actually uses for its timestamps.
  hasClockOrigin = false;

  /**
   * @param {object} [config] Partial VAD configuration; every missing
   *   (null/undefined) field falls back to the voice-optimized default below.
   */
  constructor(config) {
    const energy = config?.energyVad;
    this.config = {
      enabled: config?.enabled ?? true,
      pluginName: config?.pluginName ?? "energy-vad",
      // Voice-optimized defaults
      startThreshold: config?.startThreshold ?? 0.6,
      stopThreshold: config?.stopThreshold ?? 0.45,
      hangoverMs: config?.hangoverMs ?? 400,
      preRollMs: config?.preRollMs ?? 250,
      minSpeechDurationMs: config?.minSpeechDurationMs ?? 100,
      minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
      energyVad: {
        smoothing: energy?.smoothing ?? 0.95,
        initialNoiseFloor: energy?.initialNoiseFloor ?? 1e-3,
        noiseFloorAdaptRateQuiet: energy?.noiseFloorAdaptRateQuiet ?? 0.01,
        noiseFloorAdaptRateLoud: energy?.noiseFloorAdaptRateLoud ?? 1e-3,
        minSNR: energy?.minSNR ?? 2,
        snrRange: energy?.snrRange ?? 8
      }
    };
    // lastSilenceTime is intentionally NOT seeded from Date.now(): the
    // timestamps passed to processFrame() may come from a different clock,
    // which would make the first "silence duration" meaningless.
  }

  /**
   * Merges a partial configuration into the current one.
   *
   * Null/undefined fields are ignored (matching the constructor's `??`
   * defaults) and `energyVad` is merged key by key, so overriding a single
   * energy parameter keeps the others.
   *
   * @param {object} [config] Partial VAD configuration.
   */
  updateConfig(config) {
    const definedOnly = (source) =>
      Object.fromEntries(
        Object.entries(source ?? {}).filter(
          ([, value]) => value !== undefined && value !== null
        )
      );
    const { energyVad, ...topLevel } = definedOnly(config);
    this.config = {
      ...this.config,
      ...topLevel,
      energyVad: { ...this.config.energyVad, ...definedOnly(energyVad) }
    };
  }

  /**
   * Advances the state machine by one frame.
   *
   * @param {number} probability Speech probability for this frame.
   * @param {number} timestamp Frame time in milliseconds; any monotonic clock
   *   works as long as every call uses the same one.
   * @returns {{isSpeaking: boolean, probability: number, state: string}}
   *   `state` is "speaking" or "silent"; `isSpeaking` mirrors it.
   */
  processFrame(probability, timestamp) {
    if (!this.hasClockOrigin) {
      this.hasClockOrigin = true;
      this.lastSilenceTime = timestamp;
    }
    const {
      startThreshold,
      stopThreshold,
      hangoverMs,
      minSpeechDurationMs,
      minSilenceDurationMs
    } = this.config;
    let newState = this.currentState;
    if (this.currentState === "silent" || this.currentState === "speech_ending") {
      if (probability >= startThreshold) {
        // lastSilenceTime is refreshed on every below-threshold frame, so this
        // effectively requires the probability to stay above startThreshold
        // for minSilenceDurationMs before speech is declared.
        const silenceDuration = timestamp - this.lastSilenceTime;
        if (silenceDuration >= minSilenceDurationMs) {
          newState = "speech_starting";
          this.speechStartTime = timestamp;
          this.lastSpeechTime = timestamp;
        } else {
          newState = "silent";
        }
      } else {
        newState = "silent";
        this.lastSilenceTime = timestamp;
      }
    } else if (this.currentState === "speech_starting" || this.currentState === "speaking") {
      if (probability >= stopThreshold) {
        newState = "speaking";
        this.lastSpeechTime = timestamp;
      } else {
        const timeSinceSpeech = timestamp - this.lastSpeechTime;
        const speechDuration = timestamp - this.speechStartTime;
        if (timeSinceSpeech < hangoverMs) {
          // Hangover: keep the gate open through short dips.
          newState = "speaking";
        } else if (speechDuration < minSpeechDurationMs) {
          newState = "silent";
          this.lastSilenceTime = timestamp;
        } else {
          newState = "speech_ending";
          this.lastSilenceTime = timestamp;
        }
      }
    }
    // Transitional states are collapsed before being stored or reported.
    if (newState === "speech_starting") newState = "speaking";
    if (newState === "speech_ending") newState = "silent";
    this.currentState = newState;
    return {
      isSpeaking: newState === "speaking",
      probability,
      state: newState
    };
  }
};
export {
  VADStateMachine
};

package/dist/chunk-VEJXAEMM.mjs ADDED Viewed

@@ -0,0 +1,136 @@
// src/vad/vad-node.ts

/**
 * Builds the source text of the energy-based VAD AudioWorklet module.
 *
 * The energy parameters are baked into the generated source as numeric
 * literals. Because the values end up inside executable code, each one is
 * coerced to a finite number first; anything else (objects, arbitrary strings,
 * NaN, a non-positive snrRange) falls back to the default instead of being
 * interpolated verbatim.
 *
 * Note: `smoothing` is stored on the processor but process() never reads it.
 *
 * @param {object} [vadConfig] VAD configuration; only `energyVad` is read.
 * @returns {string} JavaScript source registering 'energy-vad-processor'.
 */
var createEnergyVadWorkletCode = (vadConfig) => {
  const energyParams = vadConfig?.energyVad || {};
  // Accepts numbers and numeric strings; everything else yields the fallback.
  const toNumber = (value, fallback, isValid = () => true) => {
    const parsed = typeof value === "string" && value.trim() !== "" ? Number(value) : value;
    return typeof parsed === "number" && Number.isFinite(parsed) && isValid(parsed)
      ? parsed
      : fallback;
  };
  const smoothing = toNumber(energyParams.smoothing, 0.95);
  const initialNoiseFloor = toNumber(energyParams.initialNoiseFloor, 1e-3);
  const noiseFloorAdaptRateQuiet = toNumber(energyParams.noiseFloorAdaptRateQuiet, 0.01);
  const noiseFloorAdaptRateLoud = toNumber(energyParams.noiseFloorAdaptRateLoud, 1e-3);
  const minSNR = toNumber(energyParams.minSNR, 2);
  // snrRange is a divisor in the probability mapping, so it must be positive.
  const snrRange = toNumber(energyParams.snrRange, 8, (n) => n > 0);
  return `
class EnergyVadProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
    this.smoothing = ${smoothing};
    this.energy = 0;
    this.noiseFloor = ${initialNoiseFloor};
    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
    this.minSNR = ${minSNR};
    this.snrRange = ${snrRange};
    this.isSpeaking = false;
    this.port.onmessage = (event) => {
      if (event.data && event.data.isSpeaking !== undefined) {
        this.isSpeaking = event.data.isSpeaking;
      }
    };
  }
  process(inputs, outputs, parameters) {
    const input = inputs[0];
    if (!input || !input.length) return true;
    const channel = input[0];
    if (!channel || channel.length === 0) return true;
    // Calculate RMS (Root Mean Square) energy
    let sum = 0;
    for (let i = 0; i < channel.length; i++) {
      sum += channel[i] * channel[i];
    }
    const rms = Math.sqrt(sum / channel.length);
    // A non-finite sample would otherwise turn the noise floor into NaN for good
    if (!Number.isFinite(rms)) return true;
    // Adaptive noise floor estimation - ONLY during silence
    // This prevents the noise floor from rising during speech
    if (!this.isSpeaking) {
      if (rms < this.noiseFloor) {
        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + rms * this.noiseFloorAdaptRateQuiet;
      } else {
        // Even during silence, if we detect a loud signal, adapt very slowly
        // This could be brief noise we haven't classified as speech yet
        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + rms * this.noiseFloorAdaptRateLoud;
      }
    }
    // During speech, freeze the noise floor to maintain consistent detection
    // Calculate Signal-to-Noise Ratio (SNR)
    const snr = rms / (this.noiseFloor + 1e-6);
    // Map SNR to probability (0-1)
    // Probability is 0 when SNR <= minSNR
    // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
    // Probability is 1 when SNR >= (minSNR + snrRange)
    const probability = Math.min(1, Math.max(0, (snr - this.minSNR) / this.snrRange));
    this.port.postMessage({ probability, snr, noiseFloor: this.noiseFloor, rms });
    return true;
  }
}
registerProcessor('energy-vad-processor', EnergyVadProcessor);
`;
};
/**
 * VAD plugin that runs the energy-based detector in an AudioWorklet and
 * forwards each reported probability to a callback.
 */
var EnergyVADPlugin = class {
  name = "energy-vad";
  // The worklet node created by the most recent successful createNode() call.
  workletNode = null;

  /**
   * Creates the audio node for this plugin.
   *
   * @param {object} context Audio context; must provide `createGain()` and
   *   `audioWorklet.addModule()`.
   * @param {object} [config] VAD configuration. When `enabled` is falsy a
   *   plain gain node is returned and no worklet is loaded.
   * @param {(probability: number) => void} onDecision Called with every valid
   *   probability posted by the worklet.
   * @returns {Promise<object>} The worklet node, or a gain node when disabled.
   * @throws {Error} If the worklet module cannot be loaded or the node cannot
   *   be constructed; the underlying error is attached as `cause`.
   */
  async createNode(context, config, onDecision) {
    // Forget any node from an earlier call so updateSpeakingState() never
    // posts to a node this call did not create.
    this.workletNode = null;
    if (!config?.enabled) {
      console.log("VAD disabled, using passthrough node");
      return context.createGain();
    }
    const workletCode = createEnergyVadWorkletCode(config);
    const blob = new Blob([workletCode], {
      type: "application/javascript"
    });
    const url = URL.createObjectURL(blob);
    try {
      await context.audioWorklet.addModule(url);
      console.log("Energy VAD worklet loaded successfully");
    } catch (e) {
      const error = new Error(
        `Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`,
        { cause: e }
      );
      console.error(error.message);
      throw error;
    } finally {
      // The blob URL is only needed while the module loads.
      URL.revokeObjectURL(url);
    }
    let node;
    try {
      node = new AudioWorkletNode(context, "energy-vad-processor");
      this.workletNode = node;
      console.log("Energy VAD node created successfully");
    } catch (e) {
      const error = new Error(
        `Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`,
        { cause: e }
      );
      console.error(error.message);
      throw error;
    }
    node.port.onmessage = (event) => {
      try {
        const { probability } = event.data;
        if (typeof probability === "number" && !Number.isNaN(probability)) {
          onDecision(probability);
        } else {
          console.warn("Invalid VAD probability received:", event.data);
        }
      } catch (error) {
        // A throwing callback must not break the message port.
        console.error("Error in VAD message handler:", error);
      }
    };
    node.port.onmessageerror = (event) => {
      console.error("VAD port message error:", event);
    };
    return node;
  }

  /**
   * Tells the worklet whether speech is currently active. No-op when no
   * worklet node exists.
   *
   * @param {boolean} isSpeaking Current speaking decision.
   */
  updateSpeakingState(isSpeaking) {
    if (this.workletNode) {
      this.workletNode.port.postMessage({ isSpeaking });
    }
  }
};
export {
  EnergyVADPlugin
};