npm - @tensamin/audio - Versions diffs - 0.1.14 → 0.2.0 - Mend

@tensamin/audio 0.1.14 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

package/README.md +48 -231
package/dist/chunk-6BJ4XGSA.mjs +80 -0
package/dist/chunk-AQ5RVY33.mjs +74 -0
package/dist/chunk-IS37FHDN.mjs +33 -0
package/dist/chunk-K4J3UUOR.mjs +178 -0
package/dist/chunk-QNQK6QFB.mjs +71 -0
package/dist/context/audio-context.d.mts +0 -24
package/dist/context/audio-context.d.ts +0 -24
package/dist/index.d.mts +2 -8
package/dist/index.d.ts +2 -8
package/dist/index.js +285 -680
package/dist/index.mjs +8 -43
package/dist/livekit/integration.d.mts +3 -7
package/dist/livekit/integration.d.ts +3 -7
package/dist/livekit/integration.js +280 -626
package/dist/livekit/integration.mjs +7 -8
package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
package/dist/noise-suppression/deepfilternet-node.js +57 -0
package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
package/dist/pipeline/audio-pipeline.d.mts +2 -2
package/dist/pipeline/audio-pipeline.d.ts +2 -2
package/dist/pipeline/audio-pipeline.js +219 -554
package/dist/pipeline/audio-pipeline.mjs +4 -5
package/dist/types.d.mts +42 -257
package/dist/types.d.ts +42 -257
package/dist/vad/vad-node.d.mts +7 -9
package/dist/vad/vad-node.d.ts +7 -9
package/dist/vad/vad-node.js +47 -156
package/dist/vad/vad-node.mjs +3 -3
package/dist/vad/vad-state.d.mts +9 -11
package/dist/vad/vad-state.d.ts +9 -11
package/dist/vad/vad-state.js +50 -79
package/dist/vad/vad-state.mjs +3 -3
package/package.json +21 -21
package/dist/chunk-2G2JFHJY.mjs +0 -180
package/dist/chunk-6F2HZUYO.mjs +0 -91
package/dist/chunk-K4YLH73B.mjs +0 -103
package/dist/chunk-R5M2DGAQ.mjs +0 -311
package/dist/chunk-UFKIAMG3.mjs +0 -47
package/dist/chunk-XO6B3D4A.mjs +0 -67
package/dist/extensibility/plugins.d.mts +0 -9
package/dist/extensibility/plugins.d.ts +0 -9
package/dist/extensibility/plugins.js +0 -320
package/dist/extensibility/plugins.mjs +0 -14
package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
package/dist/noise-suppression/rnnoise-node.js +0 -101
package/dist/noise-suppression/rnnoise-node.mjs +0 -6

package/dist/vad/vad-node.js CHANGED Viewed

@@ -20,185 +20,76 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/vad/vad-node.ts
 var vad_node_exports = {};
 __export(vad_node_exports, {
-  EnergyVADPlugin: () => EnergyVADPlugin
+  createLevelDetectorNode: () => createLevelDetectorNode
 });
 module.exports = __toCommonJS(vad_node_exports);
-var createEnergyVadWorkletCode = (vadConfig) => {
-  const energyParams = vadConfig?.energyVad || {};
-  const smoothing = energyParams.smoothing ?? 0.95;
-  const initialNoiseFloor = energyParams.initialNoiseFloor ?? 1e-3;
-  const noiseFloorAdaptRateQuiet = energyParams.noiseFloorAdaptRateQuiet ?? 2e-3;
-  const noiseFloorAdaptRateLoud = energyParams.noiseFloorAdaptRateLoud ?? 0.02;
-  const minSNR = energyParams.minSNR ?? 12;
-  const snrRange = energyParams.snrRange ?? 10;
-  const minEnergy = energyParams.minEnergy ?? 3e-3;
+function createLevelDetectorWorkletCode(smoothing) {
   return `
-class EnergyVadProcessor extends AudioWorkletProcessor {
+class LevelDetectorProcessor extends AudioWorkletProcessor {
   constructor() {
     super();
+    this.smoothed = 0;
     this.smoothing = ${smoothing};
-    this.energy = 0;
-    this.noiseFloor = ${initialNoiseFloor};
-    this.noiseFloorAdaptRateQuiet = ${noiseFloorAdaptRateQuiet};
-    this.noiseFloorAdaptRateLoud = ${noiseFloorAdaptRateLoud};
-    this.minSNR = ${minSNR};
-    this.snrRange = ${snrRange};
-    this.minEnergy = ${minEnergy};
-    this.isSpeaking = false;
-    this.port.onmessage = (event) => {
-      if (event.data && event.data.isSpeaking !== undefined) {
-        this.isSpeaking = event.data.isSpeaking;
-      }
-    };
   }
-  process(inputs, outputs, parameters) {
+  process(inputs) {
     const input = inputs[0];
-    if (!input || !input.length) return true;
+    if (!input || input.length === 0) return true;
     const channel = input[0];
-    // Calculate instantaneous RMS (Root Mean Square) energy
+    if (!channel || channel.length === 0) return true;
     let sum = 0;
-    let peak = 0;
     for (let i = 0; i < channel.length; i++) {
-      const sample = Math.abs(channel[i]);
-      sum += channel[i] * channel[i];
-      peak = Math.max(peak, sample);
-    }
-    const instantRms = Math.sqrt(sum / channel.length);
-    // Smooth the RMS energy to reduce jitter
-    // this.energy acts as the smoothed RMS value
-    this.energy = this.energy * this.smoothing + instantRms * (1 - this.smoothing);
-    // Calculate Crest Factor (peak-to-RMS ratio)
-    // Voice typically has crest factor of 2-4 (6-12dB)
-    // Keyboard clicks have crest factor of 10-30+ (20-30dB)
-    const crestFactor = peak / (instantRms + 1e-10);
-    const crestFactorDb = 20 * Math.log10(Math.max(1e-6, crestFactor));
-    // Adaptive noise floor estimation using SMOOTHED energy (not instantaneous)
-    // This prevents sharp transients from affecting the noise floor
-    if (this.energy < this.noiseFloor) {
-      // Signal is quieter than noise floor, adapt downwards slowly
-      this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateQuiet) + this.energy * this.noiseFloorAdaptRateQuiet;
-    } else {
-      // Calculate SNR based on smoothed energy
-      const smoothedSnr = this.energy / (this.noiseFloor + 1e-6);
-      const smoothedSnrDb = 20 * Math.log10(Math.max(1e-6, smoothedSnr));
-      // Only adapt upwards if:
-      // 1. SNR is low (< 10dB) - likely just background noise
-      // 2. AND crest factor is low (< 15dB) - not a sharp transient
-      if (smoothedSnrDb < 10 && crestFactorDb < 15) {
-        // This is persistent background noise, adapt upwards
-        this.noiseFloor = this.noiseFloor * (1 - this.noiseFloorAdaptRateLoud) + this.energy * this.noiseFloorAdaptRateLoud;
-      } else {
-        // Either high SNR (speech) or high crest factor (click) - adapt very slowly
-        const slowRate = this.noiseFloorAdaptRateLoud * 0.01;
-        this.noiseFloor = this.noiseFloor * (1 - slowRate) + this.energy * slowRate;
-      }
-    }
-    // Ensure noise floor doesn't drop to absolute zero
-    this.noiseFloor = Math.max(this.noiseFloor, 0.0001);
-    // SECOND PASS: Calculate Signal-to-Noise Ratio (SNR) in dB using smoothed energy
-    const snr = this.energy / (this.noiseFloor + 1e-6);
-    const snrDb = 20 * Math.log10(Math.max(1e-6, snr));
-    // Map SNR dB to probability (0-1)
-    // Probability is 0 when snrDb <= minSNR
-    // Probability scales linearly from 0 to 1 between minSNR and (minSNR + snrRange)
-    let probability = Math.min(1, Math.max(0, (snrDb - this.minSNR) / this.snrRange));
-    // Apply absolute energy threshold with soft knee
-    if (this.energy < this.minEnergy) {
-      const energyRatio = this.energy / (this.minEnergy + 1e-6);
-      probability *= Math.pow(energyRatio, 2);
-    }
-    // Apply crest factor penalty
-    // Reject signals with high crest factor (sharp transients like keyboard clicks)
-    // Voice: 6-12dB, Keyboard: 20-30dB
-    // We penalize anything above 14dB
-    if (crestFactorDb > 14) {
-      const excess = crestFactorDb - 14;
-      const penalty = Math.max(0, 1 - (excess / 10)); // Linear falloff over 10dB
-      probability *= penalty;
+      const sample = channel[i];
+      sum += sample * sample;
     }
-    this.port.postMessage({ probability, snr: snrDb, noiseFloor: this.noiseFloor, rms: this.energy });
+    const rms = Math.sqrt(sum / channel.length);
+    this.smoothed = this.smoothed * this.smoothing + rms * (1 - this.smoothing);
+    const levelDb = 20 * Math.log10(Math.max(1e-8, this.smoothed));
+    this.port.postMessage({ levelDb });
     return true;
   }
 }
-registerProcessor('energy-vad-processor', EnergyVadProcessor);
+registerProcessor('level-detector-processor', LevelDetectorProcessor);
 `;
-};
-var EnergyVADPlugin = class {
-  name = "energy-vad";
-  workletNode = null;
-  async createNode(context, config, onDecision) {
-    if (!config?.enabled) {
-      console.log("VAD disabled, using passthrough node");
-      const pass = context.createGain();
-      return pass;
-    }
-    const workletCode = createEnergyVadWorkletCode(config);
-    const blob = new Blob([workletCode], {
-      type: "application/javascript"
-    });
-    const url = URL.createObjectURL(blob);
-    try {
-      await context.audioWorklet.addModule(url);
-      console.log("Energy VAD worklet loaded successfully");
-    } catch (e) {
-      const error = new Error(
-        `Failed to load Energy VAD worklet: ${e instanceof Error ? e.message : String(e)}`
-      );
-      console.error(error.message);
-      URL.revokeObjectURL(url);
-      throw error;
-    }
+}
+async function createLevelDetectorNode(context, onLevel, options) {
+  const smoothing = options?.smoothing ?? 0.9;
+  const workletCode = createLevelDetectorWorkletCode(smoothing);
+  const blob = new Blob([workletCode], { type: "application/javascript" });
+  const url = URL.createObjectURL(blob);
+  try {
+    await context.audioWorklet.addModule(url);
+  } finally {
     URL.revokeObjectURL(url);
-    let node;
-    try {
-      node = new AudioWorkletNode(context, "energy-vad-processor");
-      this.workletNode = node;
-      console.log("Energy VAD node created successfully");
-    } catch (e) {
-      const error = new Error(
-        `Failed to create Energy VAD node: ${e instanceof Error ? e.message : String(e)}`
-      );
-      console.error(error.message);
-      throw error;
+  }
+  const node = new AudioWorkletNode(context, "level-detector-processor", {
+    numberOfInputs: 1,
+    numberOfOutputs: 0
+  });
+  node.port.onmessage = (event) => {
+    const { levelDb } = event.data ?? {};
+    if (typeof levelDb === "number" && !Number.isNaN(levelDb)) {
+      onLevel(levelDb);
     }
-    node.port.onmessage = (event) => {
+  };
+  node.port.onmessageerror = (event) => {
+    console.error("Level detector port error", event);
+  };
+  return {
+    node,
+    dispose: () => {
       try {
-        const { probability } = event.data;
-        if (typeof probability === "number" && !isNaN(probability)) {
-          onDecision(probability);
-        } else {
-          console.warn("Invalid VAD probability received:", event.data);
-        }
+        node.port.onmessage = null;
+        node.port.close();
       } catch (error) {
-        console.error("Error in VAD message handler:", error);
+        console.error("Failed to dispose level detector node", error);
       }
-    };
-    node.port.onmessageerror = (event) => {
-      console.error("VAD port message error:", event);
-    };
-    return node;
-  }
-  updateSpeakingState(isSpeaking) {
-    if (this.workletNode) {
-      this.workletNode.port.postMessage({ isSpeaking });
     }
-  }
-};
+  };
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  EnergyVADPlugin
+  createLevelDetectorNode
 });

package/dist/vad/vad-node.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
-  EnergyVADPlugin
-} from "../chunk-2G2JFHJY.mjs";
+  createLevelDetectorNode
+} from "../chunk-QNQK6QFB.mjs";
 export {
-  EnergyVADPlugin
+  createLevelDetectorNode
 };

package/dist/vad/vad-state.d.mts CHANGED Viewed

@@ -1,16 +1,14 @@
-import { AudioProcessingConfig, VADState } from '../types.mjs';
+import { SpeakingDetectionConfig, SpeakingState } from '../types.mjs';
 import 'mitt';
-declare class VADStateMachine {
+declare class LevelBasedVAD {
     private config;
-    private currentState;
-    private lastSpeechTime;
-    private speechStartTime;
-    private lastSilenceTime;
-    private frameDurationMs;
-    constructor(config: AudioProcessingConfig["vad"]);
-    updateConfig(config: Partial<AudioProcessingConfig["vad"]>): void;
-    processFrame(probability: number, timestamp: number): VADState;
+    private speaking;
+    private pendingSpeechSince;
+    private pendingSilenceSince;
+    constructor(config: SpeakingDetectionConfig);
+    updateConfig(config: Partial<SpeakingDetectionConfig>): void;
+    process(levelDb: number, timestampMs: number): SpeakingState;
 }
-export { VADStateMachine };
+export { LevelBasedVAD };

package/dist/vad/vad-state.d.ts CHANGED Viewed

@@ -1,16 +1,14 @@
-import { AudioProcessingConfig, VADState } from '../types.js';
+import { SpeakingDetectionConfig, SpeakingState } from '../types.js';
 import 'mitt';
-declare class VADStateMachine {
+declare class LevelBasedVAD {
     private config;
-    private currentState;
-    private lastSpeechTime;
-    private speechStartTime;
-    private lastSilenceTime;
-    private frameDurationMs;
-    constructor(config: AudioProcessingConfig["vad"]);
-    updateConfig(config: Partial<AudioProcessingConfig["vad"]>): void;
-    processFrame(probability: number, timestamp: number): VADState;
+    private speaking;
+    private pendingSpeechSince;
+    private pendingSilenceSince;
+    constructor(config: SpeakingDetectionConfig);
+    updateConfig(config: Partial<SpeakingDetectionConfig>): void;
+    process(levelDb: number, timestampMs: number): SpeakingState;
 }
-export { VADStateMachine };
+export { LevelBasedVAD };

package/dist/vad/vad-state.js CHANGED Viewed

@@ -20,108 +20,79 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/vad/vad-state.ts
 var vad_state_exports = {};
 __export(vad_state_exports, {
-  VADStateMachine: () => VADStateMachine
+  LevelBasedVAD: () => LevelBasedVAD
 });
 module.exports = __toCommonJS(vad_state_exports);
-var VADStateMachine = class {
+var LevelBasedVAD = class {
   config;
-  currentState = "silent";
-  lastSpeechTime = 0;
-  speechStartTime = 0;
-  lastSilenceTime = 0;
-  frameDurationMs = 20;
-  // Assumed frame duration, updated by calls
+  speaking = false;
+  pendingSpeechSince = null;
+  pendingSilenceSince = null;
   constructor(config) {
     this.config = {
-      enabled: config?.enabled ?? true,
-      pluginName: config?.pluginName ?? "energy-vad",
-      // Voice-optimized defaults
-      startThreshold: config?.startThreshold ?? 0.8,
-      // Higher threshold to avoid noise
-      stopThreshold: config?.stopThreshold ?? 0.3,
-      // Balanced for voice
-      hangoverMs: config?.hangoverMs ?? 300,
-      // Smooth for natural speech
-      preRollMs: config?.preRollMs ?? 250,
-      // Generous pre-roll
-      minSpeechDurationMs: config?.minSpeechDurationMs ?? 250,
-      // Aggressive transient rejection
-      minSilenceDurationMs: config?.minSilenceDurationMs ?? 150,
-      energyVad: {
-        smoothing: config?.energyVad?.smoothing ?? 0.95,
-        initialNoiseFloor: config?.energyVad?.initialNoiseFloor ?? 1e-3,
-        noiseFloorAdaptRateQuiet: config?.energyVad?.noiseFloorAdaptRateQuiet ?? 2e-3,
-        noiseFloorAdaptRateLoud: config?.energyVad?.noiseFloorAdaptRateLoud ?? 0.02,
-        minSNR: config?.energyVad?.minSNR ?? 12,
-        snrRange: config?.energyVad?.snrRange ?? 10,
-        minEnergy: config?.energyVad?.minEnergy ?? 3e-3
-      }
+      minDb: config.minDb,
+      maxDb: config.maxDb,
+      speakOnRatio: config.speakOnRatio ?? 0.6,
+      speakOffRatio: config.speakOffRatio ?? 0.3,
+      hangoverMs: config.hangoverMs ?? 350,
+      attackMs: config.attackMs ?? 50,
+      releaseMs: config.releaseMs ?? 120
     };
-    this.lastSilenceTime = Date.now();
   }
   updateConfig(config) {
-    this.config = { ...this.config, ...config };
+    this.config = {
+      ...this.config,
+      ...config,
+      speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
+      speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
+      hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
+      attackMs: config.attackMs ?? this.config.attackMs,
+      releaseMs: config.releaseMs ?? this.config.releaseMs
+    };
   }
-  processFrame(probability, timestamp) {
+  process(levelDb, timestampMs) {
     const {
-      startThreshold,
-      stopThreshold,
+      minDb,
+      maxDb,
+      speakOnRatio,
+      speakOffRatio,
       hangoverMs,
-      minSpeechDurationMs,
-      minSilenceDurationMs
+      attackMs,
+      releaseMs
     } = this.config;
-    let newState = this.currentState;
-    if (this.currentState === "silent" || this.currentState === "speech_ending") {
-      if (probability >= startThreshold) {
-        const silenceDuration = timestamp - this.lastSilenceTime;
-        if (silenceDuration >= minSilenceDurationMs) {
-          newState = "speech_starting";
-          this.speechStartTime = timestamp;
-          this.lastSpeechTime = timestamp;
-        } else {
-          newState = "silent";
+    const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
+    const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
+    if (!this.speaking) {
+      if (norm >= speakOnRatio) {
+        this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
+        if (timestampMs - this.pendingSpeechSince >= attackMs) {
+          this.speaking = true;
+          this.pendingSpeechSince = null;
+          this.pendingSilenceSince = null;
         }
       } else {
-        newState = "silent";
-        this.lastSilenceTime = timestamp;
+        this.pendingSpeechSince = null;
       }
-    } else if (this.currentState === "speech_starting") {
-      if (probability >= stopThreshold) {
-        const speechDuration = timestamp - this.speechStartTime;
-        if (speechDuration >= minSpeechDurationMs) {
-          newState = "speaking";
-        } else {
-          newState = "speech_starting";
+    } else {
+      if (norm <= speakOffRatio) {
+        this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
+        const releaseWindow = Math.max(releaseMs, hangoverMs);
+        if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
+          this.speaking = false;
+          this.pendingSilenceSince = null;
+          this.pendingSpeechSince = null;
         }
-        this.lastSpeechTime = timestamp;
       } else {
-        newState = "silent";
-        this.lastSilenceTime = timestamp;
-      }
-    } else if (this.currentState === "speaking") {
-      if (probability >= stopThreshold) {
-        newState = "speaking";
-        this.lastSpeechTime = timestamp;
-      } else {
-        const timeSinceSpeech = timestamp - this.lastSpeechTime;
-        if (timeSinceSpeech < hangoverMs) {
-          newState = "speaking";
-        } else {
-          newState = "speech_ending";
-          this.lastSilenceTime = timestamp;
-        }
+        this.pendingSilenceSince = null;
       }
     }
-    if (newState === "speech_ending") newState = "silent";
-    this.currentState = newState;
     return {
-      isSpeaking: newState === "speaking",
-      probability,
-      state: newState
+      speaking: this.speaking,
+      levelDb: clamped
     };
   }
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
-  VADStateMachine
+  LevelBasedVAD
 });

package/dist/vad/vad-state.mjs CHANGED Viewed

@@ -1,6 +1,6 @@
 import {
-  VADStateMachine
-} from "../chunk-K4YLH73B.mjs";
+  LevelBasedVAD
+} from "../chunk-AQ5RVY33.mjs";
 export {
-  VADStateMachine
+  LevelBasedVAD
 };

package/package.json CHANGED Viewed

@@ -1,41 +1,41 @@
 {
   "name": "@tensamin/audio",
-  "version": "0.1.14",
-  "main": "dist/index.js",
-  "module": "dist/index.mjs",
-  "types": "dist/index.d.ts",
+  "version": "0.2.0",
   "author": {
     "email": "aloisianer@proton.me",
     "name": "Alois"
   },
-  "publishConfig": {
-    "access": "public"
-  },
   "repository": {
     "type": "git",
     "url": "https://github.com/Tensamin/Audio"
   },
-  "license": "MIT",
-  "scripts": {
-    "build": "tsup src/ --format cjs,esm --dts --out-dir dist --clean",
-    "format": "bunx prettier --write .",
-    "lint": "tsc"
+  "main": "dist/index.js",
+  "module": "dist/index.mjs",
+  "devDependencies": {
+    "tsup": "^8.5.1",
+    "@types/bun": "latest",
+    "@types/web": "^0.0.298",
+    "livekit-client": "^2.16.1",
+    "typescript": "^5.9.3"
   },
   "dependencies": {
-    "@sapphi-red/web-noise-suppressor": "^0.3.5",
+    "deepfilternet3-noise-filter": "^1.1.2",
     "mitt": "^3.0.1"
   },
   "peerDependencies": {
     "livekit-client": "^2.0.0"
   },
-  "devDependencies": {
-    "tsup": "^8.5.1",
-    "@types/bun": "latest",
-    "@types/web": "^0.0.298",
-    "livekit-client": "^2.16.0",
-    "typescript": "^5.9.3"
-  },
   "files": [
     "dist"
-  ]
+  ],
+  "license": "MIT",
+  "publishConfig": {
+    "access": "public"
+  },
+  "scripts": {
+    "build": "tsup src/ --format cjs,esm --dts --out-dir dist --clean",
+    "format": "bunx prettier --write .",
+    "lint": "tsc"
+  },
+  "types": "dist/index.d.ts"
 }