@tensamin/audio 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/chunk-7IKKNKM7.mjs +92 -0
- package/dist/{chunk-K4J3UUOR.mjs → chunk-BAUJY4Q2.mjs} +10 -10
- package/dist/{chunk-6BJ4XGSA.mjs → chunk-YQPL2O7D.mjs} +52 -3
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +129 -7
- package/dist/index.mjs +7 -4
- package/dist/livekit/integration.d.mts +4 -3
- package/dist/livekit/integration.d.ts +4 -3
- package/dist/livekit/integration.js +129 -5
- package/dist/livekit/integration.mjs +7 -4
- package/dist/pipeline/audio-pipeline.js +4 -4
- package/dist/pipeline/audio-pipeline.mjs +3 -3
- package/dist/pipeline/remote-audio-monitor.d.mts +12 -0
- package/dist/pipeline/remote-audio-monitor.d.ts +12 -0
- package/dist/pipeline/remote-audio-monitor.js +276 -0
- package/dist/pipeline/remote-audio-monitor.mjs +9 -0
- package/dist/types.d.mts +4 -1
- package/dist/types.d.ts +4 -1
- package/package.json +1 -1
package/README.md
CHANGED
````diff
@@ -9,6 +9,9 @@ DeepFilterNet3-based noise suppression and realtime speaking detection for LiveK
 - Automatic mute/unmute for LiveKit tracks
 - Simple min/max dB speaking thresholds
 
+> [Noise suppression is provided via the `deepfilternet3-noise-filter` package.](https://www.npmjs.com/package/deepfilternet3-noise-filter)
+> [That package is based on DeepFilterNet by Rikorose.](https://github.com/Rikorose/DeepFilterNet)
+
 ## Installation
 
 ```bash
````
package/dist/chunk-7IKKNKM7.mjs
ADDED

```diff
@@ -0,0 +1,92 @@
+import {
+  LevelBasedVAD
+} from "./chunk-AQ5RVY33.mjs";
+import {
+  getAudioContext,
+  registerPipeline,
+  unregisterPipeline
+} from "./chunk-OZ7KMC4S.mjs";
+import {
+  createLevelDetectorNode
+} from "./chunk-QNQK6QFB.mjs";
+
+// src/pipeline/remote-audio-monitor.ts
+import mitt from "mitt";
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = mitt();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+
+export {
+  createRemoteAudioMonitor
+};
```
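For orientation, a minimal sketch of driving the new monitor directly with a raw remote `MediaStreamTrack`. Assumptions: a browser environment, and that the `@tensamin/audio/pipeline/remote-audio-monitor` subpath is importable (the dist layout suggests it, but the package.json exports map is not shown in this diff).

```ts
// Hedged sketch: drive createRemoteAudioMonitor with a raw MediaStreamTrack.
// The import subpath below is an assumption based on the dist layout.
import { createRemoteAudioMonitor } from "@tensamin/audio/pipeline/remote-audio-monitor";

async function watchRemoteTrack(track: MediaStreamTrack): Promise<() => void> {
  const monitor = await createRemoteAudioMonitor(track, {
    // Any omitted field falls back to the defaults visible above
    // (minDb -55, maxDb -20, speakOnRatio 0.5, speakOffRatio 0.3, ...).
    speaking: { minDb: -55, maxDb: -20 },
  });
  monitor.events.on("speakingChange", (state) => {
    console.log(state.speaking ? "speaking" : "silent", `${state.levelDb.toFixed(1)} dB`);
  });
  monitor.events.on("error", (err) => console.error("monitor error", err));
  // The caller owns teardown; dispose() disconnects the Web Audio nodes
  // and unregisters the shared pipeline.
  return () => monitor.dispose();
}
```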
package/dist/{chunk-K4J3UUOR.mjs → chunk-BAUJY4Q2.mjs}
RENAMED

```diff
@@ -1,17 +1,17 @@
+import {
+  createDeepFilterNet3Node
+} from "./chunk-IS37FHDN.mjs";
+import {
+  LevelBasedVAD
+} from "./chunk-AQ5RVY33.mjs";
 import {
   getAudioContext,
   registerPipeline,
   unregisterPipeline
 } from "./chunk-OZ7KMC4S.mjs";
-import {
-  createDeepFilterNet3Node
-} from "./chunk-IS37FHDN.mjs";
 import {
   createLevelDetectorNode
 } from "./chunk-QNQK6QFB.mjs";
-import {
-  LevelBasedVAD
-} from "./chunk-AQ5RVY33.mjs";
 
 // src/pipeline/audio-pipeline.ts
 import mitt from "mitt";
@@ -28,12 +28,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
     speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
```
package/dist/{chunk-6BJ4XGSA.mjs → chunk-YQPL2O7D.mjs}
RENAMED

```diff
@@ -1,9 +1,11 @@
 import {
   createAudioPipeline
-} from "./chunk-
+} from "./chunk-BAUJY4Q2.mjs";
+import {
+  createRemoteAudioMonitor
+} from "./chunk-7IKKNKM7.mjs";
 
 // src/livekit/integration.ts
-import "mitt";
 async function attachSpeakingDetectionToTrack(track, options = {}) {
   if (!track) {
     throw new Error(
@@ -74,7 +76,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
   };
   return controller;
 }
+async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+  if (!track) {
+    throw new Error(
+      "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+    );
+  }
+  const mediaTrack = track.mediaStreamTrack;
+  if (!mediaTrack || mediaTrack.readyState === "ended") {
+    throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+  }
+  const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+  const listeners = /* @__PURE__ */ new Set();
+  let currentState = monitor.state;
+  const speakingHandler = (state) => {
+    currentState = state;
+    listeners.forEach((listener) => listener(state));
+  };
+  monitor.events.on("speakingChange", speakingHandler);
+  const errorHandler = (error) => {
+    console.error("Remote audio monitor error", error);
+  };
+  monitor.events.on("error", errorHandler);
+  const controller = {
+    get speaking() {
+      return currentState.speaking;
+    },
+    get levelDb() {
+      return currentState.levelDb;
+    },
+    onChange: (listener) => {
+      listeners.add(listener);
+      listener(currentState);
+      return () => listeners.delete(listener);
+    },
+    setConfig: (config) => {
+      monitor.setConfig(config);
+    },
+    dispose: () => {
+      monitor.events.off("speakingChange", speakingHandler);
+      monitor.events.off("error", errorHandler);
+      listeners.clear();
+      monitor.dispose();
+    }
+  };
+  return controller;
+}
 
 export {
-  attachSpeakingDetectionToTrack
+  attachSpeakingDetectionToTrack,
+  attachSpeakingDetectionToRemoteTrack
 };
```
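The headline addition here is `attachSpeakingDetectionToRemoteTrack`. A hedged sketch of wiring it into a LiveKit room follows; the `RoomEvent.TrackSubscribed` signature is standard livekit-client API, while the controller methods (`onChange`, `dispose`) come from the code added above.

```ts
import {
  Room, RoomEvent, Track,
  type RemoteAudioTrack, type RemoteTrack,
} from "livekit-client";
import { attachSpeakingDetectionToRemoteTrack } from "@tensamin/audio";

// Sketch: flag remote participants as speaking/silent as their audio
// tracks are subscribed. Cleanup wiring is simplified for brevity.
function monitorRemoteSpeakers(room: Room): void {
  room.on(RoomEvent.TrackSubscribed, async (track, _publication, participant) => {
    if (track.kind !== Track.Kind.Audio) return;
    const controller = await attachSpeakingDetectionToRemoteTrack(track as RemoteAudioTrack);
    const unsubscribe = controller.onChange((state) => {
      console.log(participant.identity, state.speaking ? "is speaking" : "is silent");
    });
    const onUnsub = (ended: RemoteTrack) => {
      if (ended !== track) return;
      room.off(RoomEvent.TrackUnsubscribed, onUnsub);
      unsubscribe();
      controller.dispose(); // detaches the monitor and its audio nodes
    };
    room.on(RoomEvent.TrackUnsubscribed, onUnsub);
  });
}
```

Each controller owns one small Web Audio graph, so disposing it on `TrackUnsubscribed` keeps the shared pipeline count balanced (see `registerPipeline`/`unregisterPipeline` in the monitor above).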
package/dist/index.d.mts
CHANGED
```diff
@@ -1,4 +1,4 @@
-export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.mjs';
-export { attachSpeakingDetectionToTrack } from './livekit/integration.mjs';
+export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.mjs';
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack } from './livekit/integration.mjs';
 import 'mitt';
 import 'livekit-client';
```
package/dist/index.d.ts
CHANGED
```diff
@@ -1,4 +1,4 @@
-export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.js';
-export { attachSpeakingDetectionToTrack } from './livekit/integration.js';
+export { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState } from './types.js';
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack } from './livekit/integration.js';
 import 'mitt';
 import 'livekit-client';
```
package/dist/index.js
CHANGED
```diff
@@ -30,13 +30,11 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
+  attachSpeakingDetectionToRemoteTrack: () => attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack: () => attachSpeakingDetectionToTrack
 });
 module.exports = __toCommonJS(index_exports);
 
-// src/livekit/integration.ts
-var import_mitt2 = require("mitt");
-
 // src/pipeline/audio-pipeline.ts
 var import_mitt = __toESM(require("mitt"));
 
@@ -244,12 +242,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
     speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
@@ -389,6 +387,83 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   return handle;
 }
 
+// src/pipeline/remote-audio-monitor.ts
+var import_mitt2 = __toESM(require("mitt"));
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = (0, import_mitt2.default)();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+
 // src/livekit/integration.ts
 async function attachSpeakingDetectionToTrack(track, options = {}) {
   if (!track) {
@@ -460,7 +535,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
   };
   return controller;
 }
+async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+  if (!track) {
+    throw new Error(
+      "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+    );
+  }
+  const mediaTrack = track.mediaStreamTrack;
+  if (!mediaTrack || mediaTrack.readyState === "ended") {
+    throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+  }
+  const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+  const listeners = /* @__PURE__ */ new Set();
+  let currentState = monitor.state;
+  const speakingHandler = (state) => {
+    currentState = state;
+    listeners.forEach((listener) => listener(state));
+  };
+  monitor.events.on("speakingChange", speakingHandler);
+  const errorHandler = (error) => {
+    console.error("Remote audio monitor error", error);
+  };
+  monitor.events.on("error", errorHandler);
+  const controller = {
+    get speaking() {
+      return currentState.speaking;
+    },
+    get levelDb() {
+      return currentState.levelDb;
+    },
+    onChange: (listener) => {
+      listeners.add(listener);
+      listener(currentState);
+      return () => listeners.delete(listener);
+    },
+    setConfig: (config) => {
+      monitor.setConfig(config);
+    },
+    dispose: () => {
+      monitor.events.off("speakingChange", speakingHandler);
+      monitor.events.off("error", errorHandler);
+      listeners.clear();
+      monitor.dispose();
+    }
+  };
+  return controller;
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 });
```
package/dist/index.mjs
CHANGED
```diff
@@ -1,12 +1,15 @@
 import "./chunk-WBQAMGXK.mjs";
 import {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
-} from "./chunk-
-import "./chunk-
-import "./chunk-OZ7KMC4S.mjs";
+} from "./chunk-YQPL2O7D.mjs";
+import "./chunk-BAUJY4Q2.mjs";
 import "./chunk-IS37FHDN.mjs";
-import "./chunk-
+import "./chunk-7IKKNKM7.mjs";
 import "./chunk-AQ5RVY33.mjs";
+import "./chunk-OZ7KMC4S.mjs";
+import "./chunk-QNQK6QFB.mjs";
 export {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 };
```
package/dist/livekit/integration.d.mts
CHANGED

```diff
@@ -1,7 +1,8 @@
-import { LivekitSpeakingOptions, SpeakingController } from '../types.mjs';
-import { LocalAudioTrack } from 'livekit-client';
+import { LivekitSpeakingOptions, SpeakingController, RemoteSpeakingOptions } from '../types.mjs';
+import { LocalAudioTrack, RemoteAudioTrack } from 'livekit-client';
 import 'mitt';
 
 declare function attachSpeakingDetectionToTrack(track: LocalAudioTrack, options?: LivekitSpeakingOptions): Promise<SpeakingController>;
+declare function attachSpeakingDetectionToRemoteTrack(track: RemoteAudioTrack, options?: RemoteSpeakingOptions): Promise<SpeakingController>;
 
-export { attachSpeakingDetectionToTrack };
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack };
```
package/dist/livekit/integration.d.ts
CHANGED

```diff
@@ -1,7 +1,8 @@
-import { LivekitSpeakingOptions, SpeakingController } from '../types.js';
-import { LocalAudioTrack } from 'livekit-client';
+import { LivekitSpeakingOptions, SpeakingController, RemoteSpeakingOptions } from '../types.js';
+import { LocalAudioTrack, RemoteAudioTrack } from 'livekit-client';
 import 'mitt';
 
 declare function attachSpeakingDetectionToTrack(track: LocalAudioTrack, options?: LivekitSpeakingOptions): Promise<SpeakingController>;
+declare function attachSpeakingDetectionToRemoteTrack(track: RemoteAudioTrack, options?: RemoteSpeakingOptions): Promise<SpeakingController>;
 
-export { attachSpeakingDetectionToTrack };
+export { attachSpeakingDetectionToRemoteTrack, attachSpeakingDetectionToTrack };
```
package/dist/livekit/integration.js
CHANGED

```diff
@@ -30,10 +30,10 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/livekit/integration.ts
 var integration_exports = {};
 __export(integration_exports, {
+  attachSpeakingDetectionToRemoteTrack: () => attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack: () => attachSpeakingDetectionToTrack
 });
 module.exports = __toCommonJS(integration_exports);
-var import_mitt2 = require("mitt");
 
 // src/pipeline/audio-pipeline.ts
 var import_mitt = __toESM(require("mitt"));
@@ -242,12 +242,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
     speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
@@ -387,6 +387,83 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   return handle;
 }
 
+// src/pipeline/remote-audio-monitor.ts
+var import_mitt2 = __toESM(require("mitt"));
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = (0, import_mitt2.default)();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+
 // src/livekit/integration.ts
 async function attachSpeakingDetectionToTrack(track, options = {}) {
   if (!track) {
@@ -458,7 +535,54 @@ async function attachSpeakingDetectionToTrack(track, options = {}) {
   };
   return controller;
 }
+async function attachSpeakingDetectionToRemoteTrack(track, options = {}) {
+  if (!track) {
+    throw new Error(
+      "attachSpeakingDetectionToRemoteTrack requires a valid RemoteAudioTrack"
+    );
+  }
+  const mediaTrack = track.mediaStreamTrack;
+  if (!mediaTrack || mediaTrack.readyState === "ended") {
+    throw new Error("RemoteAudioTrack has no live MediaStreamTrack to monitor");
+  }
+  const monitor = await createRemoteAudioMonitor(mediaTrack, options);
+  const listeners = /* @__PURE__ */ new Set();
+  let currentState = monitor.state;
+  const speakingHandler = (state) => {
+    currentState = state;
+    listeners.forEach((listener) => listener(state));
+  };
+  monitor.events.on("speakingChange", speakingHandler);
+  const errorHandler = (error) => {
+    console.error("Remote audio monitor error", error);
+  };
+  monitor.events.on("error", errorHandler);
+  const controller = {
+    get speaking() {
+      return currentState.speaking;
+    },
+    get levelDb() {
+      return currentState.levelDb;
+    },
+    onChange: (listener) => {
+      listeners.add(listener);
+      listener(currentState);
+      return () => listeners.delete(listener);
+    },
+    setConfig: (config) => {
+      monitor.setConfig(config);
+    },
+    dispose: () => {
+      monitor.events.off("speakingChange", speakingHandler);
+      monitor.events.off("error", errorHandler);
+      listeners.clear();
+      monitor.dispose();
+    }
+  };
+  return controller;
+}
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 });
```
package/dist/livekit/integration.mjs
CHANGED

```diff
@@ -1,11 +1,14 @@
 import {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
-} from "../chunk-
-import "../chunk-
-import "../chunk-OZ7KMC4S.mjs";
+} from "../chunk-YQPL2O7D.mjs";
+import "../chunk-BAUJY4Q2.mjs";
 import "../chunk-IS37FHDN.mjs";
-import "../chunk-
+import "../chunk-7IKKNKM7.mjs";
 import "../chunk-AQ5RVY33.mjs";
+import "../chunk-OZ7KMC4S.mjs";
+import "../chunk-QNQK6QFB.mjs";
 export {
+  attachSpeakingDetectionToRemoteTrack,
   attachSpeakingDetectionToTrack
 };
```
package/dist/pipeline/audio-pipeline.js
CHANGED

```diff
@@ -239,12 +239,12 @@ async function createAudioPipeline(sourceTrack, config = {}) {
   const fullConfig = {
     noiseSuppression: nsConfig,
    speaking: {
-      minDb: config.speaking?.minDb ?? -
+      minDb: config.speaking?.minDb ?? -55,
       maxDb: config.speaking?.maxDb ?? -20,
-      speakOnRatio: config.speaking?.speakOnRatio ?? 0.
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
       speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
-      hangoverMs: config.speaking?.hangoverMs ??
-      attackMs: config.speaking?.attackMs ??
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
       releaseMs: config.speaking?.releaseMs ?? 120
     },
     output: {
```
package/dist/pipeline/audio-pipeline.mjs
CHANGED

```diff
@@ -1,10 +1,10 @@
 import {
   createAudioPipeline
-} from "../chunk-
-import "../chunk-OZ7KMC4S.mjs";
+} from "../chunk-BAUJY4Q2.mjs";
 import "../chunk-IS37FHDN.mjs";
-import "../chunk-QNQK6QFB.mjs";
 import "../chunk-AQ5RVY33.mjs";
+import "../chunk-OZ7KMC4S.mjs";
+import "../chunk-QNQK6QFB.mjs";
 export {
   createAudioPipeline
 };
```
package/dist/pipeline/remote-audio-monitor.d.mts
ADDED

```diff
@@ -0,0 +1,12 @@
+import { Emitter } from 'mitt';
+import { SpeakingEvents, SpeakingState, RemoteSpeakingOptions } from '../types.mjs';
+
+interface RemoteAudioMonitorHandle {
+  readonly events: Emitter<SpeakingEvents>;
+  readonly state: SpeakingState;
+  setConfig(config: Partial<RemoteSpeakingOptions>): void;
+  dispose(): void;
+}
+declare function createRemoteAudioMonitor(sourceTrack: MediaStreamTrack, config?: RemoteSpeakingOptions): Promise<RemoteAudioMonitorHandle>;
+
+export { type RemoteAudioMonitorHandle, createRemoteAudioMonitor };
```
package/dist/pipeline/remote-audio-monitor.d.ts
ADDED

```diff
@@ -0,0 +1,12 @@
+import { Emitter } from 'mitt';
+import { SpeakingEvents, SpeakingState, RemoteSpeakingOptions } from '../types.js';
+
+interface RemoteAudioMonitorHandle {
+  readonly events: Emitter<SpeakingEvents>;
+  readonly state: SpeakingState;
+  setConfig(config: Partial<RemoteSpeakingOptions>): void;
+  dispose(): void;
+}
+declare function createRemoteAudioMonitor(sourceTrack: MediaStreamTrack, config?: RemoteSpeakingOptions): Promise<RemoteAudioMonitorHandle>;
+
+export { type RemoteAudioMonitorHandle, createRemoteAudioMonitor };
```
package/dist/pipeline/remote-audio-monitor.js
ADDED

```diff
@@ -0,0 +1,276 @@
+"use strict";
+var __create = Object.create;
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+// src/pipeline/remote-audio-monitor.ts
+var remote_audio_monitor_exports = {};
+__export(remote_audio_monitor_exports, {
+  createRemoteAudioMonitor: () => createRemoteAudioMonitor
+});
+module.exports = __toCommonJS(remote_audio_monitor_exports);
+var import_mitt = __toESM(require("mitt"));
+
+// src/context/audio-context.ts
+var sharedContext = null;
+var activePipelines = 0;
+function getAudioContext(options) {
+  if (typeof window === "undefined" || typeof AudioContext === "undefined") {
+    throw new Error(
+      "AudioContext is not supported in this environment (browser only)."
+    );
+  }
+  if (!sharedContext || sharedContext.state === "closed") {
+    sharedContext = new AudioContext(options);
+  }
+  return sharedContext;
+}
+function registerPipeline() {
+  activePipelines++;
+}
+function unregisterPipeline() {
+  activePipelines = Math.max(0, activePipelines - 1);
+}
+
+// src/vad/vad-node.ts
+function createLevelDetectorWorkletCode(smoothing) {
+  return `
+class LevelDetectorProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.smoothed = 0;
+    this.smoothing = ${smoothing};
+  }
+
+  process(inputs) {
+    const input = inputs[0];
+    if (!input || input.length === 0) return true;
+    const channel = input[0];
+    if (!channel || channel.length === 0) return true;
+
+    let sum = 0;
+    for (let i = 0; i < channel.length; i++) {
+      const sample = channel[i];
+      sum += sample * sample;
+    }
+    const rms = Math.sqrt(sum / channel.length);
+    this.smoothed = this.smoothed * this.smoothing + rms * (1 - this.smoothing);
+    const levelDb = 20 * Math.log10(Math.max(1e-8, this.smoothed));
+    this.port.postMessage({ levelDb });
+    return true;
+  }
+}
+
+registerProcessor('level-detector-processor', LevelDetectorProcessor);
+`;
+}
+async function createLevelDetectorNode(context, onLevel, options) {
+  const smoothing = options?.smoothing ?? 0.9;
+  const workletCode = createLevelDetectorWorkletCode(smoothing);
+  const blob = new Blob([workletCode], { type: "application/javascript" });
+  const url = URL.createObjectURL(blob);
+  try {
+    await context.audioWorklet.addModule(url);
+  } finally {
+    URL.revokeObjectURL(url);
+  }
+  const node = new AudioWorkletNode(context, "level-detector-processor", {
+    numberOfInputs: 1,
+    numberOfOutputs: 0
+  });
+  node.port.onmessage = (event) => {
+    const { levelDb } = event.data ?? {};
+    if (typeof levelDb === "number" && !Number.isNaN(levelDb)) {
+      onLevel(levelDb);
+    }
+  };
+  node.port.onmessageerror = (event) => {
+    console.error("Level detector port error", event);
+  };
+  return {
+    node,
+    dispose: () => {
+      try {
+        node.port.onmessage = null;
+        node.port.close();
+      } catch (error) {
+        console.error("Failed to dispose level detector node", error);
+      }
+    }
+  };
+}
+
+// src/vad/vad-state.ts
+var LevelBasedVAD = class {
+  config;
+  speaking = false;
+  pendingSpeechSince = null;
+  pendingSilenceSince = null;
+  constructor(config) {
+    this.config = {
+      minDb: config.minDb,
+      maxDb: config.maxDb,
+      speakOnRatio: config.speakOnRatio ?? 0.6,
+      speakOffRatio: config.speakOffRatio ?? 0.3,
+      hangoverMs: config.hangoverMs ?? 350,
+      attackMs: config.attackMs ?? 50,
+      releaseMs: config.releaseMs ?? 120
+    };
+  }
+  updateConfig(config) {
+    this.config = {
+      ...this.config,
+      ...config,
+      speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
+      speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
+      hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
+      attackMs: config.attackMs ?? this.config.attackMs,
+      releaseMs: config.releaseMs ?? this.config.releaseMs
+    };
+  }
+  process(levelDb, timestampMs) {
+    const {
+      minDb,
+      maxDb,
+      speakOnRatio,
+      speakOffRatio,
+      hangoverMs,
+      attackMs,
+      releaseMs
+    } = this.config;
+    const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
+    const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
+    if (!this.speaking) {
+      if (norm >= speakOnRatio) {
+        this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
+        if (timestampMs - this.pendingSpeechSince >= attackMs) {
+          this.speaking = true;
+          this.pendingSpeechSince = null;
+          this.pendingSilenceSince = null;
+        }
+      } else {
+        this.pendingSpeechSince = null;
+      }
+    } else {
+      if (norm <= speakOffRatio) {
+        this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
+        const releaseWindow = Math.max(releaseMs, hangoverMs);
+        if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
+          this.speaking = false;
+          this.pendingSilenceSince = null;
+          this.pendingSpeechSince = null;
+        }
+      } else {
+        this.pendingSilenceSince = null;
+      }
+    }
+    return {
+      speaking: this.speaking,
+      levelDb: clamped
+    };
+  }
+};
+
+// src/pipeline/remote-audio-monitor.ts
+async function createRemoteAudioMonitor(sourceTrack, config = {}) {
+  const context = getAudioContext();
+  registerPipeline();
+  const fullConfig = {
+    speaking: {
+      minDb: config.speaking?.minDb ?? -55,
+      maxDb: config.speaking?.maxDb ?? -20,
+      speakOnRatio: config.speaking?.speakOnRatio ?? 0.5,
+      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
+      hangoverMs: config.speaking?.hangoverMs ?? 500,
+      attackMs: config.speaking?.attackMs ?? 100,
+      releaseMs: config.speaking?.releaseMs ?? 120
+    }
+  };
+  if (!sourceTrack || sourceTrack.kind !== "audio") {
+    throw new Error(
+      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
+    );
+  }
+  if (sourceTrack.readyState === "ended") {
+    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
+  }
+  const sourceStream = new MediaStream([sourceTrack]);
+  const sourceNode = context.createMediaStreamSource(sourceStream);
+  const emitter = (0, import_mitt.default)();
+  const vad = new LevelBasedVAD(fullConfig.speaking);
+  let lastState = { speaking: false, levelDb: -Infinity };
+  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
+    try {
+      const timestamp = context.currentTime * 1e3;
+      const nextState = vad.process(levelDb, timestamp);
+      const speakingChanged = nextState.speaking !== lastState.speaking;
+      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
+      if (speakingChanged || levelChanged) {
+        lastState = nextState;
+        emitter.emit("speakingChange", nextState);
+      }
+    } catch (error) {
+      const err = error instanceof Error ? error : new Error(String(error));
+      emitter.emit("error", err);
+    }
+  });
+  sourceNode.connect(levelHandle.node);
+  function dispose() {
+    try {
+      sourceNode.disconnect();
+      levelHandle.node.disconnect();
+      levelHandle.dispose();
+    } catch (error) {
+      console.error("Error during remote monitor disposal", error);
+    } finally {
+      unregisterPipeline();
+    }
+  }
+  const handle = {
+    events: emitter,
+    get state() {
+      return lastState;
+    },
+    setConfig: (next) => {
+      try {
+        if (next.speaking) {
+          vad.updateConfig(next.speaking);
+          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
+        }
+      } catch (error) {
+        const err = error instanceof Error ? error : new Error(String(error));
+        emitter.emit("error", err);
+      }
+    },
+    dispose
+  };
+  return handle;
+}
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  createRemoteAudioMonitor
+});
```
package/dist/types.d.mts
CHANGED
```diff
@@ -29,6 +29,9 @@ interface LivekitSpeakingOptions {
   output?: OutputGainConfig;
   muteWhenSilent?: boolean;
 }
+interface RemoteSpeakingOptions {
+  speaking?: SpeakingDetectionConfig;
+}
 interface SpeakingState {
   speaking: boolean;
   levelDb: number;
@@ -52,4 +55,4 @@ interface SpeakingController {
   dispose(): void;
 }
 
-export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
+export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
```
package/dist/types.d.ts
CHANGED
```diff
@@ -29,6 +29,9 @@ interface LivekitSpeakingOptions {
   output?: OutputGainConfig;
   muteWhenSilent?: boolean;
 }
+interface RemoteSpeakingOptions {
+  speaking?: SpeakingDetectionConfig;
+}
 interface SpeakingState {
   speaking: boolean;
   levelDb: number;
@@ -52,4 +55,4 @@ interface SpeakingController {
   dispose(): void;
 }
 
-export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
+export type { AudioPipelineHandle, LivekitSpeakingOptions, NoiseSuppressionConfig, OutputGainConfig, RemoteSpeakingOptions, SpeakingController, SpeakingDetectionConfig, SpeakingEvents, SpeakingState };
```
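Since `RemoteSpeakingOptions` just wraps the existing `SpeakingDetectionConfig`, retuning a live controller reduces to one `setConfig` call. A sketch under one assumption: the exact parameter type of `SpeakingController.setConfig` is not visible in this diff, so it is assumed to accept this shape, matching how the JS above forwards it to `monitor.setConfig()`.

```ts
import type { RemoteSpeakingOptions, SpeakingController } from "@tensamin/audio";

// Assumption: setConfig accepts the RemoteSpeakingOptions shape; the JS
// implementation above forwards it unchanged to monitor.setConfig().
function makeDetectionStricter(controller: SpeakingController): void {
  const stricter: RemoteSpeakingOptions = {
    speaking: {
      speakOnRatio: 0.6, // require a louder normalized level before flagging speech
      hangoverMs: 300,   // drop the speaking flag sooner after silence
    },
  };
  controller.setConfig(stricter);
}
```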