@tensamin/audio 0.1.15 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -229
- package/dist/chunk-AQ5RVY33.mjs +74 -0
- package/dist/chunk-BSYE2MWZ.mjs +178 -0
- package/dist/chunk-DTIMONGP.mjs +92 -0
- package/dist/chunk-IS37FHDN.mjs +33 -0
- package/dist/chunk-JBGGED5Q.mjs +129 -0
- package/dist/chunk-QNQK6QFB.mjs +71 -0
- package/dist/context/audio-context.d.mts +0 -24
- package/dist/context/audio-context.d.ts +0 -24
- package/dist/index.d.mts +2 -8
- package/dist/index.d.ts +2 -8
- package/dist/index.js +403 -651
- package/dist/index.mjs +11 -43
- package/dist/livekit/integration.d.mts +5 -8
- package/dist/livekit/integration.d.ts +5 -8
- package/dist/livekit/integration.js +401 -598
- package/dist/livekit/integration.mjs +10 -8
- package/dist/noise-suppression/deepfilternet-node.d.mts +12 -0
- package/dist/noise-suppression/deepfilternet-node.d.ts +12 -0
- package/dist/noise-suppression/deepfilternet-node.js +57 -0
- package/dist/noise-suppression/deepfilternet-node.mjs +6 -0
- package/dist/pipeline/audio-pipeline.d.mts +2 -2
- package/dist/pipeline/audio-pipeline.d.ts +2 -2
- package/dist/pipeline/audio-pipeline.js +219 -529
- package/dist/pipeline/audio-pipeline.mjs +4 -5
- package/dist/pipeline/remote-audio-monitor.d.mts +12 -0
- package/dist/pipeline/remote-audio-monitor.d.ts +12 -0
- package/dist/pipeline/remote-audio-monitor.js +276 -0
- package/dist/pipeline/remote-audio-monitor.mjs +9 -0
- package/dist/types.d.mts +45 -246
- package/dist/types.d.ts +45 -246
- package/dist/vad/vad-node.d.mts +7 -9
- package/dist/vad/vad-node.d.ts +7 -9
- package/dist/vad/vad-node.js +47 -134
- package/dist/vad/vad-node.mjs +3 -3
- package/dist/vad/vad-state.d.mts +9 -11
- package/dist/vad/vad-state.d.ts +9 -11
- package/dist/vad/vad-state.js +50 -77
- package/dist/vad/vad-state.mjs +3 -3
- package/package.json +21 -21
- package/dist/chunk-GLKAWCEW.mjs +0 -158
- package/dist/chunk-KLBA2CPE.mjs +0 -101
- package/dist/chunk-QQFKHTCQ.mjs +0 -91
- package/dist/chunk-U26F3GJN.mjs +0 -47
- package/dist/chunk-WQVMSR7V.mjs +0 -310
- package/dist/chunk-XO6B3D4A.mjs +0 -67
- package/dist/extensibility/plugins.d.mts +0 -9
- package/dist/extensibility/plugins.d.ts +0 -9
- package/dist/extensibility/plugins.js +0 -298
- package/dist/extensibility/plugins.mjs +0 -14
- package/dist/noise-suppression/rnnoise-node.d.mts +0 -10
- package/dist/noise-suppression/rnnoise-node.d.ts +0 -10
- package/dist/noise-suppression/rnnoise-node.js +0 -101
- package/dist/noise-suppression/rnnoise-node.mjs +0 -6
package/README.md
CHANGED
````diff
@@ -1,16 +1,17 @@
 # @tensamin/audio
 
-Audio processing library for the web with RNNoise-based noise suppression and Vo
+DeepFilterNet3-based noise suppression and realtime speaking detection for LiveKit.
 
 ## Features
 
--
--
-- Automatic
--
-
-
-
+- DeepFilterNet3 WASM noise suppression
+- Realtime `speaking` boolean + dB level
+- Automatic mute/unmute for LiveKit tracks
+- Simple min/max dB speaking thresholds
+
+> [Noise suppression is provided via the `deepfilternet3-noise-filter` package.](https://www.npmjs.com/package/deepfilternet3-noise-filter)
+> [That package is based on DeepFilterNet by Rikorose.](https://github.com/Rikorose/DeepFilterNet)
+
 
 ## Installation
 
@@ -18,259 +19,81 @@ Audio processing library for the web with RNNoise-based noise suppression and Vo
 npm install @tensamin/audio livekit-client
 ```
 
-##
-
-For noise suppression, the following files must be provided:
-
-- `rnnoise.wasm`
-- `rnnoise_simd.wasm`
-- `worklet.js`
-
-Available at: `https://unpkg.com/@sapphi-red/web-noise-suppressor@0.3.5/dist/`
-
-Place these files in a publicly accessible directory (e.g., `public/audio-processor/`).
-
-## Usage
-
-### Basic Example
-
-```ts
-import { createAudioPipeline } from "@tensamin/audio";
-
-const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-const track = stream.getAudioTracks()[0];
-
-const pipeline = await createAudioPipeline(track, {
-  noiseSuppression: {
-    enabled: true,
-    wasmUrl: "/audio-processor/rnnoise.wasm",
-    simdUrl: "/audio-processor/rnnoise_simd.wasm",
-    workletUrl: "/audio-processor/worklet.js",
-  },
-  vad: { enabled: true },
-});
-
-const processedStream = new MediaStream([pipeline.processedTrack]);
-```
-
-### LiveKit Integration
+## Quick Start (LiveKit)
 
 ```ts
-import { attachProcessingToTrack } from "@tensamin/audio";
 import { LocalAudioTrack } from "livekit-client";
+import { attachSpeakingDetectionToTrack } from "@tensamin/audio";
 
 const localTrack = await LocalAudioTrack.create();
 
-const
+const controller = await attachSpeakingDetectionToTrack(localTrack, {
+  speaking: {
+    minDb: -60,
+    maxDb: -20,
+  },
   noiseSuppression: {
     enabled: true,
-    wasmUrl: "/audio-processor/rnnoise.wasm",
-    simdUrl: "/audio-processor/rnnoise_simd.wasm",
-    workletUrl: "/audio-processor/worklet.js",
   },
-
-  livekit: { manageTrackMute: true },
+  muteWhenSilent: true,
 });
 
-
-
-### Monitoring VAD State
-
-```ts
-pipeline.events.on("vadChange", (state) => {
-  console.log("Speaking:", state.isSpeaking);
-  console.log("Probability:", state.probability);
-  console.log("State:", state.state);
+controller.onChange((state) => {
+  console.log("speaking", state.speaking);
+  console.log("levelDb", state.levelDb);
 });
-```
-
-## Configuration
-
-### Voice Activity Detection
 
-
-vad: {
-  enabled: boolean;
-  startThreshold: number; // Default: 0.6 (range: 0-1)
-  stopThreshold: number; // Default: 0.45 (range: 0-1)
-  hangoverMs: number; // Default: 400
-  preRollMs: number; // Default: 250
-  minSpeechDurationMs: number; // Default: 100
-  minSilenceDurationMs: number; // Default: 150
-  energyVad?: {
-    smoothing: number; // Default: 0.95
-    initialNoiseFloor: number; // Default: 0.001
-    minSNR: number; // Default: 8.0 (dB)
-    snrRange: number; // Default: 12.0 (dB)
-    minEnergy: number; // Default: 0.01
-  };
-}
+await room.localParticipant.publishTrack(localTrack);
 ```
 
-
-
-- `startThreshold`: Probability threshold to unmute audio (Default: 0.8, ~18dB SNR)
-- `stopThreshold`: Probability threshold to mute audio (Default: 0.3, ~13dB SNR)
-- `hangoverMs`: Delay before muting after speech stops (Default: 300ms)
-- `preRollMs`: Audio buffer duration before speech onset
-- `minSpeechDurationMs`: Minimum duration to consider as valid speech (Default: 250ms)
-- `minSilenceDurationMs`: Minimum silence duration between speech segments
-
-**Energy VAD Parameters:**
+## Configuration
 
-
-- `minSNR`: Minimum signal-to-noise ratio in dB for speech detection
-- `snrRange`: Range in dB for probability scaling from minSNR
-- `minEnergy`: Minimum absolute RMS energy to consider as speech (Default: 0.01, ~-40dB)
+All options are passed via `LivekitSpeakingOptions` to `attachSpeakingDetectionToTrack`.
 
-###
+### Noise suppression (DeepFilterNet3)
 
 ```ts
-
-
-
-
-
-maxGainDb: number; // Default: 6.0
-enableCompression: boolean; // Default: false
-compression?: {
-  threshold: number; // Default: -24.0 (dB)
-  ratio: number; // Default: 3.0
-  attack: number; // Default: 0.003 (seconds)
-  release: number; // Default: 0.05 (seconds)
+noiseSuppression: {
+  enabled?: boolean; // default: true
+  noiseReductionLevel?: number; // 0-100, default: 60
+  assetConfig?: {
+    cdnUrl?: string;
   };
 }
 ```
 
-
-
-- `speechGain`: Gain multiplier when speaking (1.0 = unity)
-- `silenceGain`: Gain multiplier when silent (0.0 = mute)
-- `gainRampTime`: Transition duration for gain changes
-- `maxGainDb`: Maximum gain limit to prevent clipping
-
-**Compression Parameters:**
-
-- `threshold`: Level above which compression is applied
-- `ratio`: Compression ratio (e.g., 3.0 = 3:1)
-- `attack`: Time to reach full compression
-- `release`: Time to release compression
-
-### Runtime Configuration Updates
-
-```ts
-pipeline.setConfig({
-  vad: {
-    startThreshold: 0.7,
-    stopThreshold: 0.55,
-  },
-  output: {
-    speechGain: 1.3,
-  },
-});
-```
-
-## Configuration Examples
-
-### Noisy Environment
-
-```ts
-{
-  vad: {
-    startThreshold: 0.7,
-    stopThreshold: 0.55,
-    minSpeechDurationMs: 150,
-    energyVad: { minSNR: 3.0 }
-  }
-}
-```
-
-### Quiet Speaker
-
-```ts
-{
-  vad: {
-    startThreshold: 0.4,
-    stopThreshold: 0.25,
-    energyVad: { minSNR: 1.5 }
-  },
-  output: {
-    speechGain: 1.5
-  }
-}
-```
-
-### Natural Conversation
+### Speaking detection (dB-based)
 
 ```ts
-{
-
-
-
-
-
-
-
-}
+speaking: {
+  minDb: number; // e.g. -60
+  maxDb: number; // e.g. -20
+  speakOnRatio?: number; // default: 0.6
+  speakOffRatio?: number; // default: 0.3
+  hangoverMs?: number; // default: 350
+  attackMs?: number; // default: 50
+  releaseMs?: number; // default: 120
 }
 ```
 
-
-
-### `createAudioPipeline(track, config)`
+`minDb` / `maxDb` define the dynamic range used for level normalization. `speakOnRatio` and `speakOffRatio` (0–1) control when speech starts/stops within that range.
 
-
+### Output gain control
 
-
-
-
-
-
-
-
-### AudioPipelineHandle
-
-```ts
-interface AudioPipelineHandle {
-  processedTrack: MediaStreamTrack;
-  events: Emitter<AudioPipelineEvents>;
-  state: VADState;
-  setConfig(config: Partial<AudioProcessingConfig>): void;
-  dispose(): void;
+```ts
+output: {
+  speechGain?: number; // default: 1.0
+  silenceGain?: number; // default: 0.0
+  gainRampTime?: number; // default: 0.015 (s)
+  maxGainDb?: number; // default: 6.0
+  smoothTransitions?: boolean; // default: true
 }
-
+```
 
-###
+### LiveKit mute handling
 
 ```ts
-
-
-error: Error;
-};
-```
-
-### VADState
-
-```ts
-interface VADState {
-  isSpeaking: boolean;
-  probability: number;
-  state: "silent" | "speech_starting" | "speaking" | "speech_ending";
-}
-```
-
-## Default Values
+muteWhenSilent?: boolean; // default: false
+```
 
-
-| ---------------------- | ------- | -------------------------------- |
-| `startThreshold`       | 0.6     | Unmute at 60% confidence         |
-| `stopThreshold`        | 0.45    | Mute below 45% confidence        |
-| `hangoverMs`           | 400     | Wait 400ms before muting         |
-| `preRollMs`            | 250     | Buffer 250ms before speech       |
-| `minSpeechDurationMs`  | 100     | Minimum valid speech duration    |
-| `minSilenceDurationMs` | 150     | Minimum silence between speech   |
-| `silenceGain`          | 0.0     | Complete mute when silent        |
-| `speechGain`           | 1.0     | Unity gain when speaking         |
-| `minSNR`               | 2.0     | Voice must be 2x noise floor     |
-| `snrRange`             | 8.0     | Probability scales over SNR 2-10 |
+When `muteWhenSilent` is `true`, the library automatically calls `track.mute()` when silence is detected and `track.unmute()` when speech resumes (only if it muted the track itself).
````
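With the defaults above (`minDb: -60`, `maxDb: -20`, `speakOnRatio: 0.6`, `speakOffRatio: 0.3`), the ratio thresholds resolve to concrete dB levels. A quick sketch of that mapping, mirroring the normalization in the shipped `LevelBasedVAD` (see `chunk-AQ5RVY33.mjs` below):

```ts
// Levels are clamped to [minDb, maxDb], then normalized to 0..1.
const minDb = -60;
const maxDb = -20;

const norm = (levelDb: number) =>
  (Math.min(maxDb, Math.max(minDb, levelDb)) - minDb) /
  Math.max(1, maxDb - minDb);

// Speech can start once norm >= speakOnRatio (0.6), i.e. above:
console.log(minDb + 0.6 * (maxDb - minDb)); // -36 dB
// ...and can stop once norm <= speakOffRatio (0.3), i.e. below:
console.log(minDb + 0.3 * (maxDb - minDb)); // -48 dB
console.log(norm(-36)); // 0.6
```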
package/dist/chunk-AQ5RVY33.mjs
ADDED

```js
// src/vad/vad-state.ts
var LevelBasedVAD = class {
  config;
  speaking = false;
  pendingSpeechSince = null;
  pendingSilenceSince = null;
  constructor(config) {
    this.config = {
      minDb: config.minDb,
      maxDb: config.maxDb,
      speakOnRatio: config.speakOnRatio ?? 0.6,
      speakOffRatio: config.speakOffRatio ?? 0.3,
      hangoverMs: config.hangoverMs ?? 350,
      attackMs: config.attackMs ?? 50,
      releaseMs: config.releaseMs ?? 120
    };
  }
  updateConfig(config) {
    this.config = {
      ...this.config,
      ...config,
      speakOnRatio: config.speakOnRatio ?? this.config.speakOnRatio,
      speakOffRatio: config.speakOffRatio ?? this.config.speakOffRatio,
      hangoverMs: config.hangoverMs ?? this.config.hangoverMs,
      attackMs: config.attackMs ?? this.config.attackMs,
      releaseMs: config.releaseMs ?? this.config.releaseMs
    };
  }
  process(levelDb, timestampMs) {
    const {
      minDb,
      maxDb,
      speakOnRatio,
      speakOffRatio,
      hangoverMs,
      attackMs,
      releaseMs
    } = this.config;
    const clamped = Math.min(maxDb, Math.max(minDb, levelDb));
    const norm = (clamped - minDb) / Math.max(1, maxDb - minDb);
    if (!this.speaking) {
      if (norm >= speakOnRatio) {
        this.pendingSpeechSince = this.pendingSpeechSince ?? timestampMs;
        if (timestampMs - this.pendingSpeechSince >= attackMs) {
          this.speaking = true;
          this.pendingSpeechSince = null;
          this.pendingSilenceSince = null;
        }
      } else {
        this.pendingSpeechSince = null;
      }
    } else {
      if (norm <= speakOffRatio) {
        this.pendingSilenceSince = this.pendingSilenceSince ?? timestampMs;
        const releaseWindow = Math.max(releaseMs, hangoverMs);
        if (timestampMs - this.pendingSilenceSince >= releaseWindow) {
          this.speaking = false;
          this.pendingSilenceSince = null;
          this.pendingSpeechSince = null;
        }
      } else {
        this.pendingSilenceSince = null;
      }
    }
    return {
      speaking: this.speaking,
      levelDb: clamped
    };
  }
};

export {
  LevelBasedVAD
};
```
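A minimal sketch of driving this class directly; application code would normally go through `createAudioPipeline` or `attachSpeakingDetectionToTrack` instead, and the relative import path here simply assumes the chunk filename above:

```ts
import { LevelBasedVAD } from "./chunk-AQ5RVY33.mjs";

const vad = new LevelBasedVAD({ minDb: -60, maxDb: -20 });

// One level sample per frame, timestamps in milliseconds. A loud level must
// persist for attackMs (50 ms) before `speaking` flips on, and a quiet one
// for max(releaseMs, hangoverMs) (350 ms by default) before it flips off.
const levels = [-55, -30, -30, -30, -55];
levels.forEach((levelDb, i) => {
  const state = vad.process(levelDb, i * 25); // 25 ms frame cadence
  console.log(state.speaking, state.levelDb);
});
```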
package/dist/chunk-BSYE2MWZ.mjs
ADDED

```js
import {
  createDeepFilterNet3Node
} from "./chunk-IS37FHDN.mjs";
import {
  LevelBasedVAD
} from "./chunk-AQ5RVY33.mjs";
import {
  getAudioContext,
  registerPipeline,
  unregisterPipeline
} from "./chunk-OZ7KMC4S.mjs";
import {
  createLevelDetectorNode
} from "./chunk-QNQK6QFB.mjs";

// src/pipeline/audio-pipeline.ts
import mitt from "mitt";
async function createAudioPipeline(sourceTrack, config = {}) {
  const context = getAudioContext();
  registerPipeline();
  const nsConfig = {
    enabled: config.noiseSuppression?.enabled ?? true,
    noiseReductionLevel: config.noiseSuppression?.noiseReductionLevel ?? 60
  };
  if (config.noiseSuppression?.assetConfig) {
    nsConfig.assetConfig = config.noiseSuppression.assetConfig;
  }
  const fullConfig = {
    noiseSuppression: nsConfig,
    speaking: {
      minDb: config.speaking?.minDb ?? -60,
      maxDb: config.speaking?.maxDb ?? -20,
      speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
      hangoverMs: config.speaking?.hangoverMs ?? 350,
      attackMs: config.speaking?.attackMs ?? 50,
      releaseMs: config.speaking?.releaseMs ?? 120
    },
    output: {
      speechGain: config.output?.speechGain ?? 1,
      silenceGain: config.output?.silenceGain ?? 0,
      gainRampTime: config.output?.gainRampTime ?? 0.015,
      maxGainDb: config.output?.maxGainDb ?? 6,
      smoothTransitions: config.output?.smoothTransitions ?? true
    },
    muteWhenSilent: config.muteWhenSilent ?? false
  };
  if (!sourceTrack || sourceTrack.kind !== "audio") {
    throw new Error(
      "createAudioPipeline requires a valid audio MediaStreamTrack"
    );
  }
  if (sourceTrack.readyState === "ended") {
    throw new Error("Cannot create pipeline from an ended MediaStreamTrack");
  }
  const sourceStream = new MediaStream([sourceTrack]);
  const sourceNode = context.createMediaStreamSource(sourceStream);
  const emitter = mitt();
  const vad = new LevelBasedVAD(fullConfig.speaking);
  let lastState = { speaking: false, levelDb: -Infinity };
  const nsHandle = await createDeepFilterNet3Node(
    context,
    fullConfig.noiseSuppression
  );
  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
    try {
      const timestamp = context.currentTime * 1e3;
      const nextState = vad.process(levelDb, timestamp);
      const speakingChanged = nextState.speaking !== lastState.speaking;
      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
      if (speakingChanged || levelChanged) {
        lastState = nextState;
        updateGain(nextState);
        emitter.emit("speakingChange", nextState);
      }
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      emitter.emit("error", err);
    }
  });
  const splitter = context.createGain();
  sourceNode.connect(nsHandle.node);
  nsHandle.node.connect(splitter);
  splitter.connect(levelHandle.node);
  const gainNode = context.createGain();
  gainNode.gain.value = fullConfig.output?.silenceGain ?? 0;
  splitter.connect(gainNode);
  const destination = context.createMediaStreamDestination();
  gainNode.connect(destination);
  function updateGain(state) {
    const {
      speechGain = 1,
      silenceGain = 0,
      gainRampTime = 0.015,
      smoothTransitions = true,
      maxGainDb = 6
    } = fullConfig.output ?? {};
    const maxGainLinear = Math.pow(10, maxGainDb / 20);
    const limitedSpeechGain = Math.min(speechGain ?? 1, maxGainLinear);
    const target = state.speaking ? limitedSpeechGain : silenceGain ?? 0;
    const now = context.currentTime;
    gainNode.gain.cancelScheduledValues(now);
    gainNode.gain.setValueAtTime(gainNode.gain.value, now);
    if (smoothTransitions) {
      gainNode.gain.setTargetAtTime(target, now, gainRampTime / 3);
    } else {
      gainNode.gain.setValueAtTime(target, now);
    }
  }
  const audioTracks = destination.stream.getAudioTracks();
  if (audioTracks.length === 0) {
    nsHandle.dispose();
    levelHandle.dispose();
    unregisterPipeline();
    throw new Error("Failed to create processed audio track");
  }
  const processedTrack = audioTracks[0];
  function dispose() {
    try {
      sourceNode.disconnect();
      nsHandle.node.disconnect();
      splitter.disconnect();
      levelHandle.node.disconnect();
      gainNode.disconnect();
      destination.stream.getTracks().forEach((t) => t.stop());
      levelHandle.dispose();
      nsHandle.dispose();
    } catch (error) {
      console.error("Error during pipeline disposal", error);
    } finally {
      unregisterPipeline();
    }
  }
  const handle = {
    processedTrack,
    events: emitter,
    get state() {
      return lastState;
    },
    setConfig: (next) => {
      try {
        if (next.speaking) {
          vad.updateConfig(next.speaking);
          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
        }
        if (next.output) {
          fullConfig.output = { ...fullConfig.output, ...next.output };
          updateGain(lastState);
        }
        if (next.noiseSuppression) {
          const ns = next.noiseSuppression;
          fullConfig.noiseSuppression = {
            ...fullConfig.noiseSuppression,
            ...ns
          };
          if (typeof ns.noiseReductionLevel === "number") {
            nsHandle.processor.setSuppressionLevel(ns.noiseReductionLevel);
          }
          if (typeof ns.enabled === "boolean") {
            nsHandle.processor.setNoiseSuppressionEnabled(ns.enabled);
          }
        }
        if (typeof next.muteWhenSilent === "boolean") {
          fullConfig.muteWhenSilent = next.muteWhenSilent;
        }
      } catch (error) {
        const err = error instanceof Error ? error : new Error(String(error));
        emitter.emit("error", err);
      }
    },
    dispose
  };
  return handle;
}

export {
  createAudioPipeline
};
```
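A usage sketch for this pipeline, assuming `createAudioPipeline` is still re-exported from the package root as it was in 0.1.x:

```ts
import { createAudioPipeline } from "@tensamin/audio";

const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
const pipeline = await createAudioPipeline(stream.getAudioTracks()[0], {
  speaking: { minDb: -60, maxDb: -20 },
  noiseSuppression: { enabled: true, noiseReductionLevel: 60 },
});

// `events` is a mitt emitter: "speakingChange" fires when the speaking flag
// flips or the level moves by more than 0.5 dB, "error" on processing failures.
pipeline.events.on("speakingChange", (state) => {
  console.log(state.speaking, state.levelDb);
});

// The gated, noise-suppressed audio comes out on `processedTrack`.
const processed = new MediaStream([pipeline.processedTrack]);

// Thresholds and gains can be retuned without rebuilding the audio graph.
pipeline.setConfig({ speaking: { speakOffRatio: 0.25 } });

// Tears down the Web Audio graph and unregisters the shared AudioContext user.
pipeline.dispose();
```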
package/dist/chunk-DTIMONGP.mjs
ADDED

```js
import {
  LevelBasedVAD
} from "./chunk-AQ5RVY33.mjs";
import {
  getAudioContext,
  registerPipeline,
  unregisterPipeline
} from "./chunk-OZ7KMC4S.mjs";
import {
  createLevelDetectorNode
} from "./chunk-QNQK6QFB.mjs";

// src/pipeline/remote-audio-monitor.ts
import mitt from "mitt";
async function createRemoteAudioMonitor(sourceTrack, config = {}) {
  const context = getAudioContext();
  registerPipeline();
  const fullConfig = {
    speaking: {
      minDb: config.speaking?.minDb ?? -60,
      maxDb: config.speaking?.maxDb ?? -20,
      speakOnRatio: config.speaking?.speakOnRatio ?? 0.6,
      speakOffRatio: config.speaking?.speakOffRatio ?? 0.3,
      hangoverMs: config.speaking?.hangoverMs ?? 350,
      attackMs: config.speaking?.attackMs ?? 50,
      releaseMs: config.speaking?.releaseMs ?? 120
    }
  };
  if (!sourceTrack || sourceTrack.kind !== "audio") {
    throw new Error(
      "createRemoteAudioMonitor requires a valid audio MediaStreamTrack"
    );
  }
  if (sourceTrack.readyState === "ended") {
    throw new Error("Cannot create monitor from an ended MediaStreamTrack");
  }
  const sourceStream = new MediaStream([sourceTrack]);
  const sourceNode = context.createMediaStreamSource(sourceStream);
  const emitter = mitt();
  const vad = new LevelBasedVAD(fullConfig.speaking);
  let lastState = { speaking: false, levelDb: -Infinity };
  const levelHandle = await createLevelDetectorNode(context, (levelDb) => {
    try {
      const timestamp = context.currentTime * 1e3;
      const nextState = vad.process(levelDb, timestamp);
      const speakingChanged = nextState.speaking !== lastState.speaking;
      const levelChanged = Math.abs(nextState.levelDb - lastState.levelDb) > 0.5;
      if (speakingChanged || levelChanged) {
        lastState = nextState;
        emitter.emit("speakingChange", nextState);
      }
    } catch (error) {
      const err = error instanceof Error ? error : new Error(String(error));
      emitter.emit("error", err);
    }
  });
  sourceNode.connect(levelHandle.node);
  function dispose() {
    try {
      sourceNode.disconnect();
      levelHandle.node.disconnect();
      levelHandle.dispose();
    } catch (error) {
      console.error("Error during remote monitor disposal", error);
    } finally {
      unregisterPipeline();
    }
  }
  const handle = {
    events: emitter,
    get state() {
      return lastState;
    },
    setConfig: (next) => {
      try {
        if (next.speaking) {
          vad.updateConfig(next.speaking);
          fullConfig.speaking = { ...fullConfig.speaking, ...next.speaking };
        }
      } catch (error) {
        const err = error instanceof Error ? error : new Error(String(error));
        emitter.emit("error", err);
      }
    },
    dispose
  };
  return handle;
}

export {
  createRemoteAudioMonitor
};
```
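A matching sketch for the remote monitor, which only taps a track's level (no gain gating, no processed output). The root import again assumes a package re-export; `mediaStreamTrack` is the underlying track on a subscribed livekit-client `RemoteAudioTrack`, and `updateSpeakingIndicator` is a hypothetical UI hook:

```ts
import { createRemoteAudioMonitor } from "@tensamin/audio";

// Watch a remote participant's audio without re-routing it.
const monitor = await createRemoteAudioMonitor(remoteTrack.mediaStreamTrack, {
  speaking: { minDb: -60, maxDb: -20 },
});

monitor.events.on("speakingChange", ({ speaking, levelDb }) => {
  updateSpeakingIndicator(speaking, levelDb); // hypothetical UI hook
});

// Only the speaking thresholds are tunable at runtime here.
monitor.setConfig({ speaking: { speakOnRatio: 0.5 } });

// Disconnects the level tap and unregisters the shared AudioContext user.
monitor.dispose();
```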