omnivad 0.2.5 → 0.2.9

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
package/README.md ADDED
@@ -0,0 +1,184 @@
1
+ # omnivad
2
+
3
+ [![npm](https://img.shields.io/npm/v/omnivad)](https://www.npmjs.com/package/omnivad)
4
+ [![npm bundle size](https://img.shields.io/bundlephobia/min/omnivad)](https://bundlephobia.com/package/omnivad)
5
+ [![license](https://img.shields.io/npm/l/omnivad)](https://github.com/lifeiteng/OmniVAD-Kit/blob/main/LICENSE)
6
+
7
+ Cross-platform Voice Activity Detection and Audio Event Detection via WebAssembly.
8
+ Runs in **browsers, Web Workers, and Node.js** with a single API. Zero runtime
9
+ dependencies. Built on [FireRedVAD](https://github.com/FireRedTeam/FireRedVAD)
10
+ from Xiaohongshu (DFSMN architecture, ~2.2 MB per model).
11
+
12
+ ## What's in the box
13
+
14
+ | Class | Use case | Output |
15
+ |-------|----------|--------|
16
+ | **`OmniVAD`** | Whole-audio voice activity detection | `[start, end]` timestamps |
17
+ | **`OmniStreamVAD`** | Real-time, frame-by-frame VAD with segment-boundary events | per-frame probability + start/end events |
18
+ | **`OmniAED`** | Audio event detection (3-class) | `speech` / `singing` / `music` timestamps |
19
+ | **`mergeChunks`** | Pack VAD output into Whisper-style 30 s chunks | `{ start, end, segStartIdx, segCount }[]` |
20
+
21
+ All four share one WASM module (~2.2 MB SIMD-enabled), one C implementation,
22
+ and a single bundle (~24 KB JS, ESM + CJS + types).
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pnpm add omnivad # or: npm install omnivad / yarn add omnivad
28
+ ```
29
+
30
+ Models are served from jsDelivr by default (zero config). For air-gapped or
31
+ custom deployments, pass `modelUrl` or pre-loaded `modelData`.
32
+
33
+ ## Quickstart — whole-audio VAD
34
+
35
+ ```ts
36
+ import { OmniVAD } from "omnivad";
37
+
38
+ const vad = await OmniVAD.create();
39
+
40
+ // Float32Array in [-1, 1] (Web Audio, decodeAudioData) or Int16Array (raw PCM)
41
+ const result = vad.detect(audioFloat32);
42
+ // { duration: 12.4, timestamps: [[0.35, 4.8], [5.1, 12.4]] }
43
+ ```
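Since `detect()` takes either a `Float32Array` in [-1, 1] or an `Int16Array` of raw PCM, the two input forms are related by the conventional int16 full-scale divisor. A minimal sketch of that normalization (the 32768 constant is the usual PCM convention; the package accepts `Int16Array` directly, so you only need this when you want the float path yourself):

```typescript
// Convert raw 16-bit PCM to a normalized Float32Array in [-1, 1].
function int16ToFloat32(pcm: Int16Array): Float32Array {
  const out = new Float32Array(pcm.length);
  for (let i = 0; i < pcm.length; i++) out[i] = pcm[i] / 32768;
  return out;
}

const samples = Int16Array.from([0, 16384, -32768, 32767]);
const floats = int16ToFloat32(samples);
// floats ≈ [0, 0.5, -1, 0.99997]
```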
44
+
45
+ ## Streaming VAD — real-time, frame-by-frame
46
+
47
+ `OmniStreamVAD` processes 10 ms frames (160 int16 samples @ 16 kHz) and emits
48
+ segment-boundary events on the same call that confirms the boundary —
49
+ bit-identical to upstream FireRedVAD's `FireRedStreamVad`.
50
+
51
+ ```ts
52
+ import { OmniStreamVAD } from "omnivad";
53
+
54
+ const vad = await OmniStreamVAD.create();
55
+
56
+ for (let i = 0; i + 160 <= pcm.length; i += 160) {
57
+ const r = vad.processFrame(pcm.subarray(i, i + 160));
58
+ if (!r) continue;
59
+ if (r.isSpeechStart) console.log(`START @ ${(r.speechStartFrame * 0.01).toFixed(2)}s`);
60
+ if (r.isSpeechEnd) console.log(`END @ ${(r.speechEndFrame * 0.01).toFixed(2)}s`);
61
+ }
62
+ ```
63
+
64
+ `processFrame()` returns `{ confidence, smoothedProb, isSpeech, isSpeechStart,
65
+ isSpeechEnd, frameIdx, speechStartFrame, speechEndFrame }` — every field comes
66
+ straight from the C state machine.
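Because the boundary events carry the matching frame indices, you can reconstruct `[start, end]` segments in seconds without tracking any state yourself. A sketch over hypothetical frame results (the `FrameEvents` shape and the assumption that `speechEndFrame` arrives on the end event are illustrative; the 0.01 factor is the 10 ms frame shift from the loop above):

```typescript
interface FrameEvents {
  isSpeechStart: boolean;
  isSpeechEnd: boolean;
  speechStartFrame: number;
  speechEndFrame: number;
}

const FRAME_SECS = 0.01; // 10 ms frame shift @ 16 kHz

// Collect [start, end] segments (in seconds) from a stream of results.
function collectSegments(results: FrameEvents[]): [number, number][] {
  const segments: [number, number][] = [];
  for (const r of results) {
    if (r.isSpeechEnd) {
      segments.push([
        r.speechStartFrame * FRAME_SECS,
        r.speechEndFrame * FRAME_SECS,
      ]);
    }
  }
  return segments;
}

// Hypothetical event sequence: speech from frame 35 to frame 480.
const segs = collectSegments([
  { isSpeechStart: true, isSpeechEnd: false, speechStartFrame: 35, speechEndFrame: 0 },
  { isSpeechStart: false, isSpeechEnd: true, speechStartFrame: 35, speechEndFrame: 480 },
]);
// segs ≈ [[0.35, 4.8]]
```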
67
+
68
+ ## Audio Event Detection — speech / singing / music
69
+
70
+ ```ts
71
+ import { OmniAED } from "omnivad";
72
+
73
+ const aed = await OmniAED.create();
74
+ const events = aed.detect(audioFloat32);
75
+ // { duration: 22.0,
76
+ // events: { speech: [[...]], singing: [[...]], music: [[...]] },
77
+ // ratios: { speech: 0.41, singing: 0.0, music: 0.59 } }
78
+ ```
79
+
80
+ ## Whisper / WhisperX-style chunking
81
+
82
+ `OmniVAD` + `mergeChunks(mode: "greedy")` is the 1:1 equivalent of WhisperX's
83
+ `Binarize(max_duration=chunk_size)` + greedy packing. Use this recipe when
84
+ feeding chunks into Whisper-family ASR models that expect a fixed 30 s window:
85
+
86
+ ```ts
87
+ import { OmniVAD, mergeChunks } from "omnivad";
88
+
89
+ const vad = await OmniVAD.create(); // threshold=0.4 default — safer for Whisper
90
+ const result = vad.detect(audioFloat32);
91
+
92
+ const chunks = await mergeChunks(result.timestamps, {
93
+ maxChunkSecs: 30.0, // Whisper input window
94
+ mode: "greedy", // WhisperX behavior
95
+ padOnsetSecs: 0.04,
96
+ padOffsetSecs: 0.04,
97
+ minSilenceSecs: 0.20,
98
+ });
99
+ // Slice the audio at [chunk.start, chunk.end] and feed each slice to Whisper.
100
+ ```
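The slicing step in the final comment is plain index arithmetic at 16 kHz. A sketch, assuming mono audio and round-to-nearest at the boundaries (the chunk times come from `mergeChunks`; the rounding choice is ours, not specified by the package):

```typescript
const SAMPLE_RATE = 16000;

// Cut one [start, end] window (in seconds) out of a mono Float32Array.
function sliceChunk(audio: Float32Array, startSecs: number, endSecs: number): Float32Array {
  const from = Math.max(0, Math.round(startSecs * SAMPLE_RATE));
  const to = Math.min(audio.length, Math.round(endSecs * SAMPLE_RATE));
  return audio.subarray(from, to); // a view over the original buffer, no copy
}

const audio = new Float32Array(SAMPLE_RATE * 60); // 60 s of silence
const piece = sliceChunk(audio, 0.5, 30.5);       // one 30 s window
// piece.length === 30 * SAMPLE_RATE
```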
101
+
102
+ A second mode `"longest_gap"` exists for variable-length-input models
103
+ (forced alignment, TTS) — see the GitHub README for the comparison table.
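For intuition, greedy packing just keeps appending segments to the current chunk until the next one would push it past `maxChunkSecs`. An illustrative sketch only, assuming the `{ start, end, segStartIdx, segCount }` output shape from the table above; the real packing runs in C and additionally applies the padding, gap, and silence rules:

```typescript
type Segment = [number, number];
interface Chunk { start: number; end: number; segStartIdx: number; segCount: number }

// Greedily pack segments into chunks no longer than maxChunkSecs.
function greedyPack(segments: Segment[], maxChunkSecs: number): Chunk[] {
  const chunks: Chunk[] = [];
  let cur: Chunk | null = null;
  segments.forEach(([start, end], i) => {
    if (cur && end - cur.start <= maxChunkSecs) {
      cur.end = end;          // segment still fits: extend the current chunk
      cur.segCount++;
    } else {
      cur = { start, end, segStartIdx: i, segCount: 1 }; // open a new chunk
      chunks.push(cur);
    }
  });
  return chunks;
}

const chunks = greedyPack([[0, 10], [12, 25], [28, 40], [41, 55]], 30);
// → two chunks: [0, 25] covering segments 0-1, [28, 55] covering segments 2-3
```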
104
+
105
+ ## Multi-stream concurrency
106
+
107
+ `OmniStreamVAD` instances have mutable per-stream state and **must not** be
108
+ shared across concurrent streams. Use `clone()` to spin up a fresh instance
109
+ that shares the underlying model weights but has its own state — instant,
110
+ near-zero memory overhead per stream.
111
+
112
+ ```ts
113
+ const base = await OmniStreamVAD.create();
114
+ const streamA = base.clone();
115
+ const streamB = base.clone();
116
+ // Process two independent audio sessions in parallel.
117
+ ```
118
+
119
+ ## Models and CDN
120
+
121
+ By default, models are fetched from jsDelivr:
122
+
123
+ ```
124
+ https://cdn.jsdelivr.net/npm/omnivad@<version>/models/{vad,stream-vad,aed}.omnivad
125
+ ```
126
+
127
+ Override per call when you need to host them yourself or pre-bundle:
128
+
129
+ ```ts
130
+ const vad = await OmniVAD.create({
131
+ modelUrl: "https://your-cdn/vad.omnivad", // or
132
+ modelData: arrayBufferYouAlreadyHave,
133
+ });
134
+ ```
135
+
136
+ In Node.js, models are read from the installed package (`omnivad/models/`) — no
137
+ network access required at runtime.
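If you want to bypass even the local package lookup, you can read a model file yourself and pass it as `modelData`. A sketch of the Node side, using a throwaway file as a stand-in for a real `.omnivad` model so the snippet is self-contained:

```typescript
import { readFileSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";

// Read any model file into the ArrayBuffer shape that modelData expects.
function loadModelData(path: string): ArrayBuffer {
  const buf = readFileSync(path);
  // A Node Buffer can be a view into a shared pool; copy out exactly this file's bytes.
  return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
}

// Throwaway file standing in for a real .omnivad model.
const demoPath = join(tmpdir(), "demo.omnivad");
writeFileSync(demoPath, Buffer.from([1, 2, 3, 4]));
const modelData = loadModelData(demoPath);
// modelData.byteLength === 4; then: OmniVAD.create({ modelData })
```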
138
+
139
+ ## Performance
140
+
141
+ Real-Time Factor (lower = faster) on Apple M-series:
142
+
143
+ | Model | RTF | Speed |
144
+ |-------|-----|-------|
145
+ | VAD | ~0.003 | ~330× real-time |
146
+ | Streaming VAD | ~0.002 | ~500× real-time |
147
+ | AED | ~0.002 | ~500× real-time |
148
+
149
+ WASM is built with SIMD enabled and ncnn fp16 weights.
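RTF is simply processing time divided by audio duration, so the "×-real-time" speed is its reciprocal. A quick sanity check of the table's arithmetic (the 180 ms / 60 s timing pair is an invented example):

```typescript
// Real-Time Factor: seconds of compute spent per second of audio.
function rtf(processingSecs: number, audioSecs: number): number {
  return processingSecs / audioSecs;
}

const vadRtf = rtf(0.18, 60);  // e.g. 60 s of audio processed in 180 ms
const speedup = 1 / vadRtf;    // the table's "~330× real-time" figure
// vadRtf ≈ 0.003, speedup ≈ 333
```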
150
+
151
+ ## Accuracy
152
+
153
+ Verified bit-identical to upstream PyTorch reference on 5 audio files × 3
154
+ models — see the [accuracy table](https://github.com/lifeiteng/OmniVAD-Kit#testing)
155
+ in the main repo.
156
+
157
+ ## Browser, Worker, Node — same API
158
+
159
+ The package detects its runtime and loads the right glue:
160
+
161
+ - **Browsers (main thread)** — classic-script injection of the Emscripten glue
162
+ (works around `MODULARIZE=1` IIFE issues with `import()`).
163
+ - **Web Workers / ServiceWorkers** — same path via `importScripts`.
164
+ - **Node.js (≥ 18)** — `createRequire` + local CJS resolution. No bundler
165
+ config needed.
166
+
167
+ ## See also
168
+
169
+ - Full documentation, accuracy tables, C/C++ API, Python package, native build:
170
+ [GitHub repository](https://github.com/lifeiteng/OmniVAD-Kit)
171
+ - [中文 README](https://github.com/lifeiteng/OmniVAD-Kit/blob/main/README.zh.md)
172
+ - [Local development guide](https://github.com/lifeiteng/OmniVAD-Kit#local-development)
173
+
174
+ ## Credits
175
+
176
+ - [**FireRedVAD**](https://github.com/FireRedTeam/FireRedVAD) — Kaituo Xu,
177
+ Wenpeng Li, Kai Huang, Kun Liu (Xiaohongshu). Source models, DFSMN
178
+ architecture, training pipeline.
179
+ - [ncnn](https://github.com/Tencent/ncnn) — Tencent. Inference backend.
180
+ - [Emscripten](https://emscripten.org/) — WebAssembly toolchain.
181
+
182
+ ## License
183
+
184
+ Apache-2.0 — same as upstream FireRedVAD.
package/dist/index.cjs CHANGED
@@ -4,12 +4,41 @@ var _documentCurrentScript = typeof document !== 'undefined' ? document.currentS
4
4
  // src/wasm-binding.ts
5
5
  var _module = null;
6
6
  var _loading = null;
7
+ function loadScript(url) {
8
+ if (typeof globalThis.document === "undefined") {
9
+ return new Promise((resolve, reject) => {
10
+ try {
11
+ const importScripts = globalThis.importScripts;
12
+ if (typeof importScripts !== "function") {
13
+ throw new Error(
14
+ "omnivad: cannot load glue script \u2014 no document and no importScripts"
15
+ );
16
+ }
17
+ importScripts(url);
18
+ resolve();
19
+ } catch (err) {
20
+ reject(err instanceof Error ? err : new Error(String(err)));
21
+ }
22
+ });
23
+ }
24
+ return new Promise((resolve, reject) => {
25
+ const s = globalThis.document.createElement("script");
26
+ s.src = url;
27
+ s.async = true;
28
+ s.crossOrigin = "anonymous";
29
+ s.onload = () => resolve();
30
+ s.onerror = () => reject(new Error(`Failed to load omnivad glue script: ${url}`));
31
+ globalThis.document.head.appendChild(s);
32
+ });
33
+ }
7
34
  var SIZEOF_POST_CONFIG = 28;
8
35
  var SIZEOF_AED_POST_CONFIG = 3 * SIZEOF_POST_CONFIG;
9
36
  var SIZEOF_SEGMENT = 8;
10
37
  var SIZEOF_AED_SEGMENT = 16;
38
+ var SIZEOF_CHUNK_CONFIG = 28;
39
+ var SIZEOF_CHUNK = 16;
11
40
  var OMNI_ERR_NO_FRAMES = -7;
12
- var VERSION = "0.2.5";
41
+ var VERSION = "0.2.9";
13
42
  var DEFAULT_CDN_BASE = `https://cdn.jsdelivr.net/npm/omnivad@${VERSION}/models`;
14
43
  var MODEL_FILES = {
15
44
  vad: "vad.omnivad",
@@ -25,22 +54,41 @@ async function initWasm(wasmLocator) {
25
54
  if (typeof globalThis.process?.versions?.node === "string") {
26
55
  const { createRequire } = await import(
27
56
  /* webpackIgnore: true */
57
+ /* turbopackIgnore: true */
28
58
  'module'
29
59
  );
30
- const { dirname, join } = await import('path');
60
+ const { dirname, join } = await import(
61
+ /* webpackIgnore: true */
62
+ /* turbopackIgnore: true */
63
+ 'path'
64
+ );
31
65
  const req = createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href)));
32
66
  const gluePath = req.resolve("../dist/wasm/omnivad.cjs");
33
67
  const wasmDir = dirname(gluePath);
34
68
  createOmniVAD = req(gluePath);
35
69
  defaultLocateFile = (filename) => join(wasmDir, filename);
36
70
  } else {
37
- const glueUrl = new URL("../dist/wasm/omnivad.js", (typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href)));
38
- const mod = await import(
39
- /* webpackIgnore: true */
40
- glueUrl.href
41
- );
42
- createOmniVAD = mod.default || mod;
43
- const wasmBaseUrl = new URL("./", glueUrl);
71
+ let glueUrlStr;
72
+ if (wasmLocator) {
73
+ glueUrlStr = wasmLocator("omnivad.js");
74
+ } else {
75
+ glueUrlStr = new URL("../dist/wasm/omnivad.js", (typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href))).href;
76
+ }
77
+ const g = globalThis;
78
+ let factory = g.createOmniVAD;
79
+ if (typeof factory !== "function") {
80
+ await loadScript(glueUrlStr);
81
+ factory = g.createOmniVAD;
82
+ }
83
+ if (typeof factory !== "function") {
84
+ throw new Error(
85
+ `omnivad.js loaded from ${glueUrlStr} but globalThis.createOmniVAD is missing`
86
+ );
87
+ }
88
+ createOmniVAD = factory;
89
+ const baseHref = typeof globalThis.location !== "undefined" ? globalThis.location.href : "file:///";
90
+ const absGlue = new URL(glueUrlStr, baseHref);
91
+ const wasmBaseUrl = new URL("./", absGlue);
44
92
  defaultLocateFile = (filename) => new URL(filename, wasmBaseUrl).toString();
45
93
  }
46
94
  const opts = {};
@@ -64,10 +112,19 @@ async function loadModel(modelType, modelUrl, modelData) {
64
112
  if (typeof globalThis.process?.versions?.node === "string") {
65
113
  const { createRequire } = await import(
66
114
  /* webpackIgnore: true */
115
+ /* turbopackIgnore: true */
67
116
  'module'
68
117
  );
69
- const { dirname, join } = await import('path');
70
- const { readFile } = await import('fs/promises');
118
+ const { dirname, join } = await import(
119
+ /* webpackIgnore: true */
120
+ /* turbopackIgnore: true */
121
+ 'path'
122
+ );
123
+ const { readFile } = await import(
124
+ /* webpackIgnore: true */
125
+ /* turbopackIgnore: true */
126
+ 'fs/promises'
127
+ );
71
128
  const req = createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href)));
72
129
  const pkgDir = dirname(req.resolve("../package.json"));
73
130
  const modelPath = join(pkgDir, "models", filename);
@@ -120,10 +177,86 @@ var DEFAULT_VAD_CONFIG = {
120
177
  smoothWindowSize: 5,
121
178
  minSpeechFrames: 20,
122
179
  minSilenceFrames: 20,
123
- maxSpeechFrames: 2e3,
180
+ maxSpeechFrames: 3e3,
124
181
  mergeSilenceFrames: 0,
125
182
  extendSpeechFrames: 0
126
183
  };
184
+ var OMNI_CHUNK_GREEDY = 0;
185
+ var OMNI_CHUNK_LONGEST_GAP = 1;
186
+ var DEFAULT_CHUNK_CONFIG = {
187
+ maxChunkSecs: 30,
188
+ maxGapSecs: Infinity,
189
+ padOnsetSecs: 0.04,
190
+ padOffsetSecs: 0.04,
191
+ minSpeechSecs: 0,
192
+ minSilenceSecs: 0.2,
193
+ // matches VAD minSilenceFrames=20 @ 10ms shift
194
+ mode: "greedy"
195
+ };
196
+ function modeToInt(m) {
197
+ switch (m) {
198
+ case "greedy":
199
+ return OMNI_CHUNK_GREEDY;
200
+ case "longest_gap":
201
+ return OMNI_CHUNK_LONGEST_GAP;
202
+ default:
203
+ throw new Error(`Unknown chunking mode: ${String(m)}`);
204
+ }
205
+ }
206
+ function writeChunkConfig(M, ptr, cfg) {
207
+ M.setValue(ptr + 0, cfg.maxChunkSecs, "float");
208
+ M.setValue(ptr + 4, cfg.maxGapSecs, "float");
209
+ M.setValue(ptr + 8, cfg.padOnsetSecs, "float");
210
+ M.setValue(ptr + 12, cfg.padOffsetSecs, "float");
211
+ M.setValue(ptr + 16, cfg.minSpeechSecs, "float");
212
+ M.setValue(ptr + 20, cfg.minSilenceSecs, "float");
213
+ M.setValue(ptr + 24, modeToInt(cfg.mode), "i32");
214
+ }
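The 28-byte layout written above (six f32 fields plus one i32 mode) can be sanity-checked outside the WASM heap with a plain `DataView`; a sketch, assuming little-endian byte order as in Emscripten's linear memory:

```typescript
const SIZEOF_CHUNK_CONFIG = 28;

interface ChunkConfig {
  maxChunkSecs: number; maxGapSecs: number; padOnsetSecs: number;
  padOffsetSecs: number; minSpeechSecs: number; minSilenceSecs: number;
  mode: number; // 0 = greedy, 1 = longest_gap
}

// Pack the config at the same byte offsets writeChunkConfig uses.
function packChunkConfig(cfg: ChunkConfig): ArrayBuffer {
  const buf = new ArrayBuffer(SIZEOF_CHUNK_CONFIG);
  const view = new DataView(buf);
  view.setFloat32(0, cfg.maxChunkSecs, true);   // true = little-endian
  view.setFloat32(4, cfg.maxGapSecs, true);
  view.setFloat32(8, cfg.padOnsetSecs, true);
  view.setFloat32(12, cfg.padOffsetSecs, true);
  view.setFloat32(16, cfg.minSpeechSecs, true);
  view.setFloat32(20, cfg.minSilenceSecs, true);
  view.setInt32(24, cfg.mode, true);
  return buf;
}

const packed = packChunkConfig({
  maxChunkSecs: 30, maxGapSecs: 1e9, padOnsetSecs: 0.04,
  padOffsetSecs: 0.04, minSpeechSecs: 0, minSilenceSecs: 0.2, mode: 0,
});
// packed.byteLength === 28
```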
215
+ function chunkMerge(M, segments, config) {
216
+ const numSegments = segments.length;
217
+ const segPtr = numSegments > 0 ? M._malloc(numSegments * SIZEOF_SEGMENT) : 0;
218
+ const cfgPtr = M._malloc(SIZEOF_CHUNK_CONFIG);
219
+ const outPtrPtr = M._malloc(4);
220
+ const outCountPtr = M._malloc(4);
221
+ try {
222
+ for (let i = 0; i < numSegments; i++) {
223
+ const base = segPtr + i * SIZEOF_SEGMENT;
224
+ M.setValue(base + 0, segments[i][0], "float");
225
+ M.setValue(base + 4, segments[i][1], "float");
226
+ }
227
+ writeChunkConfig(M, cfgPtr, config);
228
+ M.setValue(outPtrPtr, 0, "i32");
229
+ M.setValue(outCountPtr, 0, "i32");
230
+ const rc = M.ccall(
231
+ "omni_merge_chunks",
232
+ "number",
233
+ ["number", "number", "number", "number", "number"],
234
+ [segPtr, numSegments, cfgPtr, outPtrPtr, outCountPtr]
235
+ );
236
+ if (rc !== 0) {
237
+ throw new Error(`omni_merge_chunks failed: ${readNativeError(M, rc)}`);
238
+ }
239
+ const count = M.getValue(outCountPtr, "i32");
240
+ const chunkPtr = M.getValue(outPtrPtr, "i32");
241
+ const chunks = [];
242
+ for (let i = 0; i < count; i++) {
243
+ const base = chunkPtr + i * SIZEOF_CHUNK;
244
+ chunks.push({
245
+ start: M.getValue(base + 0, "float"),
246
+ end: M.getValue(base + 4, "float"),
247
+ segStartIdx: M.getValue(base + 8, "i32"),
248
+ segCount: M.getValue(base + 12, "i32")
249
+ });
250
+ }
251
+ if (chunkPtr) M._free(chunkPtr);
252
+ return chunks;
253
+ } finally {
254
+ if (segPtr) M._free(segPtr);
255
+ M._free(cfgPtr);
256
+ M._free(outPtrPtr);
257
+ M._free(outCountPtr);
258
+ }
259
+ }
127
260
  function vadCreate(M, modelBuffer) {
128
261
  const bytes = new Uint8Array(modelBuffer);
129
262
  const ptr = M._malloc(bytes.length);
@@ -228,24 +361,49 @@ function aedDetect(M, handle, audioPtr, numSamples, cfg, format = "f32") {
228
361
  function aedDestroy(M, handle) {
229
362
  M.ccall("omni_aed_destroy", null, ["number"], [handle]);
230
363
  }
231
- function streamVadCreate(M, modelBuffer, threshold = 0.5) {
364
+ var DEFAULT_STREAM_VAD_CONFIG = {
365
+ threshold: 0.5,
366
+ smoothWindowSize: 5,
367
+ padStartFrame: 5,
368
+ minSpeechFrame: 8,
369
+ maxSpeechFrame: 2e3,
370
+ minSilenceFrame: 20
371
+ };
372
+ var SIZEOF_STREAM_VAD_CONFIG = 24;
373
+ function writeStreamVadConfig(M, ptr, cfg) {
374
+ M.setValue(ptr + 0, cfg.threshold, "float");
375
+ M.setValue(ptr + 4, cfg.smoothWindowSize, "i32");
376
+ M.setValue(ptr + 8, cfg.padStartFrame, "i32");
377
+ M.setValue(ptr + 12, cfg.minSpeechFrame, "i32");
378
+ M.setValue(ptr + 16, cfg.maxSpeechFrame, "i32");
379
+ M.setValue(ptr + 20, cfg.minSilenceFrame, "i32");
380
+ }
381
+ function streamVadCreate(M, modelBuffer, config = {}) {
382
+ const overrides = Object.fromEntries(
383
+ Object.entries(config).filter(([, v]) => v !== void 0)
384
+ );
385
+ const cfg = { ...DEFAULT_STREAM_VAD_CONFIG, ...overrides };
232
386
  const bytes = new Uint8Array(modelBuffer);
233
- const ptr = M._malloc(bytes.length);
234
- M.HEAPU8.set(bytes, ptr);
387
+ const dataPtr = M._malloc(bytes.length);
388
+ M.HEAPU8.set(bytes, dataPtr);
389
+ const cfgPtr = M._malloc(SIZEOF_STREAM_VAD_CONFIG);
235
390
  try {
391
+ writeStreamVadConfig(M, cfgPtr, cfg);
236
392
  return createModel(
237
393
  M,
238
394
  "omni_stream_vad_create_from_buffer",
239
395
  ["number", "number", "number"],
240
- [ptr, bytes.length, threshold],
396
+ [dataPtr, bytes.length, cfgPtr],
241
397
  "StreamVAD"
242
398
  );
243
399
  } finally {
244
- M._free(ptr);
400
+ M._free(dataPtr);
401
+ M._free(cfgPtr);
245
402
  }
246
403
  }
404
+ var SIZEOF_STREAM_VAD_RESULT = 24;
247
405
  function streamVadProcess(M, handle, pcm16Ptr, numSamples) {
248
- const resultPtr = M._malloc(12);
406
+ const resultPtr = M._malloc(SIZEOF_STREAM_VAD_RESULT);
249
407
  try {
250
408
  const ret = M.ccall(
251
409
  "omni_stream_vad_process",
@@ -256,9 +414,14 @@ function streamVadProcess(M, handle, pcm16Ptr, numSamples) {
256
414
  if (ret === OMNI_ERR_NO_FRAMES) return null;
257
415
  if (ret !== 0) throw new Error(`StreamVAD process failed: ${ret}`);
258
416
  return {
259
- confidence: M.getValue(resultPtr, "float"),
260
- isSpeech: M.getValue(resultPtr + 4, "i8") !== 0,
261
- frameOffset: M.getValue(resultPtr + 8, "i32")
417
+ confidence: M.getValue(resultPtr + 0, "float"),
418
+ smoothedProb: M.getValue(resultPtr + 4, "float"),
419
+ isSpeech: M.getValue(resultPtr + 8, "i8") !== 0,
420
+ isSpeechStart: M.getValue(resultPtr + 9, "i8") !== 0,
421
+ isSpeechEnd: M.getValue(resultPtr + 10, "i8") !== 0,
422
+ frameIdx: M.getValue(resultPtr + 12, "i32"),
423
+ speechStartFrame: M.getValue(resultPtr + 16, "i32"),
424
+ speechEndFrame: M.getValue(resultPtr + 20, "i32")
262
425
  };
263
426
  } finally {
264
427
  M._free(resultPtr);
@@ -357,8 +520,6 @@ function int16ToNormalizedFloat32(i16) {
357
520
  var SAMPLE_RATE2 = 16e3;
358
521
  var OmniStreamVAD = class _OmniStreamVAD {
359
522
  constructor(handle) {
360
- this.inSpeech = false;
361
- this.speechStartFrame = 0;
362
523
  this.handle = handle;
363
524
  }
364
525
  /**
@@ -369,8 +530,14 @@ var OmniStreamVAD = class _OmniStreamVAD {
369
530
  await initWasm();
370
531
  const M = getModule();
371
532
  const modelBuffer = await loadModel("stream-vad", options.modelUrl, options.modelData);
372
- const threshold = options.speechThreshold ?? 0.5;
373
- const handle = streamVadCreate(M, modelBuffer, threshold);
533
+ const handle = streamVadCreate(M, modelBuffer, {
534
+ threshold: options.threshold,
535
+ smoothWindowSize: options.smoothWindowSize,
536
+ padStartFrame: options.padStartFrame,
537
+ minSpeechFrame: options.minSpeechFrame,
538
+ maxSpeechFrame: options.maxSpeechFrame,
539
+ minSilenceFrame: options.minSilenceFrame
540
+ });
374
541
  return new _OmniStreamVAD(handle);
375
542
  }
376
543
  /**
@@ -388,6 +555,10 @@ var OmniStreamVAD = class _OmniStreamVAD {
388
555
  /**
389
556
  * Process one frame of audio (160 int16 samples = 10ms @ 16kHz).
390
557
  * Returns null until enough audio is accumulated.
558
+ *
559
+ * Segment-boundary events (isSpeechStart / isSpeechEnd and the matching
560
+ * speech_*_frame indices) come straight from the C-layer state machine
561
+ * (bit-identical to upstream FireRedVAD) — the wrapper is just a marshaller.
391
562
  */
392
563
  processFrame(pcm160) {
393
564
  const M = getModule();
@@ -396,28 +567,16 @@ var OmniStreamVAD = class _OmniStreamVAD {
396
567
  heap16.set(pcm160);
397
568
  try {
398
569
  const result = streamVadProcess(M, this.handle, ptr, pcm160.length);
399
- if (!result || result.frameOffset === 0) return null;
400
- const frameIndex = result.frameOffset;
401
- const isSpeechStart = result.isSpeech && !this.inSpeech;
402
- const isSpeechEnd = !result.isSpeech && this.inSpeech;
403
- if (isSpeechStart) {
404
- this.speechStartFrame = frameIndex;
405
- }
406
- const activeSpeechStartFrame = isSpeechEnd ? this.speechStartFrame : result.isSpeech ? this.speechStartFrame : 0;
407
- const speechEndFrame = isSpeechEnd ? Math.max(1, frameIndex - 1) : 0;
408
- this.inSpeech = result.isSpeech;
409
- if (isSpeechEnd) {
410
- this.speechStartFrame = 0;
411
- }
570
+ if (!result) return null;
412
571
  return {
413
572
  confidence: result.confidence,
414
- smoothedConfidence: result.confidence,
573
+ smoothedProb: result.smoothedProb,
415
574
  isSpeech: result.isSpeech,
416
- frameIndex,
417
- isSpeechStart,
418
- isSpeechEnd,
419
- speechStartFrame: activeSpeechStartFrame,
420
- speechEndFrame
575
+ frameIndex: result.frameIdx,
576
+ isSpeechStart: result.isSpeechStart,
577
+ isSpeechEnd: result.isSpeechEnd,
578
+ speechStartFrame: result.speechStartFrame,
579
+ speechEndFrame: result.speechEndFrame
421
580
  };
422
581
  } finally {
423
582
  M._free(ptr);
@@ -456,11 +615,9 @@ var OmniStreamVAD = class _OmniStreamVAD {
456
615
  M._free(framesPtr);
457
616
  }
458
617
  }
459
- /** Reset all internal state. */
618
+ /** Reset all internal state (model cache, audio buffer, postprocessor). */
460
619
  reset() {
461
620
  streamVadReset(getModule(), this.handle);
462
- this.inSpeech = false;
463
- this.speechStartFrame = 0;
464
621
  }
465
622
  /** Release native resources. */
466
623
  dispose() {
@@ -468,8 +625,6 @@ var OmniStreamVAD = class _OmniStreamVAD {
468
625
  streamVadDestroy(getModule(), this.handle);
469
626
  this.handle = 0;
470
627
  }
471
- this.inSpeech = false;
472
- this.speechStartFrame = 0;
473
628
  }
474
629
  };
475
630
  function int16ToFloat32(i16) {
@@ -583,7 +738,30 @@ function computeCoverageRatios(events, duration) {
583
738
  return ratios;
584
739
  }
585
740
 
741
+ // src/chunking.ts
742
+ async function mergeChunks(segments, options = {}) {
743
+ await initWasm();
744
+ const M = getModule();
745
+ const cfg = {
746
+ maxChunkSecs: options.maxChunkSecs ?? DEFAULT_CHUNK_CONFIG.maxChunkSecs,
747
+ maxGapSecs: options.maxGapSecs ?? DEFAULT_CHUNK_CONFIG.maxGapSecs,
748
+ padOnsetSecs: options.padOnsetSecs ?? DEFAULT_CHUNK_CONFIG.padOnsetSecs,
749
+ padOffsetSecs: options.padOffsetSecs ?? DEFAULT_CHUNK_CONFIG.padOffsetSecs,
750
+ minSpeechSecs: options.minSpeechSecs ?? DEFAULT_CHUNK_CONFIG.minSpeechSecs,
751
+ minSilenceSecs: options.minSilenceSecs ?? DEFAULT_CHUNK_CONFIG.minSilenceSecs,
752
+ mode: options.mode ?? DEFAULT_CHUNK_CONFIG.mode
753
+ };
754
+ const records = chunkMerge(M, segments, cfg);
755
+ return records.map((r) => ({
756
+ start: r.start,
757
+ end: r.end,
758
+ segStartIdx: r.segStartIdx,
759
+ segCount: r.segCount
760
+ }));
761
+ }
762
+
586
763
  exports.DEFAULT_CDN_BASE = DEFAULT_CDN_BASE;
764
+ exports.DEFAULT_CHUNK_CONFIG = DEFAULT_CHUNK_CONFIG;
587
765
  exports.FireRedAED = OmniAED;
588
766
  exports.FireRedStreamVAD = OmniStreamVAD;
589
767
  exports.FireRedVAD = OmniVAD;
@@ -594,5 +772,6 @@ exports.OmniVAD = OmniVAD;
594
772
  exports.VERSION = VERSION;
595
773
  exports.initWasm = initWasm;
596
774
  exports.loadModel = loadModel;
775
+ exports.mergeChunks = mergeChunks;
597
776
  //# sourceMappingURL=index.cjs.map
598
777
  //# sourceMappingURL=index.cjs.map