omnivad 0.2.4 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +258 -49
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +159 -19
- package/dist/index.d.ts +159 -19
- package/dist/index.js +257 -50
- package/dist/index.js.map +1 -1
- package/dist/wasm/omnivad.cjs +1 -1
- package/dist/wasm/omnivad.js +1 -1
- package/dist/wasm/omnivad.wasm +0 -0
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -4,12 +4,41 @@ var _documentCurrentScript = typeof document !== 'undefined' ? document.currentS
|
|
|
4
4
|
// src/wasm-binding.ts
|
|
5
5
|
var _module = null;
|
|
6
6
|
var _loading = null;
|
|
7
|
+
function loadScript(url) {
|
|
8
|
+
if (typeof globalThis.document === "undefined") {
|
|
9
|
+
return new Promise((resolve, reject) => {
|
|
10
|
+
try {
|
|
11
|
+
const importScripts = globalThis.importScripts;
|
|
12
|
+
if (typeof importScripts !== "function") {
|
|
13
|
+
throw new Error(
|
|
14
|
+
"omnivad: cannot load glue script \u2014 no document and no importScripts"
|
|
15
|
+
);
|
|
16
|
+
}
|
|
17
|
+
importScripts(url);
|
|
18
|
+
resolve();
|
|
19
|
+
} catch (err) {
|
|
20
|
+
reject(err instanceof Error ? err : new Error(String(err)));
|
|
21
|
+
}
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
return new Promise((resolve, reject) => {
|
|
25
|
+
const s = globalThis.document.createElement("script");
|
|
26
|
+
s.src = url;
|
|
27
|
+
s.async = true;
|
|
28
|
+
s.crossOrigin = "anonymous";
|
|
29
|
+
s.onload = () => resolve();
|
|
30
|
+
s.onerror = () => reject(new Error(`Failed to load omnivad glue script: ${url}`));
|
|
31
|
+
globalThis.document.head.appendChild(s);
|
|
32
|
+
});
|
|
33
|
+
}
|
|
7
34
|
var SIZEOF_POST_CONFIG = 28;
|
|
8
35
|
var SIZEOF_AED_POST_CONFIG = 3 * SIZEOF_POST_CONFIG;
|
|
9
36
|
var SIZEOF_SEGMENT = 8;
|
|
10
37
|
var SIZEOF_AED_SEGMENT = 16;
|
|
38
|
+
var SIZEOF_CHUNK_CONFIG = 28;
|
|
39
|
+
var SIZEOF_CHUNK = 16;
|
|
11
40
|
var OMNI_ERR_NO_FRAMES = -7;
|
|
12
|
-
var VERSION = "0.2.4";
|
|
41
|
+
var VERSION = "0.2.8";
|
|
13
42
|
var DEFAULT_CDN_BASE = `https://cdn.jsdelivr.net/npm/omnivad@${VERSION}/models`;
|
|
14
43
|
var MODEL_FILES = {
|
|
15
44
|
vad: "vad.omnivad",
|
|
@@ -25,22 +54,41 @@ async function initWasm(wasmLocator) {
|
|
|
25
54
|
if (typeof globalThis.process?.versions?.node === "string") {
|
|
26
55
|
const { createRequire } = await import(
|
|
27
56
|
/* webpackIgnore: true */
|
|
57
|
+
/* turbopackIgnore: true */
|
|
28
58
|
'module'
|
|
29
59
|
);
|
|
30
|
-
const { dirname, join } = await import(
|
|
60
|
+
const { dirname, join } = await import(
|
|
61
|
+
/* webpackIgnore: true */
|
|
62
|
+
/* turbopackIgnore: true */
|
|
63
|
+
'path'
|
|
64
|
+
);
|
|
31
65
|
const req = createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href)));
|
|
32
66
|
const gluePath = req.resolve("../dist/wasm/omnivad.cjs");
|
|
33
67
|
const wasmDir = dirname(gluePath);
|
|
34
68
|
createOmniVAD = req(gluePath);
|
|
35
69
|
defaultLocateFile = (filename) => join(wasmDir, filename);
|
|
36
70
|
} else {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
const
|
|
71
|
+
let glueUrlStr;
|
|
72
|
+
if (wasmLocator) {
|
|
73
|
+
glueUrlStr = wasmLocator("omnivad.js");
|
|
74
|
+
} else {
|
|
75
|
+
glueUrlStr = new URL("../dist/wasm/omnivad.js", (typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href))).href;
|
|
76
|
+
}
|
|
77
|
+
const g = globalThis;
|
|
78
|
+
let factory = g.createOmniVAD;
|
|
79
|
+
if (typeof factory !== "function") {
|
|
80
|
+
await loadScript(glueUrlStr);
|
|
81
|
+
factory = g.createOmniVAD;
|
|
82
|
+
}
|
|
83
|
+
if (typeof factory !== "function") {
|
|
84
|
+
throw new Error(
|
|
85
|
+
`omnivad.js loaded from ${glueUrlStr} but globalThis.createOmniVAD is missing`
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
createOmniVAD = factory;
|
|
89
|
+
const baseHref = typeof globalThis.location !== "undefined" ? globalThis.location.href : "file:///";
|
|
90
|
+
const absGlue = new URL(glueUrlStr, baseHref);
|
|
91
|
+
const wasmBaseUrl = new URL("./", absGlue);
|
|
44
92
|
defaultLocateFile = (filename) => new URL(filename, wasmBaseUrl).toString();
|
|
45
93
|
}
|
|
46
94
|
const opts = {};
|
|
@@ -64,10 +112,19 @@ async function loadModel(modelType, modelUrl, modelData) {
|
|
|
64
112
|
if (typeof globalThis.process?.versions?.node === "string") {
|
|
65
113
|
const { createRequire } = await import(
|
|
66
114
|
/* webpackIgnore: true */
|
|
115
|
+
/* turbopackIgnore: true */
|
|
67
116
|
'module'
|
|
68
117
|
);
|
|
69
|
-
const { dirname, join } = await import(
|
|
70
|
-
|
|
118
|
+
const { dirname, join } = await import(
|
|
119
|
+
/* webpackIgnore: true */
|
|
120
|
+
/* turbopackIgnore: true */
|
|
121
|
+
'path'
|
|
122
|
+
);
|
|
123
|
+
const { readFile } = await import(
|
|
124
|
+
/* webpackIgnore: true */
|
|
125
|
+
/* turbopackIgnore: true */
|
|
126
|
+
'fs/promises'
|
|
127
|
+
);
|
|
71
128
|
const req = createRequire((typeof document === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('index.cjs', document.baseURI).href)));
|
|
72
129
|
const pkgDir = dirname(req.resolve("../package.json"));
|
|
73
130
|
const modelPath = join(pkgDir, "models", filename);
|
|
@@ -120,10 +177,86 @@ var DEFAULT_VAD_CONFIG = {
|
|
|
120
177
|
smoothWindowSize: 5,
|
|
121
178
|
minSpeechFrames: 20,
|
|
122
179
|
minSilenceFrames: 20,
|
|
123
|
-
maxSpeechFrames:
|
|
180
|
+
maxSpeechFrames: 3e3,
|
|
124
181
|
mergeSilenceFrames: 0,
|
|
125
182
|
extendSpeechFrames: 0
|
|
126
183
|
};
|
|
184
|
+
var OMNI_CHUNK_GREEDY = 0;
|
|
185
|
+
var OMNI_CHUNK_LONGEST_GAP = 1;
|
|
186
|
+
var DEFAULT_CHUNK_CONFIG = {
|
|
187
|
+
maxChunkSecs: 30,
|
|
188
|
+
maxGapSecs: Infinity,
|
|
189
|
+
padOnsetSecs: 0.04,
|
|
190
|
+
padOffsetSecs: 0.04,
|
|
191
|
+
minSpeechSecs: 0,
|
|
192
|
+
minSilenceSecs: 0.2,
|
|
193
|
+
// matches VAD minSilenceFrames=20 @ 10ms shift
|
|
194
|
+
mode: "greedy"
|
|
195
|
+
};
|
|
196
|
+
function modeToInt(m) {
|
|
197
|
+
switch (m) {
|
|
198
|
+
case "greedy":
|
|
199
|
+
return OMNI_CHUNK_GREEDY;
|
|
200
|
+
case "longest_gap":
|
|
201
|
+
return OMNI_CHUNK_LONGEST_GAP;
|
|
202
|
+
default:
|
|
203
|
+
throw new Error(`Unknown chunking mode: ${String(m)}`);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
function writeChunkConfig(M, ptr, cfg) {
|
|
207
|
+
M.setValue(ptr + 0, cfg.maxChunkSecs, "float");
|
|
208
|
+
M.setValue(ptr + 4, cfg.maxGapSecs, "float");
|
|
209
|
+
M.setValue(ptr + 8, cfg.padOnsetSecs, "float");
|
|
210
|
+
M.setValue(ptr + 12, cfg.padOffsetSecs, "float");
|
|
211
|
+
M.setValue(ptr + 16, cfg.minSpeechSecs, "float");
|
|
212
|
+
M.setValue(ptr + 20, cfg.minSilenceSecs, "float");
|
|
213
|
+
M.setValue(ptr + 24, modeToInt(cfg.mode), "i32");
|
|
214
|
+
}
|
|
215
|
+
function chunkMerge(M, segments, config) {
|
|
216
|
+
const numSegments = segments.length;
|
|
217
|
+
const segPtr = numSegments > 0 ? M._malloc(numSegments * SIZEOF_SEGMENT) : 0;
|
|
218
|
+
const cfgPtr = M._malloc(SIZEOF_CHUNK_CONFIG);
|
|
219
|
+
const outPtrPtr = M._malloc(4);
|
|
220
|
+
const outCountPtr = M._malloc(4);
|
|
221
|
+
try {
|
|
222
|
+
for (let i = 0; i < numSegments; i++) {
|
|
223
|
+
const base = segPtr + i * SIZEOF_SEGMENT;
|
|
224
|
+
M.setValue(base + 0, segments[i][0], "float");
|
|
225
|
+
M.setValue(base + 4, segments[i][1], "float");
|
|
226
|
+
}
|
|
227
|
+
writeChunkConfig(M, cfgPtr, config);
|
|
228
|
+
M.setValue(outPtrPtr, 0, "i32");
|
|
229
|
+
M.setValue(outCountPtr, 0, "i32");
|
|
230
|
+
const rc = M.ccall(
|
|
231
|
+
"omni_merge_chunks",
|
|
232
|
+
"number",
|
|
233
|
+
["number", "number", "number", "number", "number"],
|
|
234
|
+
[segPtr, numSegments, cfgPtr, outPtrPtr, outCountPtr]
|
|
235
|
+
);
|
|
236
|
+
if (rc !== 0) {
|
|
237
|
+
throw new Error(`omni_merge_chunks failed: ${readNativeError(M, rc)}`);
|
|
238
|
+
}
|
|
239
|
+
const count = M.getValue(outCountPtr, "i32");
|
|
240
|
+
const chunkPtr = M.getValue(outPtrPtr, "i32");
|
|
241
|
+
const chunks = [];
|
|
242
|
+
for (let i = 0; i < count; i++) {
|
|
243
|
+
const base = chunkPtr + i * SIZEOF_CHUNK;
|
|
244
|
+
chunks.push({
|
|
245
|
+
start: M.getValue(base + 0, "float"),
|
|
246
|
+
end: M.getValue(base + 4, "float"),
|
|
247
|
+
segStartIdx: M.getValue(base + 8, "i32"),
|
|
248
|
+
segCount: M.getValue(base + 12, "i32")
|
|
249
|
+
});
|
|
250
|
+
}
|
|
251
|
+
if (chunkPtr) M._free(chunkPtr);
|
|
252
|
+
return chunks;
|
|
253
|
+
} finally {
|
|
254
|
+
if (segPtr) M._free(segPtr);
|
|
255
|
+
M._free(cfgPtr);
|
|
256
|
+
M._free(outPtrPtr);
|
|
257
|
+
M._free(outCountPtr);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
127
260
|
function vadCreate(M, modelBuffer) {
|
|
128
261
|
const bytes = new Uint8Array(modelBuffer);
|
|
129
262
|
const ptr = M._malloc(bytes.length);
|
|
@@ -228,24 +361,49 @@ function aedDetect(M, handle, audioPtr, numSamples, cfg, format = "f32") {
|
|
|
228
361
|
function aedDestroy(M, handle) {
|
|
229
362
|
M.ccall("omni_aed_destroy", null, ["number"], [handle]);
|
|
230
363
|
}
|
|
231
|
-
|
|
364
|
+
var DEFAULT_STREAM_VAD_CONFIG = {
|
|
365
|
+
threshold: 0.5,
|
|
366
|
+
smoothWindowSize: 5,
|
|
367
|
+
padStartFrame: 5,
|
|
368
|
+
minSpeechFrame: 8,
|
|
369
|
+
maxSpeechFrame: 2e3,
|
|
370
|
+
minSilenceFrame: 20
|
|
371
|
+
};
|
|
372
|
+
var SIZEOF_STREAM_VAD_CONFIG = 24;
|
|
373
|
+
function writeStreamVadConfig(M, ptr, cfg) {
|
|
374
|
+
M.setValue(ptr + 0, cfg.threshold, "float");
|
|
375
|
+
M.setValue(ptr + 4, cfg.smoothWindowSize, "i32");
|
|
376
|
+
M.setValue(ptr + 8, cfg.padStartFrame, "i32");
|
|
377
|
+
M.setValue(ptr + 12, cfg.minSpeechFrame, "i32");
|
|
378
|
+
M.setValue(ptr + 16, cfg.maxSpeechFrame, "i32");
|
|
379
|
+
M.setValue(ptr + 20, cfg.minSilenceFrame, "i32");
|
|
380
|
+
}
|
|
381
|
+
function streamVadCreate(M, modelBuffer, config = {}) {
|
|
382
|
+
const overrides = Object.fromEntries(
|
|
383
|
+
Object.entries(config).filter(([, v]) => v !== void 0)
|
|
384
|
+
);
|
|
385
|
+
const cfg = { ...DEFAULT_STREAM_VAD_CONFIG, ...overrides };
|
|
232
386
|
const bytes = new Uint8Array(modelBuffer);
|
|
233
|
-
const
|
|
234
|
-
M.HEAPU8.set(bytes,
|
|
387
|
+
const dataPtr = M._malloc(bytes.length);
|
|
388
|
+
M.HEAPU8.set(bytes, dataPtr);
|
|
389
|
+
const cfgPtr = M._malloc(SIZEOF_STREAM_VAD_CONFIG);
|
|
235
390
|
try {
|
|
391
|
+
writeStreamVadConfig(M, cfgPtr, cfg);
|
|
236
392
|
return createModel(
|
|
237
393
|
M,
|
|
238
394
|
"omni_stream_vad_create_from_buffer",
|
|
239
395
|
["number", "number", "number"],
|
|
240
|
-
[
|
|
396
|
+
[dataPtr, bytes.length, cfgPtr],
|
|
241
397
|
"StreamVAD"
|
|
242
398
|
);
|
|
243
399
|
} finally {
|
|
244
|
-
M._free(
|
|
400
|
+
M._free(dataPtr);
|
|
401
|
+
M._free(cfgPtr);
|
|
245
402
|
}
|
|
246
403
|
}
|
|
404
|
+
var SIZEOF_STREAM_VAD_RESULT = 24;
|
|
247
405
|
function streamVadProcess(M, handle, pcm16Ptr, numSamples) {
|
|
248
|
-
const resultPtr = M._malloc(
|
|
406
|
+
const resultPtr = M._malloc(SIZEOF_STREAM_VAD_RESULT);
|
|
249
407
|
try {
|
|
250
408
|
const ret = M.ccall(
|
|
251
409
|
"omni_stream_vad_process",
|
|
@@ -256,14 +414,37 @@ function streamVadProcess(M, handle, pcm16Ptr, numSamples) {
|
|
|
256
414
|
if (ret === OMNI_ERR_NO_FRAMES) return null;
|
|
257
415
|
if (ret !== 0) throw new Error(`StreamVAD process failed: ${ret}`);
|
|
258
416
|
return {
|
|
259
|
-
confidence: M.getValue(resultPtr, "float"),
|
|
260
|
-
|
|
261
|
-
|
|
417
|
+
confidence: M.getValue(resultPtr + 0, "float"),
|
|
418
|
+
smoothedProb: M.getValue(resultPtr + 4, "float"),
|
|
419
|
+
isSpeech: M.getValue(resultPtr + 8, "i8") !== 0,
|
|
420
|
+
isSpeechStart: M.getValue(resultPtr + 9, "i8") !== 0,
|
|
421
|
+
isSpeechEnd: M.getValue(resultPtr + 10, "i8") !== 0,
|
|
422
|
+
frameIdx: M.getValue(resultPtr + 12, "i32"),
|
|
423
|
+
speechStartFrame: M.getValue(resultPtr + 16, "i32"),
|
|
424
|
+
speechEndFrame: M.getValue(resultPtr + 20, "i32")
|
|
262
425
|
};
|
|
263
426
|
} finally {
|
|
264
427
|
M._free(resultPtr);
|
|
265
428
|
}
|
|
266
429
|
}
|
|
430
|
+
function streamVadClone(M, handle) {
|
|
431
|
+
const errPtr = M._malloc(4);
|
|
432
|
+
try {
|
|
433
|
+
const newHandle = M.ccall(
|
|
434
|
+
"omni_stream_vad_clone",
|
|
435
|
+
"number",
|
|
436
|
+
["number", "number"],
|
|
437
|
+
[handle, errPtr]
|
|
438
|
+
);
|
|
439
|
+
if (!newHandle) {
|
|
440
|
+
const err = M.getValue(errPtr, "i32");
|
|
441
|
+
throw new Error(`StreamVAD clone failed: ${readNativeError(M, err)}`);
|
|
442
|
+
}
|
|
443
|
+
return newHandle;
|
|
444
|
+
} finally {
|
|
445
|
+
M._free(errPtr);
|
|
446
|
+
}
|
|
447
|
+
}
|
|
267
448
|
function streamVadReset(M, handle) {
|
|
268
449
|
M.ccall("omni_stream_vad_reset", null, ["number"], [handle]);
|
|
269
450
|
}
|
|
@@ -339,8 +520,6 @@ function int16ToNormalizedFloat32(i16) {
|
|
|
339
520
|
var SAMPLE_RATE2 = 16e3;
|
|
340
521
|
var OmniStreamVAD = class _OmniStreamVAD {
|
|
341
522
|
constructor(handle) {
|
|
342
|
-
this.inSpeech = false;
|
|
343
|
-
this.speechStartFrame = 0;
|
|
344
523
|
this.handle = handle;
|
|
345
524
|
}
|
|
346
525
|
/**
|
|
@@ -351,13 +530,35 @@ var OmniStreamVAD = class _OmniStreamVAD {
|
|
|
351
530
|
await initWasm();
|
|
352
531
|
const M = getModule();
|
|
353
532
|
const modelBuffer = await loadModel("stream-vad", options.modelUrl, options.modelData);
|
|
354
|
-
const
|
|
355
|
-
|
|
533
|
+
const handle = streamVadCreate(M, modelBuffer, {
|
|
534
|
+
threshold: options.threshold,
|
|
535
|
+
smoothWindowSize: options.smoothWindowSize,
|
|
536
|
+
padStartFrame: options.padStartFrame,
|
|
537
|
+
minSpeechFrame: options.minSpeechFrame,
|
|
538
|
+
maxSpeechFrame: options.maxSpeechFrame,
|
|
539
|
+
minSilenceFrame: options.minSilenceFrame
|
|
540
|
+
});
|
|
356
541
|
return new _OmniStreamVAD(handle);
|
|
357
542
|
}
|
|
543
|
+
/**
|
|
544
|
+
* Create a lightweight clone sharing the same underlying model weights.
|
|
545
|
+
* The clone has fresh per-instance state (empty audio buffer, zeroed cache).
|
|
546
|
+
* This is synchronous and extremely fast — ideal for multi-stream scenarios
|
|
547
|
+
* (e.g., handling multiple WebRTC tracks or concurrent audio sessions).
|
|
548
|
+
*/
|
|
549
|
+
clone() {
|
|
550
|
+
if (!this.handle) throw new Error("Cannot clone a disposed instance.");
|
|
551
|
+
const M = getModule();
|
|
552
|
+
const newHandle = streamVadClone(M, this.handle);
|
|
553
|
+
return new _OmniStreamVAD(newHandle);
|
|
554
|
+
}
|
|
358
555
|
/**
|
|
359
556
|
* Process one frame of audio (160 int16 samples = 10ms @ 16kHz).
|
|
360
557
|
* Returns null until enough audio is accumulated.
|
|
558
|
+
*
|
|
559
|
+
* Segment-boundary events (isSpeechStart / isSpeechEnd and the matching
|
|
560
|
+
* speech_*_frame indices) come straight from the C-layer state machine
|
|
561
|
+
* (bit-identical to upstream FireRedVAD) — the wrapper is just a marshaller.
|
|
361
562
|
*/
|
|
362
563
|
processFrame(pcm160) {
|
|
363
564
|
const M = getModule();
|
|
@@ -366,28 +567,16 @@ var OmniStreamVAD = class _OmniStreamVAD {
|
|
|
366
567
|
heap16.set(pcm160);
|
|
367
568
|
try {
|
|
368
569
|
const result = streamVadProcess(M, this.handle, ptr, pcm160.length);
|
|
369
|
-
if (!result
|
|
370
|
-
const frameIndex = result.frameOffset;
|
|
371
|
-
const isSpeechStart = result.isSpeech && !this.inSpeech;
|
|
372
|
-
const isSpeechEnd = !result.isSpeech && this.inSpeech;
|
|
373
|
-
if (isSpeechStart) {
|
|
374
|
-
this.speechStartFrame = frameIndex;
|
|
375
|
-
}
|
|
376
|
-
const activeSpeechStartFrame = isSpeechEnd ? this.speechStartFrame : result.isSpeech ? this.speechStartFrame : 0;
|
|
377
|
-
const speechEndFrame = isSpeechEnd ? Math.max(1, frameIndex - 1) : 0;
|
|
378
|
-
this.inSpeech = result.isSpeech;
|
|
379
|
-
if (isSpeechEnd) {
|
|
380
|
-
this.speechStartFrame = 0;
|
|
381
|
-
}
|
|
570
|
+
if (!result) return null;
|
|
382
571
|
return {
|
|
383
572
|
confidence: result.confidence,
|
|
384
|
-
|
|
573
|
+
smoothedProb: result.smoothedProb,
|
|
385
574
|
isSpeech: result.isSpeech,
|
|
386
|
-
frameIndex,
|
|
387
|
-
isSpeechStart,
|
|
388
|
-
isSpeechEnd,
|
|
389
|
-
speechStartFrame: activeSpeechStartFrame,
|
|
390
|
-
speechEndFrame
|
|
575
|
+
frameIndex: result.frameIdx,
|
|
576
|
+
isSpeechStart: result.isSpeechStart,
|
|
577
|
+
isSpeechEnd: result.isSpeechEnd,
|
|
578
|
+
speechStartFrame: result.speechStartFrame,
|
|
579
|
+
speechEndFrame: result.speechEndFrame
|
|
391
580
|
};
|
|
392
581
|
} finally {
|
|
393
582
|
M._free(ptr);
|
|
@@ -426,11 +615,9 @@ var OmniStreamVAD = class _OmniStreamVAD {
|
|
|
426
615
|
M._free(framesPtr);
|
|
427
616
|
}
|
|
428
617
|
}
|
|
429
|
-
/** Reset all internal state. */
|
|
618
|
+
/** Reset all internal state (model cache, audio buffer, postprocessor). */
|
|
430
619
|
reset() {
|
|
431
620
|
streamVadReset(getModule(), this.handle);
|
|
432
|
-
this.inSpeech = false;
|
|
433
|
-
this.speechStartFrame = 0;
|
|
434
621
|
}
|
|
435
622
|
/** Release native resources. */
|
|
436
623
|
dispose() {
|
|
@@ -438,8 +625,6 @@ var OmniStreamVAD = class _OmniStreamVAD {
|
|
|
438
625
|
streamVadDestroy(getModule(), this.handle);
|
|
439
626
|
this.handle = 0;
|
|
440
627
|
}
|
|
441
|
-
this.inSpeech = false;
|
|
442
|
-
this.speechStartFrame = 0;
|
|
443
628
|
}
|
|
444
629
|
};
|
|
445
630
|
function int16ToFloat32(i16) {
|
|
@@ -553,7 +738,30 @@ function computeCoverageRatios(events, duration) {
|
|
|
553
738
|
return ratios;
|
|
554
739
|
}
|
|
555
740
|
|
|
741
|
+
// src/chunking.ts
|
|
742
|
+
async function mergeChunks(segments, options = {}) {
|
|
743
|
+
await initWasm();
|
|
744
|
+
const M = getModule();
|
|
745
|
+
const cfg = {
|
|
746
|
+
maxChunkSecs: options.maxChunkSecs ?? DEFAULT_CHUNK_CONFIG.maxChunkSecs,
|
|
747
|
+
maxGapSecs: options.maxGapSecs ?? DEFAULT_CHUNK_CONFIG.maxGapSecs,
|
|
748
|
+
padOnsetSecs: options.padOnsetSecs ?? DEFAULT_CHUNK_CONFIG.padOnsetSecs,
|
|
749
|
+
padOffsetSecs: options.padOffsetSecs ?? DEFAULT_CHUNK_CONFIG.padOffsetSecs,
|
|
750
|
+
minSpeechSecs: options.minSpeechSecs ?? DEFAULT_CHUNK_CONFIG.minSpeechSecs,
|
|
751
|
+
minSilenceSecs: options.minSilenceSecs ?? DEFAULT_CHUNK_CONFIG.minSilenceSecs,
|
|
752
|
+
mode: options.mode ?? DEFAULT_CHUNK_CONFIG.mode
|
|
753
|
+
};
|
|
754
|
+
const records = chunkMerge(M, segments, cfg);
|
|
755
|
+
return records.map((r) => ({
|
|
756
|
+
start: r.start,
|
|
757
|
+
end: r.end,
|
|
758
|
+
segStartIdx: r.segStartIdx,
|
|
759
|
+
segCount: r.segCount
|
|
760
|
+
}));
|
|
761
|
+
}
|
|
762
|
+
|
|
556
763
|
exports.DEFAULT_CDN_BASE = DEFAULT_CDN_BASE;
|
|
764
|
+
exports.DEFAULT_CHUNK_CONFIG = DEFAULT_CHUNK_CONFIG;
|
|
557
765
|
exports.FireRedAED = OmniAED;
|
|
558
766
|
exports.FireRedStreamVAD = OmniStreamVAD;
|
|
559
767
|
exports.FireRedVAD = OmniVAD;
|
|
@@ -564,5 +772,6 @@ exports.OmniVAD = OmniVAD;
|
|
|
564
772
|
exports.VERSION = VERSION;
|
|
565
773
|
exports.initWasm = initWasm;
|
|
566
774
|
exports.loadModel = loadModel;
|
|
775
|
+
exports.mergeChunks = mergeChunks;
|
|
567
776
|
//# sourceMappingURL=index.cjs.map
|
|
568
777
|
//# sourceMappingURL=index.cjs.map
|