@livekit/agents-plugin-silero 0.5.9 → 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -16,13 +16,24 @@ var __copyProps = (to, from, except, desc) => {
16
16
  return to;
17
17
  };
18
18
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
- var src_exports = {};
20
- __export(src_exports, {
19
+ var index_exports = {};
20
+ __export(index_exports, {
21
21
  VAD: () => import_vad.VAD,
22
22
  VADStream: () => import_vad.VADStream
23
23
  });
24
- module.exports = __toCommonJS(src_exports);
24
+ module.exports = __toCommonJS(index_exports);
25
+ var import_agents = require("@livekit/agents");
25
26
  var import_vad = require("./vad.cjs");
27
+ class SileroPlugin extends import_agents.Plugin {
28
+ constructor() {
29
+ super({
30
+ title: "silero",
31
+ version: "0.5.6",
32
+ package: "@livekit/agents-plugin-silero"
33
+ });
34
+ }
35
+ }
36
+ import_agents.Plugin.registerPlugin(new SileroPlugin());
26
37
  // Annotate the CommonJS export names for ESM import in node:
27
38
  0 && (module.exports = {
28
39
  VAD,
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nexport { VAD, VADStream } from './vad.js';\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,iBAA+B;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport { VAD, VADStream } from './vad.js';\n\nclass SileroPlugin extends Plugin {\n constructor() {\n super({\n title: 'silero',\n version: '0.5.6',\n package: '@livekit/agents-plugin-silero',\n });\n }\n}\n\nPlugin.registerPlugin(new SileroPlugin());\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAAuB;AAEvB,iBAA+B;AAE/B,MAAM,qBAAqB,qBAAO;AAAA,EAChC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,qBAAO,eAAe,IAAI,aAAa,CAAC;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,GAAG,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC"}
package/dist/index.js CHANGED
@@ -1,4 +1,15 @@
1
+ import { Plugin } from "@livekit/agents";
1
2
  import { VAD, VADStream } from "./vad.js";
3
+ class SileroPlugin extends Plugin {
4
+ constructor() {
5
+ super({
6
+ title: "silero",
7
+ version: "0.5.6",
8
+ package: "@livekit/agents-plugin-silero"
9
+ });
10
+ }
11
+ }
12
+ Plugin.registerPlugin(new SileroPlugin());
2
13
  export {
3
14
  VAD,
4
15
  VADStream
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nexport { VAD, VADStream } from './vad.js';\n"],"mappings":"AAGA,SAAS,KAAK,iBAAiB;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { Plugin } from '@livekit/agents';\n\nexport { VAD, VADStream } from './vad.js';\n\nclass SileroPlugin extends Plugin {\n constructor() {\n super({\n title: 'silero',\n version: '0.5.6',\n package: '@livekit/agents-plugin-silero',\n });\n }\n}\n\nPlugin.registerPlugin(new SileroPlugin());\n"],"mappings":"AAGA,SAAS,cAAc;AAEvB,SAAS,KAAK,iBAAiB;AAE/B,MAAM,qBAAqB,OAAO;AAAA,EAChC,cAAc;AACZ,UAAM;AAAA,MACJ,OAAO;AAAA,MACP,SAAS;AAAA,MACT,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AACF;AAEA,OAAO,eAAe,IAAI,aAAa,CAAC;","names":[]}
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/onnx_model.ts","../../../node_modules/.pnpm/tsup@8.3.5_@microsoft+api-extractor@7.43.7_@types+node@22.5.5__postcss@8.4.38_tsx@4.19.2_typescript@5.4.5/node_modules/tsup/assets/cjs_shims.js"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { fileURLToPath } from 'node:url';\nimport { InferenceSession, Tensor } from 'onnxruntime-node';\n\nexport type SampleRate = 8000 | 16000;\n\nexport const newInferenceSession = (forceCPU: boolean) => {\n return InferenceSession.create(fileURLToPath(new URL('silero_vad.onnx', import.meta.url).href), {\n interOpNumThreads: 1,\n intraOpNumThreads: 1,\n executionMode: 'sequential',\n executionProviders: forceCPU ? [{ name: 'cpu' }] : undefined,\n });\n};\n\nexport class OnnxModel {\n #session: InferenceSession;\n #sampleRate: number;\n #windowSizeSamples: number;\n #contextSize: number;\n #sampleRateNd: BigInt64Array;\n #context: Float32Array;\n // #state: Float32Array;\n #rnnState: Float32Array;\n #inputBuffer: Float32Array;\n\n constructor(session: InferenceSession, sampleRate: SampleRate) {\n this.#session = session;\n this.#sampleRate = sampleRate;\n\n switch (sampleRate) {\n case 8000:\n this.#windowSizeSamples = 256;\n this.#contextSize = 32;\n break;\n case 16000:\n this.#windowSizeSamples = 512;\n this.#contextSize = 64;\n break;\n }\n\n this.#sampleRateNd = BigInt64Array.from([BigInt(sampleRate)]);\n this.#context = new Float32Array(this.#contextSize);\n this.#rnnState = new Float32Array(2 * 1 * 128);\n this.#inputBuffer = new Float32Array(this.#contextSize + this.#windowSizeSamples);\n }\n\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n get windowSizeSamples(): number {\n return this.#windowSizeSamples;\n }\n\n get contextSize(): number {\n return this.#contextSize;\n }\n\n async run(x: Float32Array): Promise<number> {\n this.#inputBuffer.set(this.#context, 0);\n this.#inputBuffer.set(x, this.#contextSize);\n\n return await this.#session\n .run({\n input: new Tensor('float32', this.#inputBuffer, [\n 1,\n this.#contextSize + this.#windowSizeSamples,\n ]),\n state: new Tensor('float32', this.#rnnState, [2, 1, 128]),\n sr: new Tensor('int64', this.#sampleRateNd),\n })\n .then((result) => {\n // this.#state = result.output.data as Float32Array,\n this.#context = this.#inputBuffer.subarray(0, this.#contextSize);\n return (result.output!.data as Float32Array).at(0)!;\n });\n }\n}\n","// Shim globals in cjs bundle\n// There's a weird bug that esbuild will always inject importMetaUrl\n// if we export it as `const importMetaUrl = ... __filename ...`\n// But using a function will not cause this issue\n\nconst getImportMetaUrl = () =>\n typeof document === 'undefined'\n ? new URL(`file:${__filename}`).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n\nexport const importMetaUrl = /* @__PURE__ */ getImportMetaUrl()\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;ACKA,IAAM,mBAAmB,MACvB,OAAO,aAAa,cAChB,IAAI,IAAI,QAAQ,UAAU,EAAE,EAAE,OAC7B,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,OAAO,EAAE;AAEpC,IAAM,gBAAgC,iCAAiB;ADR9D,sBAA8B;AAC9B,8BAAyC;AAIlC,MAAM,sBAAsB,CAAC,aAAsB;AACxD,SAAO,yCAAiB,WAAO,+BAAc,IAAI,IAAI,mBAAmB,aAAe,EAAE,IAAI,GAAG;AAAA,IAC9F,mBAAmB;AAAA,IACnB,mBAAmB;AAAA,IACnB,eAAe;AAAA,IACf,oBAAoB,WAAW,CAAC,EAAE,MAAM,MAAM,CAAC,IAAI;AAAA,EACrD,CAAC;AACH;AAEO,MAAM,UAAU;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,SAA2B,YAAwB;AAC7D,SAAK,WAAW;AAChB,SAAK,cAAc;AAEnB,YAAQ,YAAY;AAAA,MAClB,KAAK;AACH,aAAK,qBAAqB;AAC1B,aAAK,eAAe;AACpB;AAAA,MACF,KAAK;AACH,aAAK,qBAAqB;AAC1B,aAAK,eAAe;AACpB;AAAA,IACJ;AAEA,SAAK,gBAAgB,cAAc,KAAK,CAAC,OAAO,UAAU,CAAC,CAAC;AAC5D,SAAK,WAAW,IAAI,aAAa,KAAK,YAAY;AAClD,SAAK,YAAY,IAAI,aAAa,IAAI,IAAI,GAAG;AAC7C,SAAK,eAAe,IAAI,aAAa,KAAK,eAAe,KAAK,kBAAkB;AAAA,EAClF;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,oBAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,IAAI,GAAkC;AAC1C,SAAK,aAAa,IAAI,KAAK,UAAU,CAAC;AACtC,SAAK,aAAa,IAAI,GAAG,KAAK,YAAY;AAE1C,WAAO,MAAM,KAAK,SACf,IAAI;AAAA,MACH,OAAO,IAAI,+BAAO,WAAW,KAAK,cAAc;AAAA,QAC9C;AAAA,QACA,KAAK,eAAe,KAAK;AAAA,MAC3B,CAAC;AAAA,MACD,OAAO,IAAI,+BAAO,WAAW,KAAK,WAAW,CAAC,GAAG,GAAG,GAAG,CAAC;AAAA,MACxD,IAAI,IAAI,+BAAO,SAAS,KAAK,aAAa;AAAA,IAC5C,CAAC,EACA,KAAK,CAAC,WAAW;AAEhB,WAAK,WAAW,KAAK,aAAa,SAAS,GAAG,KAAK,YAAY;AAC/D,aAAQ,OAAO,OAAQ,KAAsB,GAAG,CAAC;AAAA,IACnD,CAAC;AAAA,EACL;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/onnx_model.ts","../../../node_modules/.pnpm/tsup@8.4.0_@microsoft+api-extractor@7.43.7_@types+node@22.15.30__postcss@8.4.38_tsx@4.20.4_typescript@5.4.5/node_modules/tsup/assets/cjs_shims.js"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { fileURLToPath } from 'node:url';\nimport { InferenceSession, Tensor } from 'onnxruntime-node';\n\nexport type SampleRate = 8000 | 16000;\n\nexport const newInferenceSession = (forceCPU: boolean) => {\n return InferenceSession.create(fileURLToPath(new URL('silero_vad.onnx', import.meta.url).href), {\n interOpNumThreads: 1,\n intraOpNumThreads: 1,\n executionMode: 'sequential',\n executionProviders: forceCPU ? [{ name: 'cpu' }] : undefined,\n });\n};\n\nexport class OnnxModel {\n #session: InferenceSession;\n #sampleRate: number;\n #windowSizeSamples: number;\n #contextSize: number;\n #sampleRateNd: BigInt64Array;\n #context: Float32Array;\n // #state: Float32Array;\n #rnnState: Float32Array;\n #inputBuffer: Float32Array;\n\n constructor(session: InferenceSession, sampleRate: SampleRate) {\n this.#session = session;\n this.#sampleRate = sampleRate;\n\n switch (sampleRate) {\n case 8000:\n this.#windowSizeSamples = 256;\n this.#contextSize = 32;\n break;\n case 16000:\n this.#windowSizeSamples = 512;\n this.#contextSize = 64;\n break;\n }\n\n this.#sampleRateNd = BigInt64Array.from([BigInt(sampleRate)]);\n this.#context = new Float32Array(this.#contextSize);\n this.#rnnState = new Float32Array(2 * 1 * 128);\n this.#inputBuffer = new Float32Array(this.#contextSize + this.#windowSizeSamples);\n }\n\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n get windowSizeSamples(): number {\n return this.#windowSizeSamples;\n }\n\n get contextSize(): number {\n return this.#contextSize;\n }\n\n async run(x: Float32Array): Promise<number> {\n this.#inputBuffer.set(this.#context, 0);\n this.#inputBuffer.set(x, this.#contextSize);\n\n return await this.#session\n .run({\n input: new Tensor('float32', this.#inputBuffer, [\n 1,\n this.#contextSize + this.#windowSizeSamples,\n ]),\n state: new Tensor('float32', this.#rnnState, [2, 1, 128]),\n sr: new Tensor('int64', this.#sampleRateNd),\n })\n .then((result) => {\n // this.#state = result.output.data as Float32Array,\n this.#context = this.#inputBuffer.subarray(0, this.#contextSize);\n return (result.output!.data as Float32Array).at(0)!;\n });\n }\n}\n","// Shim globals in cjs bundle\n// There's a weird bug that esbuild will always inject importMetaUrl\n// if we export it as `const importMetaUrl = ... __filename ...`\n// But using a function will not cause this issue\n\nconst getImportMetaUrl = () =>\n typeof document === 'undefined'\n ? new URL(`file:${__filename}`).href\n : (document.currentScript && document.currentScript.src) ||\n new URL('main.js', document.baseURI).href\n\nexport const importMetaUrl = /* @__PURE__ */ getImportMetaUrl()\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;ACKA,IAAM,mBAAmB,MACvB,OAAO,aAAa,cAChB,IAAI,IAAI,QAAQ,UAAU,EAAE,EAAE,OAC7B,SAAS,iBAAiB,SAAS,cAAc,OAClD,IAAI,IAAI,WAAW,SAAS,OAAO,EAAE;AAEpC,IAAM,gBAAgC,iCAAiB;ADR9D,sBAA8B;AAC9B,8BAAyC;AAIlC,MAAM,sBAAsB,CAAC,aAAsB;AACxD,SAAO,yCAAiB,WAAO,+BAAc,IAAI,IAAI,mBAAmB,aAAe,EAAE,IAAI,GAAG;AAAA,IAC9F,mBAAmB;AAAA,IACnB,mBAAmB;AAAA,IACnB,eAAe;AAAA,IACf,oBAAoB,WAAW,CAAC,EAAE,MAAM,MAAM,CAAC,IAAI;AAAA,EACrD,CAAC;AACH;AAEO,MAAM,UAAU;AAAA,EACrB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAEA;AAAA,EACA;AAAA,EAEA,YAAY,SAA2B,YAAwB;AAC7D,SAAK,WAAW;AAChB,SAAK,cAAc;AAEnB,YAAQ,YAAY;AAAA,MAClB,KAAK;AACH,aAAK,qBAAqB;AAC1B,aAAK,eAAe;AACpB;AAAA,MACF,KAAK;AACH,aAAK,qBAAqB;AAC1B,aAAK,eAAe;AACpB;AAAA,IACJ;AAEA,SAAK,gBAAgB,cAAc,KAAK,CAAC,OAAO,UAAU,CAAC,CAAC;AAC5D,SAAK,WAAW,IAAI,aAAa,KAAK,YAAY;AAClD,SAAK,YAAY,IAAI,aAAa,IAAI,IAAI,GAAG;AAC7C,SAAK,eAAe,IAAI,aAAa,KAAK,eAAe,KAAK,kBAAkB;AAAA,EAClF;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,oBAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,IAAI,GAAkC;AAC1C,SAAK,aAAa,IAAI,KAAK,UAAU,CAAC;AACtC,SAAK,aAAa,IAAI,GAAG,KAAK,YAAY;AAE1C,WAAO,MAAM,KAAK,SACf,IAAI;AAAA,MACH,OAAO,IAAI,+BAAO,WAAW,KAAK,cAAc;AAAA,QAC9C;AAAA,QACA,KAAK,eAAe,KAAK;AAAA,MAC3B,CAAC;AAAA,MACD,OAAO,IAAI,+BAAO,WAAW,KAAK,WAAW,CAAC,GAAG,GAAG,GAAG,CAAC;AAAA,MACxD,IAAI,IAAI,+BAAO,SAAS,KAAK,aAAa;AAAA,IAC5C,CAAC,EACA,KAAK,CAAC,WAAW;AAEhB,WAAK,WAAW,KAAK,aAAa,SAAS,GAAG,KAAK,YAAY;AAC/D,aAAQ,OAAO,OAAQ,KAAsB,GAAG,CAAC;AAAA,IACnD,CAAC;AAAA,EACL;AACF;","names":[]}
package/dist/vad.cjs CHANGED
@@ -135,7 +135,11 @@ class VADStream extends import_agents.VADStream {
135
135
  let inferenceFrames = [];
136
136
  let resampler = null;
137
137
  let inputCopyRemainingFrac = 0;
138
- for await (const frame of this.input) {
138
+ while (true) {
139
+ const { done, value: frame } = await this.inputReader.read();
140
+ if (done) {
141
+ break;
142
+ }
139
143
  if (typeof frame === "symbol") {
140
144
  continue;
141
145
  }
@@ -209,7 +213,7 @@ class VADStream extends import_agents.VADStream {
209
213
  } else {
210
214
  pubSilenceDuration += windowDuration;
211
215
  }
212
- this.queue.put({
216
+ this.outputWriter.write({
213
217
  type: import_agents.VADEventType.INFERENCE_DONE,
214
218
  samplesIndex: pubCurrentSample,
215
219
  timestamp: pubTimestamp,
@@ -258,7 +262,7 @@ class VADStream extends import_agents.VADStream {
258
262
  pubSpeaking = true;
259
263
  pubSilenceDuration = 0;
260
264
  pubSpeechDuration = speechThresholdDuration;
261
- this.queue.put({
265
+ this.outputWriter.write({
262
266
  type: import_agents.VADEventType.START_OF_SPEECH,
263
267
  samplesIndex: pubCurrentSample,
264
268
  timestamp: pubTimestamp,
@@ -282,7 +286,7 @@ class VADStream extends import_agents.VADStream {
282
286
  pubSpeaking = false;
283
287
  pubSpeechDuration = 0;
284
288
  pubSilenceDuration = silenceThresholdDuration;
285
- this.queue.put({
289
+ this.outputWriter.write({
286
290
  type: import_agents.VADEventType.END_OF_SPEECH,
287
291
  samplesIndex: pubCurrentSample,
288
292
  timestamp: pubTimestamp,
package/dist/vad.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n ExpFilter,\n VADEventType,\n VADStream as baseStream,\n VAD as baseVAD,\n log,\n mergeFrames,\n} from '@livekit/agents';\nimport { AudioFrame, AudioResampler, AudioResamplerQuality } from '@livekit/rtc-node';\nimport type { InferenceSession } from 'onnxruntime-node';\nimport type { SampleRate } from './onnx_model.js';\nimport { OnnxModel, newInferenceSession } from './onnx_model.js';\n\nconst SLOW_INFERENCE_THRESHOLD = 200; // late by 200ms\n\nexport interface VADOptions {\n /** Minimum duration of speech to start a new speech chunk */\n minSpeechDuration: number;\n /** At the end of each speech, wait this duration before ending the speech */\n minSilenceDuration: number;\n /** Duration of padding to add to the beginning of each speech chunk */\n prefixPaddingDuration: number;\n /** Maximum duration of speech to keep in the buffer */\n maxBufferedSpeech: number;\n /** Maximum duration of speech to keep in the buffer*/\n activationThreshold: number;\n /** Sample rate for the inference (only 8KHz and 16KHz are supported) */\n sampleRate: SampleRate;\n /** Force the use of CPU for inference */\n forceCPU: boolean;\n}\n\nconst defaultVADOptions: VADOptions = {\n minSpeechDuration: 50,\n minSilenceDuration: 550,\n prefixPaddingDuration: 500,\n maxBufferedSpeech: 60000, // 60 seconds\n activationThreshold: 0.5,\n sampleRate: 16000,\n forceCPU: true,\n};\n\nexport class VAD extends baseVAD {\n #session: InferenceSession;\n #opts: VADOptions;\n #streams: VADStream[];\n label = 'silero.VAD';\n\n constructor(session: InferenceSession, opts: VADOptions) {\n super({ updateInterval: 32 });\n this.#session = session;\n this.#opts = opts;\n this.#streams = [];\n }\n\n /**\n * Updates the VAD options with new values.\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method will merge the provided options with existing options and update all active streams.\n * Only the properties specified in opts will be updated, other properties retain their current values.\n */\n updateOptions(opts: Partial<VADOptions>): void {\n this.#opts = { ...this.#opts, ...opts };\n for (const stream of this.#streams) {\n stream.updateOptions(this.#opts);\n }\n }\n\n /**\n * Load and initialize the Silero VAD model.\n *\n * This method loads the ONNX model and prepares it for inference. When options are not provided,\n * sane defaults are used.\n *\n * @remarks\n * This method may take time to load the model into memory.\n * It is recommended to call this method inside your prewarm mechanism.\n *\n * @example\n * ```ts\n * export default defineAgent({\n * prewarm: async (proc: JobProcess) => {\n * proc.userData.vad = await VAD.load();\n * },\n * entry: async (ctx: JobContext) => {\n * const vad = ctx.proc.userData.vad! as VAD;\n * // the rest of your agent logic\n * },\n * });\n * ```\n *\n * @param options -\n * @returns Promise\\<{@link VAD}\\>: An instance of the VAD class ready for streaming.\n */\n static async load(opts: Partial<VADOptions> = {}): Promise<VAD> {\n const mergedOpts: VADOptions = { ...defaultVADOptions, ...opts };\n const session = await newInferenceSession(mergedOpts.forceCPU);\n return new VAD(session, mergedOpts);\n }\n\n stream(): VADStream {\n const stream = new VADStream(\n this,\n this.#opts,\n new OnnxModel(this.#session, this.#opts.sampleRate),\n );\n this.#streams.push(stream);\n return stream;\n }\n}\n\nexport class VADStream extends baseStream {\n #opts: VADOptions;\n #model: OnnxModel;\n #inputSampleRate: number;\n #speechBuffer: Int16Array | null;\n #speechBufferMaxReached: boolean;\n #prefixPaddingSamples: number;\n #task: Promise<void>;\n #expFilter = new ExpFilter(0.35);\n #extraInferenceTime = 0;\n #logger = log();\n\n constructor(vad: VAD, opts: VADOptions, model: OnnxModel) {\n super(vad);\n this.#opts = opts;\n this.#model = model;\n this.#inputSampleRate = 0;\n this.#speechBuffer = null;\n this.#speechBufferMaxReached = false;\n this.#prefixPaddingSamples = 0;\n\n this.#task = new Promise(async () => {\n let inferenceData = new Float32Array(this.#model.windowSizeSamples);\n\n // a copy is exposed to the user in END_OF_SPEECH\n let speechBufferIndex = 0;\n\n // \"pub\" means public, these values are exposed to the users through events\n let pubSpeaking = false;\n let pubSpeechDuration = 0;\n let pubSilenceDuration = 0;\n let pubCurrentSample = 0;\n let pubTimestamp = 0;\n let speechThresholdDuration = 0;\n let silenceThresholdDuration = 0;\n\n let inputFrames = [];\n let inferenceFrames: AudioFrame[] = [];\n let resampler: AudioResampler | null = null;\n\n // used to avoid drift when the sampleRate ratio is not an integer\n let inputCopyRemainingFrac = 0.0;\n\n for await (const frame of this.input) {\n if (typeof frame === 'symbol') {\n continue; // ignore flush sentinel for now\n }\n\n if (!this.#inputSampleRate || !this.#speechBuffer) {\n this.#inputSampleRate = frame.sampleRate;\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n this.#speechBuffer = new Int16Array(bufferSize);\n\n if (this.#opts.sampleRate !== this.#inputSampleRate) {\n // resampling needed: the input sample rate isn't the same as the model's\n // sample rate used for inference\n resampler = new AudioResampler(\n this.#inputSampleRate,\n this.#opts.sampleRate,\n 1,\n AudioResamplerQuality.QUICK, // VAD doesn't need high quality\n );\n }\n } else if (frame.sampleRate !== this.#inputSampleRate) {\n this.#logger.error('a frame with a different sample rate was already published');\n continue;\n }\n\n inputFrames.push(frame);\n if (resampler) {\n inferenceFrames.push(...resampler.push(frame));\n } else {\n inferenceFrames.push(frame);\n }\n\n while (true) {\n const startTime = process.hrtime.bigint();\n const availableInferenceSamples = inferenceFrames\n .map((x) => x.samplesPerChannel)\n .reduce((acc, x) => acc + x, 0);\n\n if (availableInferenceSamples < this.#model.windowSizeSamples) {\n break; // not enough samples to run inference\n }\n\n const inputFrame = mergeFrames(inputFrames);\n const inferenceFrame = mergeFrames(inferenceFrames);\n\n // convert data to f32\n inferenceData = Float32Array.from(\n inferenceFrame.data.subarray(0, this.#model.windowSizeSamples),\n (x) => x / 32767,\n );\n\n const p = await this.#model\n .run(inferenceData)\n .then((data) => this.#expFilter.apply(1, data));\n\n const windowDuration = (this.#model.windowSizeSamples / this.#opts.sampleRate) * 1000;\n pubCurrentSample += this.#model.windowSizeSamples;\n pubTimestamp += windowDuration;\n const resamplingRatio = this.#inputSampleRate / this.#model.sampleRate;\n const toCopy = this.#model.windowSizeSamples * resamplingRatio + inputCopyRemainingFrac;\n const toCopyInt = Math.trunc(toCopy);\n inputCopyRemainingFrac = toCopy - toCopyInt;\n\n // copy the inference window to the speech buffer\n const availableSpace = this.#speechBuffer.length - speechBufferIndex;\n const toCopyBuffer = Math.min(toCopyInt, availableSpace);\n if (toCopyBuffer > 0) {\n this.#speechBuffer.set(inputFrame.data.subarray(0, toCopyBuffer), speechBufferIndex);\n speechBufferIndex += toCopyBuffer;\n } else if (!this.#speechBufferMaxReached) {\n this.#speechBufferMaxReached = true;\n this.#logger.warn(\n 'maxBufferedSpeech reached, ignoring further data for the current speech input',\n );\n }\n\n const inferenceDuration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.#extraInferenceTime = Math.max(\n 0,\n this.#extraInferenceTime + inferenceDuration - windowDuration,\n );\n if (this.#extraInferenceTime > SLOW_INFERENCE_THRESHOLD) {\n this.#logger\n .child({ delay: this.#extraInferenceTime })\n .warn('inference is slower than realtime');\n }\n\n if (pubSpeaking) {\n pubSpeechDuration += windowDuration;\n } else {\n pubSilenceDuration += windowDuration;\n }\n\n this.queue.put({\n type: VADEventType.INFERENCE_DONE,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [\n new AudioFrame(\n inputFrame.data.subarray(0, toCopyInt),\n this.#inputSampleRate,\n 1,\n toCopyInt,\n ),\n ],\n speaking: pubSpeaking,\n rawAccumulatedSilence: silenceThresholdDuration,\n rawAccumulatedSpeech: speechThresholdDuration,\n });\n\n const resetWriteCursor = () => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n if (speechBufferIndex <= this.#prefixPaddingSamples) {\n return;\n }\n\n const paddingData = this.#speechBuffer.subarray(\n speechBufferIndex - this.#prefixPaddingSamples,\n speechBufferIndex,\n );\n this.#speechBuffer.set(paddingData, 0);\n speechBufferIndex = this.#prefixPaddingSamples;\n this.#speechBufferMaxReached = false;\n };\n\n const copySpeechBuffer = (): AudioFrame => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n return new AudioFrame(\n this.#speechBuffer.subarray(this.#prefixPaddingSamples, speechBufferIndex),\n this.#inputSampleRate,\n 1,\n speechBufferIndex,\n );\n };\n\n if (p > this.#opts.activationThreshold) {\n speechThresholdDuration += windowDuration;\n silenceThresholdDuration = 0;\n if (!pubSpeaking && speechThresholdDuration >= this.#opts.minSpeechDuration) {\n pubSpeaking = true;\n pubSilenceDuration = 0;\n pubSpeechDuration = speechThresholdDuration;\n\n this.queue.put({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n }\n } else {\n silenceThresholdDuration += windowDuration;\n speechThresholdDuration = 0;\n\n if (!pubSpeaking) {\n resetWriteCursor();\n }\n\n if (pubSpeaking && silenceThresholdDuration > this.#opts.minSilenceDuration) {\n pubSpeaking = false;\n pubSpeechDuration = 0;\n pubSilenceDuration = silenceThresholdDuration;\n\n this.queue.put({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n\n resetWriteCursor();\n }\n }\n\n inputFrames = [];\n inferenceFrames = [];\n\n if (inputFrame.data.length > toCopyInt) {\n const data = inputFrame.data.subarray(toCopyInt);\n inputFrames.push(\n new AudioFrame(data, this.#inputSampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n if (inferenceFrame.data.length > this.#model.windowSizeSamples) {\n const data = inferenceFrame.data.subarray(this.#model.windowSizeSamples);\n inferenceFrames.push(\n new AudioFrame(data, this.#opts.sampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n }\n }\n });\n }\n\n /**\n * Update the VAD options\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method allows you to update the VAD options after the VAD object has been created\n */\n updateOptions(opts: Partial<VADOptions>) {\n const oldMaxBufferedSpeech = this.#opts.maxBufferedSpeech;\n this.#opts = { ...this.#opts, ...opts };\n\n if (this.#inputSampleRate) {\n // Assert speech buffer exists\n if (this.#speechBuffer === null) throw new Error('speechBuffer is null');\n\n // Resize speech buffer\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n const resizedBuffer = new Int16Array(bufferSize);\n resizedBuffer.set(\n this.#speechBuffer.subarray(0, Math.min(this.#speechBuffer.length, bufferSize)),\n );\n this.#speechBuffer = resizedBuffer;\n\n // Determine if max has been reached\n if (this.#opts.maxBufferedSpeech > oldMaxBufferedSpeech) {\n this.#speechBufferMaxReached = false;\n }\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AACP,sBAAkE;AAGlE,wBAA+C;AAE/C,MAAM,2BAA2B;AAmBjC,MAAM,oBAAgC;AAAA,EACpC,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,uBAAuB;AAAA,EACvB,mBAAmB;AAAA;AAAA,EACnB,qBAAqB;AAAA,EACrB,YAAY;AAAA,EACZ,UAAU;AACZ;AAEO,MAAM,YAAY,cAAAA,IAAQ;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,SAA2B,MAAkB;AACvD,UAAM,EAAE,gBAAgB,GAAG,CAAC;AAC5B,SAAK,WAAW;AAChB,SAAK,QAAQ;AACb,SAAK,WAAW,CAAC;AAAA,EACnB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,cAAc,MAAiC;AAC7C,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,eAAW,UAAU,KAAK,UAAU;AAClC,aAAO,cAAc,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA4BA,aAAa,KAAK,OAA4B,CAAC,GAAiB;AAC9D,UAAM,aAAyB,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC/D,UAAM,UAAU,UAAM,uCAAoB,WAAW,QAAQ;AAC7D,WAAO,IAAI,IAAI,SAAS,UAAU;AAAA,EACpC;AAAA,EAEA,SAAoB;AAClB,UAAM,SAAS,IAAI;AAAA,MACjB;AAAA,MACA,KAAK;AAAA,MACL,IAAI,4BAAU,KAAK,UAAU,KAAK,MAAM,UAAU;AAAA,IACpD;AACA,SAAK,SAAS,KAAK,MAAM;AACzB,WAAO;AAAA,EACT;AACF;AAEO,MAAM,kBAAkB,cAAAC,UAAW;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,aAAa,IAAI,wBAAU,IAAI;AAAA,EAC/B,sBAAsB;AAAA,EACtB,cAAU,mBAAI;AAAA,EAEd,YAAY,KAAU,MAAkB,OAAkB;AACxD,UAAM,GAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,mBAAmB;AACxB,SAAK,gBAAgB;AACrB,SAAK,0BAA0B;AAC/B,SAAK,wBAAwB;AAE7B,SAAK,QAAQ,IAAI,QAAQ,YAAY;AACnC,UAAI,gBAAgB,IAAI,aAAa,KAAK,OAAO,iBAAiB;AAGlE,UAAI,oBAAoB;AAGxB,UAAI,cAAc;AAClB,UAAI,oBAAoB;AACxB,UAAI,qBAAqB;AACzB,UAAI,mBAAmB;AACvB,UAAI,eAAe;AACnB,UAAI,0BAA0B;AAC9B,UAAI,2BAA2B;AAE/B,UAAI,cAAc,CAAC;AACnB,UAAI,kBAAgC,CAAC;AACrC,UAAI,YAAmC;AAGvC,UAAI,yBAAyB;AAE7B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,OAAO,UAAU,UAAU;AAC7B;AAAA,QACF;AAEA,YAAI,CAAC,KAAK,oBAAoB,CAAC,KAAK,eAAe;AACjD,eAAK,mBAAmB,MAAM;AAC9B,eAAK,wBAAwB,KAAK;AAAA,YAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,UAC/D;AACA,gBAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,eAAK,gBAAgB,IAAI,WAAW,UAAU;AAE9C,cAAI,KAAK,MAAM,eAAe,KAAK,kBAAkB;AAGnD,wBAAY,IAAI;AAAA,cACd,KAAK;AAAA,cACL,KAAK,MAAM;AAAA,cACX;AAAA,cACA,sCAAsB;AAAA;AAAA,YACxB;AAAA,UACF;AAAA,QACF,WAAW,MAAM,eAAe,KAAK,kBAAkB;AACrD,eAAK,QAAQ,MAAM,4DAA4D;AAC/E;AAAA,QACF;AAEA,oBAAY,KAAK,KAAK;AACtB,YAAI,WAAW;AACb,0BAAgB,KAAK,GAAG,UAAU,KAAK,KAAK,CAAC;AAAA,QAC/C,OAAO;AACL,0BAAgB,KAAK,KAAK;AAAA,QAC5B;AAEA,eAAO,MAAM;AACX,gBAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,gBAAM,4BAA4B,gBAC/B,IAAI,CAAC,MAAM,EAAE,iBAAiB,EAC9B,OAAO,CAAC,KAAK,MAAM,MAAM,GAAG,CAAC;AAEhC,cAAI,4BAA4B,KAAK,OAAO,mBAAmB;AAC7D;AAAA,UACF;AAEA,gBAAM,iBAAa,2BAAY,WAAW;AAC1C,gBAAM,qBAAiB,2BAAY,eAAe;AAGlD,0BAAgB,aAAa;AAAA,YAC3B,eAAe,KAAK,SAAS,GAAG,KAAK,OAAO,iBAAiB;AAAA,YAC7D,CAAC,MAAM,IAAI;AAAA,UACb;AAEA,gBAAM,IAAI,MAAM,KAAK,OAClB,IAAI,aAAa,EACjB,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM,GAAG,IAAI,CAAC;AAEhD,gBAAM,iBAAkB,KAAK,OAAO,oBAAoB,KAAK,MAAM,aAAc;AACjF,8BAAoB,KAAK,OAAO;AAChC,0BAAgB;AAChB,gBAAM,kBAAkB,KAAK,mBAAmB,KAAK,OAAO;AAC5D,gBAAM,SAAS,KAAK,OAAO,oBAAoB,kBAAkB;AACjE,gBAAM,YAAY,KAAK,MAAM,MAAM;AACnC,mCAAyB,SAAS;AAGlC,gBAAM,iBAAiB,KAAK,cAAc,SAAS;AACnD,gBAAM,eAAe,KAAK,IAAI,WAAW,cAAc;AACvD,cAAI,eAAe,GAAG;AACpB,iBAAK,cAAc,IAAI,WAAW,KAAK,SAAS,GAAG,YAAY,GAAG,iBAAiB;AACnF,iCAAqB;AAAA,UACvB,WAAW,CAAC,KAAK,yBAAyB;AACxC,iBAAK,0BAA0B;AAC/B,iBAAK,QAAQ;AAAA,cACX;AAAA,YACF;AAAA,UACF;AAEA,gBAAM,oBAAoB,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACxF,eAAK,sBAAsB,KAAK;AAAA,YAC9B;AAAA,YACA,KAAK,sBAAsB,oBAAoB;AAAA,UACjD;AACA,cAAI,KAAK,sBAAsB,0BAA0B;AACvD,iBAAK,QACF,MAAM,EAAE,OAAO,KAAK,oBAAoB,CAAC,EACzC,KAAK,mCAAmC;AAAA,UAC7C;AAEA,cAAI,aAAa;AACf,iCAAqB;AAAA,UACvB,OAAO;AACL,kCAAsB;AAAA,UACxB;AAEA,eAAK,MAAM,IAAI;AAAA,YACb,MAAM,2BAAa;AAAA,YACnB,cAAc;AAAA,YACd,WAAW;AAAA,YACX,iBAAiB;AAAA,YACjB,gBAAgB;AAAA,YAChB,aAAa;AAAA,YACb;AAAA,YACA,QAAQ;AAAA,cACN,IAAI;AAAA,gBACF,WAAW,KAAK,SAAS,GAAG,SAAS;AAAA,gBACrC,KAAK;AAAA,gBACL;AAAA,gBACA;AAAA,cACF;AAAA,YACF;AAAA,YACA,UAAU;AAAA,YACV,uBAAuB;AAAA,YACvB,sBAAsB;AAAA,UACxB,CAAC;AAED,gBAAM,mBAAmB,MAAM;AAC7B,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,gBAAI,qBAAqB,KAAK,uBAAuB;AACnD;AAAA,YACF;AAEA,kBAAM,cAAc,KAAK,cAAc;AAAA,cACrC,oBAAoB,KAAK;AAAA,cACzB;AAAA,YACF;AACA,iBAAK,cAAc,IAAI,aAAa,CAAC;AACrC,gCAAoB,KAAK;AACzB,iBAAK,0BAA0B;AAAA,UACjC;AAEA,gBAAM,mBAAmB,MAAkB;AACzC,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,mBAAO,IAAI;AAAA,cACT,KAAK,cAAc,SAAS,KAAK,uBAAuB,iBAAiB;AAAA,cACzE,KAAK;AAAA,cACL;AAAA,cACA;AAAA,YACF;AAAA,UACF;AAEA,cAAI,IAAI,KAAK,MAAM,qBAAqB;AACtC,uCAA2B;AAC3B,uCAA2B;AAC3B,gBAAI,CAAC,eAAe,2BAA2B,KAAK,MAAM,mBAAmB;AAC3E,4BAAc;AACd,mCAAqB;AACrB,kCAAoB;AAEpB,mBAAK,MAAM,IAAI;AAAA,gBACb,MAAM,2BAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAAA,YACH;AAAA,UACF,OAAO;AACL,wCAA4B;AAC5B,sCAA0B;AAE1B,gBAAI,CAAC,aAAa;AAChB,+BAAiB;AAAA,YACnB;AAEA,gBAAI,eAAe,2BAA2B,KAAK,MAAM,oBAAoB;AAC3E,4BAAc;AACd,kCAAoB;AACpB,mCAAqB;AAErB,mBAAK,MAAM,IAAI;AAAA,gBACb,MAAM,2BAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAED,+BAAiB;AAAA,YACnB;AAAA,UACF;AAEA,wBAAc,CAAC;AACf,4BAAkB,CAAC;AAEnB,cAAI,WAAW,KAAK,SAAS,WAAW;AACtC,kBAAM,OAAO,WAAW,KAAK,SAAS,SAAS;AAC/C,wBAAY;AAAA,cACV,IAAI,2BAAW,MAAM,KAAK,kBAAkB,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AACA,cAAI,eAAe,KAAK,SAAS,KAAK,OAAO,mBAAmB;AAC9D,kBAAM,OAAO,eAAe,KAAK,SAAS,KAAK,OAAO,iBAAiB;AACvE,4BAAgB;AAAA,cACd,IAAI,2BAAW,MAAM,KAAK,MAAM,YAAY,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,cAAc,MAA2B;AACvC,UAAM,uBAAuB,KAAK,MAAM;AACxC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QAAI,KAAK,kBAAkB;AAEzB,UAAI,KAAK,kBAAkB,KAAM,OAAM,IAAI,MAAM,sBAAsB;AAGvE,WAAK,wBAAwB,KAAK;AAAA,QAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,MAC/D;AACA,YAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,YAAM,gBAAgB,IAAI,WAAW,UAAU;AAC/C,oBAAc;AAAA,QACZ,KAAK,cAAc,SAAS,GAAG,KAAK,IAAI,KAAK,cAAc,QAAQ,UAAU,CAAC;AAAA,MAChF;AACA,WAAK,gBAAgB;AAGrB,UAAI,KAAK,MAAM,oBAAoB,sBAAsB;AACvD,aAAK,0BAA0B;AAAA,MACjC;AAAA,IACF;AAAA,EACF;AACF;","names":["baseVAD","baseStream"]}
1
+ {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n ExpFilter,\n VADEventType,\n VADStream as baseStream,\n VAD as baseVAD,\n log,\n mergeFrames,\n} from '@livekit/agents';\nimport { AudioFrame, AudioResampler, AudioResamplerQuality } from '@livekit/rtc-node';\nimport type { InferenceSession } from 'onnxruntime-node';\nimport type { SampleRate } from './onnx_model.js';\nimport { OnnxModel, newInferenceSession } from './onnx_model.js';\n\nconst SLOW_INFERENCE_THRESHOLD = 200; // late by 200ms\n\nexport interface VADOptions {\n /** Minimum duration of speech to start a new speech chunk */\n minSpeechDuration: number;\n /** At the end of each speech, wait this duration before ending the speech */\n minSilenceDuration: number;\n /** Duration of padding to add to the beginning of each speech chunk */\n prefixPaddingDuration: number;\n /** Maximum duration of speech to keep in the buffer */\n maxBufferedSpeech: number;\n /** Maximum duration of speech to keep in the buffer*/\n activationThreshold: number;\n /** Sample rate for the inference (only 8KHz and 16KHz are supported) */\n sampleRate: SampleRate;\n /** Force the use of CPU for inference */\n forceCPU: boolean;\n}\n\nconst defaultVADOptions: VADOptions = {\n minSpeechDuration: 50,\n minSilenceDuration: 550,\n prefixPaddingDuration: 500,\n maxBufferedSpeech: 60000, // 60 seconds\n activationThreshold: 0.5,\n sampleRate: 16000,\n forceCPU: true,\n};\n\nexport class VAD extends baseVAD {\n #session: InferenceSession;\n #opts: VADOptions;\n #streams: VADStream[];\n label = 'silero.VAD';\n\n constructor(session: InferenceSession, opts: VADOptions) {\n super({ updateInterval: 32 });\n this.#session = session;\n this.#opts = opts;\n this.#streams = [];\n }\n\n /**\n * Updates the VAD options with new values.\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method will merge the provided options with existing options and update all active streams.\n * Only the properties specified in opts will be updated, other properties retain their current values.\n */\n updateOptions(opts: Partial<VADOptions>): void {\n this.#opts = { ...this.#opts, ...opts };\n for (const stream of this.#streams) {\n stream.updateOptions(this.#opts);\n }\n }\n\n /**\n * Load and initialize the Silero VAD model.\n *\n * This method loads the ONNX model and prepares it for inference. When options are not provided,\n * sane defaults are used.\n *\n * @remarks\n * This method may take time to load the model into memory.\n * It is recommended to call this method inside your prewarm mechanism.\n *\n * @example\n * ```ts\n * export default defineAgent({\n * prewarm: async (proc: JobProcess) => {\n * proc.userData.vad = await VAD.load();\n * },\n * entry: async (ctx: JobContext) => {\n * const vad = ctx.proc.userData.vad! as VAD;\n * // the rest of your agent logic\n * },\n * });\n * ```\n *\n * @param options -\n * @returns Promise\\<{@link VAD}\\>: An instance of the VAD class ready for streaming.\n */\n static async load(opts: Partial<VADOptions> = {}): Promise<VAD> {\n const mergedOpts: VADOptions = { ...defaultVADOptions, ...opts };\n const session = await newInferenceSession(mergedOpts.forceCPU);\n return new VAD(session, mergedOpts);\n }\n\n stream(): VADStream {\n const stream = new VADStream(\n this,\n this.#opts,\n new OnnxModel(this.#session, this.#opts.sampleRate),\n );\n this.#streams.push(stream);\n return stream;\n }\n}\n\nexport class VADStream extends baseStream {\n #opts: VADOptions;\n #model: OnnxModel;\n #inputSampleRate: number;\n #speechBuffer: Int16Array | null;\n #speechBufferMaxReached: boolean;\n #prefixPaddingSamples: number;\n #task: Promise<void>;\n #expFilter = new ExpFilter(0.35);\n #extraInferenceTime = 0;\n #logger = log();\n\n constructor(vad: VAD, opts: VADOptions, model: OnnxModel) {\n super(vad);\n this.#opts = opts;\n this.#model = model;\n this.#inputSampleRate = 0;\n this.#speechBuffer = null;\n this.#speechBufferMaxReached = false;\n this.#prefixPaddingSamples = 0;\n\n this.#task = new Promise(async () => {\n let inferenceData = new Float32Array(this.#model.windowSizeSamples);\n\n // a copy is exposed to the user in END_OF_SPEECH\n let speechBufferIndex = 0;\n\n // \"pub\" means public, these values are exposed to the users through events\n let pubSpeaking = false;\n let pubSpeechDuration = 0;\n let pubSilenceDuration = 0;\n let pubCurrentSample = 0;\n let pubTimestamp = 0;\n let speechThresholdDuration = 0;\n let silenceThresholdDuration = 0;\n\n let inputFrames = [];\n let inferenceFrames: AudioFrame[] = [];\n let resampler: AudioResampler | null = null;\n\n // used to avoid drift when the sampleRate ratio is not an integer\n let inputCopyRemainingFrac = 0.0;\n\n while (true) {\n const { done, value: frame } = await this.inputReader.read();\n if (done) {\n break;\n }\n\n if (typeof frame === 'symbol') {\n continue; // ignore flush sentinel for now\n }\n\n if (!this.#inputSampleRate || !this.#speechBuffer) {\n this.#inputSampleRate = frame.sampleRate;\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n this.#speechBuffer = new Int16Array(bufferSize);\n\n if (this.#opts.sampleRate !== this.#inputSampleRate) {\n // resampling needed: the input sample rate isn't the same as the model's\n // sample rate used for inference\n resampler = new AudioResampler(\n this.#inputSampleRate,\n this.#opts.sampleRate,\n 1,\n AudioResamplerQuality.QUICK, // VAD doesn't need high quality\n );\n }\n } else if (frame.sampleRate !== this.#inputSampleRate) {\n this.#logger.error('a frame with a different sample rate was already published');\n continue;\n }\n\n inputFrames.push(frame);\n if (resampler) {\n inferenceFrames.push(...resampler.push(frame));\n } else {\n inferenceFrames.push(frame);\n }\n\n while (true) {\n const startTime = process.hrtime.bigint();\n const availableInferenceSamples = inferenceFrames\n .map((x) => x.samplesPerChannel)\n .reduce((acc, x) => acc + x, 0);\n\n if (availableInferenceSamples < this.#model.windowSizeSamples) {\n break; // not enough samples to run inference\n }\n\n const inputFrame = mergeFrames(inputFrames);\n const inferenceFrame = mergeFrames(inferenceFrames);\n\n // convert data to f32\n inferenceData = Float32Array.from(\n inferenceFrame.data.subarray(0, this.#model.windowSizeSamples),\n (x) => x / 32767,\n );\n\n const p = await this.#model\n .run(inferenceData)\n .then((data) => this.#expFilter.apply(1, data));\n\n const windowDuration = (this.#model.windowSizeSamples / this.#opts.sampleRate) * 1000;\n pubCurrentSample += this.#model.windowSizeSamples;\n pubTimestamp += windowDuration;\n const resamplingRatio = this.#inputSampleRate / this.#model.sampleRate;\n const toCopy = this.#model.windowSizeSamples * resamplingRatio + inputCopyRemainingFrac;\n const toCopyInt = Math.trunc(toCopy);\n inputCopyRemainingFrac = toCopy - toCopyInt;\n\n // copy the inference window to the speech buffer\n const availableSpace = this.#speechBuffer.length - speechBufferIndex;\n const toCopyBuffer = Math.min(toCopyInt, availableSpace);\n if (toCopyBuffer > 0) {\n this.#speechBuffer.set(inputFrame.data.subarray(0, toCopyBuffer), speechBufferIndex);\n speechBufferIndex += toCopyBuffer;\n } else if (!this.#speechBufferMaxReached) {\n this.#speechBufferMaxReached = true;\n this.#logger.warn(\n 'maxBufferedSpeech reached, ignoring further data for the current speech input',\n );\n }\n\n const inferenceDuration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.#extraInferenceTime = Math.max(\n 0,\n this.#extraInferenceTime + inferenceDuration - windowDuration,\n );\n if (this.#extraInferenceTime > SLOW_INFERENCE_THRESHOLD) {\n this.#logger\n .child({ delay: this.#extraInferenceTime })\n .warn('inference is slower than realtime');\n }\n\n if (pubSpeaking) {\n pubSpeechDuration += windowDuration;\n } else {\n pubSilenceDuration += windowDuration;\n }\n\n this.outputWriter.write({\n type: VADEventType.INFERENCE_DONE,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [\n new AudioFrame(\n inputFrame.data.subarray(0, toCopyInt),\n this.#inputSampleRate,\n 1,\n toCopyInt,\n ),\n ],\n speaking: pubSpeaking,\n rawAccumulatedSilence: silenceThresholdDuration,\n rawAccumulatedSpeech: speechThresholdDuration,\n });\n\n const resetWriteCursor = () => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n if (speechBufferIndex <= this.#prefixPaddingSamples) {\n return;\n }\n\n const paddingData = this.#speechBuffer.subarray(\n speechBufferIndex - this.#prefixPaddingSamples,\n speechBufferIndex,\n );\n this.#speechBuffer.set(paddingData, 0);\n speechBufferIndex = this.#prefixPaddingSamples;\n this.#speechBufferMaxReached = false;\n };\n\n const copySpeechBuffer = (): AudioFrame => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n return new AudioFrame(\n this.#speechBuffer.subarray(this.#prefixPaddingSamples, speechBufferIndex),\n this.#inputSampleRate,\n 1,\n speechBufferIndex,\n );\n };\n\n if (p > this.#opts.activationThreshold) {\n speechThresholdDuration += windowDuration;\n silenceThresholdDuration = 0;\n if (!pubSpeaking && speechThresholdDuration >= this.#opts.minSpeechDuration) {\n pubSpeaking = true;\n pubSilenceDuration = 0;\n pubSpeechDuration = speechThresholdDuration;\n\n this.outputWriter.write({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n }\n } else {\n silenceThresholdDuration += windowDuration;\n speechThresholdDuration = 0;\n\n if (!pubSpeaking) {\n resetWriteCursor();\n }\n\n if (pubSpeaking && silenceThresholdDuration > this.#opts.minSilenceDuration) {\n pubSpeaking = false;\n pubSpeechDuration = 0;\n pubSilenceDuration = silenceThresholdDuration;\n\n this.outputWriter.write({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n\n resetWriteCursor();\n }\n }\n\n inputFrames = [];\n inferenceFrames = [];\n\n if (inputFrame.data.length > toCopyInt) {\n const data = inputFrame.data.subarray(toCopyInt);\n inputFrames.push(\n new AudioFrame(data, this.#inputSampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n if (inferenceFrame.data.length > this.#model.windowSizeSamples) {\n const data = inferenceFrame.data.subarray(this.#model.windowSizeSamples);\n inferenceFrames.push(\n new AudioFrame(data, this.#opts.sampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n }\n }\n });\n }\n\n /**\n * Update the VAD options\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method allows you to update the VAD options after the VAD object has been created\n */\n updateOptions(opts: Partial<VADOptions>) {\n const oldMaxBufferedSpeech = this.#opts.maxBufferedSpeech;\n this.#opts = { ...this.#opts, ...opts };\n\n if (this.#inputSampleRate) {\n // Assert speech buffer exists\n if (this.#speechBuffer === null) throw new Error('speechBuffer is null');\n\n // Resize speech buffer\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n const resizedBuffer = new Int16Array(bufferSize);\n resizedBuffer.set(\n this.#speechBuffer.subarray(0, Math.min(this.#speechBuffer.length, bufferSize)),\n );\n this.#speechBuffer = resizedBuffer;\n\n // Determine if max has been reached\n if (this.#opts.maxBufferedSpeech > oldMaxBufferedSpeech) {\n this.#speechBufferMaxReached = false;\n }\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AACP,sBAAkE;AAGlE,wBAA+C;AAE/C,MAAM,2BAA2B;AAmBjC,MAAM,oBAAgC;AAAA,EACpC,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,uBAAuB;AAAA,EACvB,mBAAmB;AAAA;AAAA,EACnB,qBAAqB;AAAA,EACrB,YAAY;AAAA,EACZ,UAAU;AACZ;AAEO,MAAM,YAAY,cAAAA,IAAQ;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,SAA2B,MAAkB;AACvD,UAAM,EAAE,gBAAgB,GAAG,CAAC;AAC5B,SAAK,WAAW;AAChB,SAAK,QAAQ;AACb,SAAK,WAAW,CAAC;AAAA,EACnB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,cAAc,MAAiC;AAC7C,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,eAAW,UAAU,KAAK,UAAU;AAClC,aAAO,cAAc,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA4BA,aAAa,KAAK,OAA4B,CAAC,GAAiB;AAC9D,UAAM,aAAyB,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC/D,UAAM,UAAU,UAAM,uCAAoB,WAAW,QAAQ;AAC7D,WAAO,IAAI,IAAI,SAAS,UAAU;AAAA,EACpC;AAAA,EAEA,SAAoB;AAClB,UAAM,SAAS,IAAI;AAAA,MACjB;AAAA,MACA,KAAK;AAAA,MACL,IAAI,4BAAU,KAAK,UAAU,KAAK,MAAM,UAAU;AAAA,IACpD;AACA,SAAK,SAAS,KAAK,MAAM;AACzB,WAAO;AAAA,EACT;AACF;AAEO,MAAM,kBAAkB,cAAAC,UAAW;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,aAAa,IAAI,wBAAU,IAAI;AAAA,EAC/B,sBAAsB;AAAA,EACtB,cAAU,mBAAI;AAAA,EAEd,YAAY,KAAU,MAAkB,OAAkB;AACxD,UAAM,GAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,mBAAmB;AACxB,SAAK,gBAAgB;AACrB,SAAK,0BAA0B;AAC/B,SAAK,wBAAwB;AAE7B,SAAK,QAAQ,IAAI,QAAQ,YAAY;AACnC,UAAI,gBAAgB,IAAI,aAAa,KAAK,OAAO,iBAAiB;AAGlE,UAAI,oBAAoB;AAGxB,UAAI,cAAc;AAClB,UAAI,oBAAoB;AACxB,UAAI,qBAAqB;AACzB,UAAI,mBAAmB;AACvB,UAAI,eAAe;AACnB,UAAI,0BAA0B;AAC9B,UAAI,2BAA2B;AAE/B,UAAI,cAAc,CAAC;AACnB,UAAI,kBAAgC,CAAC;AACrC,UAAI,YAAmC;AAGvC,UAAI,yBAAyB;AAE7B,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,OAAO,MAAM,IAAI,MAAM,KAAK,YAAY,KAAK;AAC3D,YAAI,MAAM;AACR;AAAA,QACF;AAEA,YAAI,OAAO,UAAU,UAAU;AAC7B;AAAA,QACF;AAEA,YAAI,CAAC,KAAK,oBAAoB,CAAC,KAAK,eAAe;AACjD,eAAK,mBAAmB,MAAM;AAC9B,eAAK,wBAAwB,KAAK;AAAA,YAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,UAC/D;AACA,gBAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,eAAK,gBAAgB,IAAI,WAAW,UAAU;AAE9C,cAAI,KAAK,MAAM,eAAe,KAAK,kBAAkB;AAGnD,wBAAY,IAAI;AAAA,cACd,KAAK;AAAA,cACL,KAAK,MAAM;AAAA,cACX;AAAA,cACA,sCAAsB;AAAA;AAAA,YACxB;AAAA,UACF;AAAA,QACF,WAAW,MAAM,eAAe,KAAK,kBAAkB;AACrD,eAAK,QAAQ,MAAM,4DAA4D;AAC/E;AAAA,QACF;AAEA,oBAAY,KAAK,KAAK;AACtB,YAAI,WAAW;AACb,0BAAgB,KAAK,GAAG,UAAU,KAAK,KAAK,CAAC;AAAA,QAC/C,OAAO;AACL,0BAAgB,KAAK,KAAK;AAAA,QAC5B;AAEA,eAAO,MAAM;AACX,gBAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,gBAAM,4BAA4B,gBAC/B,IAAI,CAAC,MAAM,EAAE,iBAAiB,EAC9B,OAAO,CAAC,KAAK,MAAM,MAAM,GAAG,CAAC;AAEhC,cAAI,4BAA4B,KAAK,OAAO,mBAAmB;AAC7D;AAAA,UACF;AAEA,gBAAM,iBAAa,2BAAY,WAAW;AAC1C,gBAAM,qBAAiB,2BAAY,eAAe;AAGlD,0BAAgB,aAAa;AAAA,YAC3B,eAAe,KAAK,SAAS,GAAG,KAAK,OAAO,iBAAiB;AAAA,YAC7D,CAAC,MAAM,IAAI;AAAA,UACb;AAEA,gBAAM,IAAI,MAAM,KAAK,OAClB,IAAI,aAAa,EACjB,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM,GAAG,IAAI,CAAC;AAEhD,gBAAM,iBAAkB,KAAK,OAAO,oBAAoB,KAAK,MAAM,aAAc;AACjF,8BAAoB,KAAK,OAAO;AAChC,0BAAgB;AAChB,gBAAM,kBAAkB,KAAK,mBAAmB,KAAK,OAAO;AAC5D,gBAAM,SAAS,KAAK,OAAO,oBAAoB,kBAAkB;AACjE,gBAAM,YAAY,KAAK,MAAM,MAAM;AACnC,mCAAyB,SAAS;AAGlC,gBAAM,iBAAiB,KAAK,cAAc,SAAS;AACnD,gBAAM,eAAe,KAAK,IAAI,WAAW,cAAc;AACvD,cAAI,eAAe,GAAG;AACpB,iBAAK,cAAc,IAAI,WAAW,KAAK,SAAS,GAAG,YAAY,GAAG,iBAAiB;AACnF,iCAAqB;AAAA,UACvB,WAAW,CAAC,KAAK,yBAAyB;AACxC,iBAAK,0BAA0B;AAC/B,iBAAK,QAAQ;AAAA,cACX;AAAA,YACF;AAAA,UACF;AAEA,gBAAM,oBAAoB,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACxF,eAAK,sBAAsB,KAAK;AAAA,YAC9B;AAAA,YACA,KAAK,sBAAsB,oBAAoB;AAAA,UACjD;AACA,cAAI,KAAK,sBAAsB,0BAA0B;AACvD,iBAAK,QACF,MAAM,EAAE,OAAO,KAAK,oBAAoB,CAAC,EACzC,KAAK,mCAAmC;AAAA,UAC7C;AAEA,cAAI,aAAa;AACf,iCAAqB;AAAA,UACvB,OAAO;AACL,kCAAsB;AAAA,UACxB;AAEA,eAAK,aAAa,MAAM;AAAA,YACtB,MAAM,2BAAa;AAAA,YACnB,cAAc;AAAA,YACd,WAAW;AAAA,YACX,iBAAiB;AAAA,YACjB,gBAAgB;AAAA,YAChB,aAAa;AAAA,YACb;AAAA,YACA,QAAQ;AAAA,cACN,IAAI;AAAA,gBACF,WAAW,KAAK,SAAS,GAAG,SAAS;AAAA,gBACrC,KAAK;AAAA,gBACL;AAAA,gBACA;AAAA,cACF;AAAA,YACF;AAAA,YACA,UAAU;AAAA,YACV,uBAAuB;AAAA,YACvB,sBAAsB;AAAA,UACxB,CAAC;AAED,gBAAM,mBAAmB,MAAM;AAC7B,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,gBAAI,qBAAqB,KAAK,uBAAuB;AACnD;AAAA,YACF;AAEA,kBAAM,cAAc,KAAK,cAAc;AAAA,cACrC,oBAAoB,KAAK;AAAA,cACzB;AAAA,YACF;AACA,iBAAK,cAAc,IAAI,aAAa,CAAC;AACrC,gCAAoB,KAAK;AACzB,iBAAK,0BAA0B;AAAA,UACjC;AAEA,gBAAM,mBAAmB,MAAkB;AACzC,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,mBAAO,IAAI;AAAA,cACT,KAAK,cAAc,SAAS,KAAK,uBAAuB,iBAAiB;AAAA,cACzE,KAAK;AAAA,cACL;AAAA,cACA;AAAA,YACF;AAAA,UACF;AAEA,cAAI,IAAI,KAAK,MAAM,qBAAqB;AACtC,uCAA2B;AAC3B,uCAA2B;AAC3B,gBAAI,CAAC,eAAe,2BAA2B,KAAK,MAAM,mBAAmB;AAC3E,4BAAc;AACd,mCAAqB;AACrB,kCAAoB;AAEpB,mBAAK,aAAa,MAAM;AAAA,gBACtB,MAAM,2BAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAAA,YACH;AAAA,UACF,OAAO;AACL,wCAA4B;AAC5B,sCAA0B;AAE1B,gBAAI,CAAC,aAAa;AAChB,+BAAiB;AAAA,YACnB;AAEA,gBAAI,eAAe,2BAA2B,KAAK,MAAM,oBAAoB;AAC3E,4BAAc;AACd,kCAAoB;AACpB,mCAAqB;AAErB,mBAAK,aAAa,MAAM;AAAA,gBACtB,MAAM,2BAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAED,+BAAiB;AAAA,YACnB;AAAA,UACF;AAEA,wBAAc,CAAC;AACf,4BAAkB,CAAC;AAEnB,cAAI,WAAW,KAAK,SAAS,WAAW;AACtC,kBAAM,OAAO,WAAW,KAAK,SAAS,SAAS;AAC/C,wBAAY;AAAA,cACV,IAAI,2BAAW,MAAM,KAAK,kBAAkB,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AACA,cAAI,eAAe,KAAK,SAAS,KAAK,OAAO,mBAAmB;AAC9D,kBAAM,OAAO,eAAe,KAAK,SAAS,KAAK,OAAO,iBAAiB;AACvE,4BAAgB;AAAA,cACd,IAAI,2BAAW,MAAM,KAAK,MAAM,YAAY,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,cAAc,MAA2B;AACvC,UAAM,uBAAuB,KAAK,MAAM;AACxC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QAAI,KAAK,kBAAkB;AAEzB,UAAI,KAAK,kBAAkB,KAAM,OAAM,IAAI,MAAM,sBAAsB;AAGvE,WAAK,wBAAwB,KAAK;AAAA,QAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,MAC/D;AACA,YAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,YAAM,gBAAgB,IAAI,WAAW,UAAU;AAC/C,oBAAc;AAAA,QACZ,KAAK,cAAc,SAAS,GAAG,KAAK,IAAI,KAAK,cAAc,QAAQ,UAAU,CAAC;AAAA,MAChF;AACA,WAAK,gBAAgB;AAGrB,UAAI,KAAK,MAAM,oBAAoB,sBAAsB;AACvD,aAAK,0BAA0B;AAAA,MACjC;AAAA,IACF;AAAA,EACF;AACF;","names":["baseVAD","baseStream"]}
package/dist/vad.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"vad.d.ts","sourceRoot":"","sources":["../src/vad.ts"],"names":[],"mappings":";AAGA,OAAO,EAGL,SAAS,IAAI,UAAU,EACvB,GAAG,IAAI,OAAO,EAGf,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACzD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,SAAS,EAAuB,MAAM,iBAAiB,CAAC;AAIjE,MAAM,WAAW,UAAU;IACzB,6DAA6D;IAC7D,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6EAA6E;IAC7E,kBAAkB,EAAE,MAAM,CAAC;IAC3B,uEAAuE;IACvE,qBAAqB,EAAE,MAAM,CAAC;IAC9B,uDAAuD;IACvD,iBAAiB,EAAE,MAAM,CAAC;IAC1B,sDAAsD;IACtD,mBAAmB,EAAE,MAAM,CAAC;IAC5B,wEAAwE;IACxE,UAAU,EAAE,UAAU,CAAC;IACvB,yCAAyC;IACzC,QAAQ,EAAE,OAAO,CAAC;CACnB;AAYD,qBAAa,GAAI,SAAQ,OAAO;;IAI9B,KAAK,SAAgB;gBAET,OAAO,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU;IAOvD;;;;;;;OAOG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI;IAO9C;;;;;;;;;;;;;;;;;;;;;;;;;OAyBG;WACU,IAAI,CAAC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAM/D,MAAM,IAAI,SAAS;CASpB;AAED,qBAAa,SAAU,SAAQ,UAAU;;gBAY3B,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS;IAwPxD;;;;;;OAMG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;CA2BxC"}
1
+ {"version":3,"file":"vad.d.ts","sourceRoot":"","sources":["../src/vad.ts"],"names":[],"mappings":";AAGA,OAAO,EAGL,SAAS,IAAI,UAAU,EACvB,GAAG,IAAI,OAAO,EAGf,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC;AACzD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,SAAS,EAAuB,MAAM,iBAAiB,CAAC;AAIjE,MAAM,WAAW,UAAU;IACzB,6DAA6D;IAC7D,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6EAA6E;IAC7E,kBAAkB,EAAE,MAAM,CAAC;IAC3B,uEAAuE;IACvE,qBAAqB,EAAE,MAAM,CAAC;IAC9B,uDAAuD;IACvD,iBAAiB,EAAE,MAAM,CAAC;IAC1B,sDAAsD;IACtD,mBAAmB,EAAE,MAAM,CAAC;IAC5B,wEAAwE;IACxE,UAAU,EAAE,UAAU,CAAC;IACvB,yCAAyC;IACzC,QAAQ,EAAE,OAAO,CAAC;CACnB;AAYD,qBAAa,GAAI,SAAQ,OAAO;;IAI9B,KAAK,SAAgB;gBAET,OAAO,EAAE,gBAAgB,EAAE,IAAI,EAAE,UAAU;IAOvD;;;;;;;OAOG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI;IAO9C;;;;;;;;;;;;;;;;;;;;;;;;;OAyBG;WACU,IAAI,CAAC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM,GAAG,OAAO,CAAC,GAAG,CAAC;IAM/D,MAAM,IAAI,SAAS;CASpB;AAED,qBAAa,SAAU,SAAQ,UAAU;;gBAY3B,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS;IA6PxD;;;;;;OAMG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;CA2BxC"}
package/dist/vad.js CHANGED
@@ -118,7 +118,11 @@ class VADStream extends baseStream {
118
118
  let inferenceFrames = [];
119
119
  let resampler = null;
120
120
  let inputCopyRemainingFrac = 0;
121
- for await (const frame of this.input) {
121
+ while (true) {
122
+ const { done, value: frame } = await this.inputReader.read();
123
+ if (done) {
124
+ break;
125
+ }
122
126
  if (typeof frame === "symbol") {
123
127
  continue;
124
128
  }
@@ -192,7 +196,7 @@ class VADStream extends baseStream {
192
196
  } else {
193
197
  pubSilenceDuration += windowDuration;
194
198
  }
195
- this.queue.put({
199
+ this.outputWriter.write({
196
200
  type: VADEventType.INFERENCE_DONE,
197
201
  samplesIndex: pubCurrentSample,
198
202
  timestamp: pubTimestamp,
@@ -241,7 +245,7 @@ class VADStream extends baseStream {
241
245
  pubSpeaking = true;
242
246
  pubSilenceDuration = 0;
243
247
  pubSpeechDuration = speechThresholdDuration;
244
- this.queue.put({
248
+ this.outputWriter.write({
245
249
  type: VADEventType.START_OF_SPEECH,
246
250
  samplesIndex: pubCurrentSample,
247
251
  timestamp: pubTimestamp,
@@ -265,7 +269,7 @@ class VADStream extends baseStream {
265
269
  pubSpeaking = false;
266
270
  pubSpeechDuration = 0;
267
271
  pubSilenceDuration = silenceThresholdDuration;
268
- this.queue.put({
272
+ this.outputWriter.write({
269
273
  type: VADEventType.END_OF_SPEECH,
270
274
  samplesIndex: pubCurrentSample,
271
275
  timestamp: pubTimestamp,
package/dist/vad.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n ExpFilter,\n VADEventType,\n VADStream as baseStream,\n VAD as baseVAD,\n log,\n mergeFrames,\n} from '@livekit/agents';\nimport { AudioFrame, AudioResampler, AudioResamplerQuality } from '@livekit/rtc-node';\nimport type { InferenceSession } from 'onnxruntime-node';\nimport type { SampleRate } from './onnx_model.js';\nimport { OnnxModel, newInferenceSession } from './onnx_model.js';\n\nconst SLOW_INFERENCE_THRESHOLD = 200; // late by 200ms\n\nexport interface VADOptions {\n /** Minimum duration of speech to start a new speech chunk */\n minSpeechDuration: number;\n /** At the end of each speech, wait this duration before ending the speech */\n minSilenceDuration: number;\n /** Duration of padding to add to the beginning of each speech chunk */\n prefixPaddingDuration: number;\n /** Maximum duration of speech to keep in the buffer */\n maxBufferedSpeech: number;\n /** Maximum duration of speech to keep in the buffer*/\n activationThreshold: number;\n /** Sample rate for the inference (only 8KHz and 16KHz are supported) */\n sampleRate: SampleRate;\n /** Force the use of CPU for inference */\n forceCPU: boolean;\n}\n\nconst defaultVADOptions: VADOptions = {\n minSpeechDuration: 50,\n minSilenceDuration: 550,\n prefixPaddingDuration: 500,\n maxBufferedSpeech: 60000, // 60 seconds\n activationThreshold: 0.5,\n sampleRate: 16000,\n forceCPU: true,\n};\n\nexport class VAD extends baseVAD {\n #session: InferenceSession;\n #opts: VADOptions;\n #streams: VADStream[];\n label = 'silero.VAD';\n\n constructor(session: InferenceSession, opts: VADOptions) {\n super({ updateInterval: 32 });\n this.#session = session;\n this.#opts = opts;\n this.#streams = [];\n }\n\n /**\n * Updates the VAD options with new values.\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method will merge the provided options with existing options and update all active streams.\n * Only the properties specified in opts will be updated, other properties retain their current values.\n */\n updateOptions(opts: Partial<VADOptions>): void {\n this.#opts = { ...this.#opts, ...opts };\n for (const stream of this.#streams) {\n stream.updateOptions(this.#opts);\n }\n }\n\n /**\n * Load and initialize the Silero VAD model.\n *\n * This method loads the ONNX model and prepares it for inference. When options are not provided,\n * sane defaults are used.\n *\n * @remarks\n * This method may take time to load the model into memory.\n * It is recommended to call this method inside your prewarm mechanism.\n *\n * @example\n * ```ts\n * export default defineAgent({\n * prewarm: async (proc: JobProcess) => {\n * proc.userData.vad = await VAD.load();\n * },\n * entry: async (ctx: JobContext) => {\n * const vad = ctx.proc.userData.vad! as VAD;\n * // the rest of your agent logic\n * },\n * });\n * ```\n *\n * @param options -\n * @returns Promise\\<{@link VAD}\\>: An instance of the VAD class ready for streaming.\n */\n static async load(opts: Partial<VADOptions> = {}): Promise<VAD> {\n const mergedOpts: VADOptions = { ...defaultVADOptions, ...opts };\n const session = await newInferenceSession(mergedOpts.forceCPU);\n return new VAD(session, mergedOpts);\n }\n\n stream(): VADStream {\n const stream = new VADStream(\n this,\n this.#opts,\n new OnnxModel(this.#session, this.#opts.sampleRate),\n );\n this.#streams.push(stream);\n return stream;\n }\n}\n\nexport class VADStream extends baseStream {\n #opts: VADOptions;\n #model: OnnxModel;\n #inputSampleRate: number;\n #speechBuffer: Int16Array | null;\n #speechBufferMaxReached: boolean;\n #prefixPaddingSamples: number;\n #task: Promise<void>;\n #expFilter = new ExpFilter(0.35);\n #extraInferenceTime = 0;\n #logger = log();\n\n constructor(vad: VAD, opts: VADOptions, model: OnnxModel) {\n super(vad);\n this.#opts = opts;\n this.#model = model;\n this.#inputSampleRate = 0;\n this.#speechBuffer = null;\n this.#speechBufferMaxReached = false;\n this.#prefixPaddingSamples = 0;\n\n this.#task = new Promise(async () => {\n let inferenceData = new Float32Array(this.#model.windowSizeSamples);\n\n // a copy is exposed to the user in END_OF_SPEECH\n let speechBufferIndex = 0;\n\n // \"pub\" means public, these values are exposed to the users through events\n let pubSpeaking = false;\n let pubSpeechDuration = 0;\n let pubSilenceDuration = 0;\n let pubCurrentSample = 0;\n let pubTimestamp = 0;\n let speechThresholdDuration = 0;\n let silenceThresholdDuration = 0;\n\n let inputFrames = [];\n let inferenceFrames: AudioFrame[] = [];\n let resampler: AudioResampler | null = null;\n\n // used to avoid drift when the sampleRate ratio is not an integer\n let inputCopyRemainingFrac = 0.0;\n\n for await (const frame of this.input) {\n if (typeof frame === 'symbol') {\n continue; // ignore flush sentinel for now\n }\n\n if (!this.#inputSampleRate || !this.#speechBuffer) {\n this.#inputSampleRate = frame.sampleRate;\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n this.#speechBuffer = new Int16Array(bufferSize);\n\n if (this.#opts.sampleRate !== this.#inputSampleRate) {\n // resampling needed: the input sample rate isn't the same as the model's\n // sample rate used for inference\n resampler = new AudioResampler(\n this.#inputSampleRate,\n this.#opts.sampleRate,\n 1,\n AudioResamplerQuality.QUICK, // VAD doesn't need high quality\n );\n }\n } else if (frame.sampleRate !== this.#inputSampleRate) {\n this.#logger.error('a frame with a different sample rate was already published');\n continue;\n }\n\n inputFrames.push(frame);\n if (resampler) {\n inferenceFrames.push(...resampler.push(frame));\n } else {\n inferenceFrames.push(frame);\n }\n\n while (true) {\n const startTime = process.hrtime.bigint();\n const availableInferenceSamples = inferenceFrames\n .map((x) => x.samplesPerChannel)\n .reduce((acc, x) => acc + x, 0);\n\n if (availableInferenceSamples < this.#model.windowSizeSamples) {\n break; // not enough samples to run inference\n }\n\n const inputFrame = mergeFrames(inputFrames);\n const inferenceFrame = mergeFrames(inferenceFrames);\n\n // convert data to f32\n inferenceData = Float32Array.from(\n inferenceFrame.data.subarray(0, this.#model.windowSizeSamples),\n (x) => x / 32767,\n );\n\n const p = await this.#model\n .run(inferenceData)\n .then((data) => this.#expFilter.apply(1, data));\n\n const windowDuration = (this.#model.windowSizeSamples / this.#opts.sampleRate) * 1000;\n pubCurrentSample += this.#model.windowSizeSamples;\n pubTimestamp += windowDuration;\n const resamplingRatio = this.#inputSampleRate / this.#model.sampleRate;\n const toCopy = this.#model.windowSizeSamples * resamplingRatio + inputCopyRemainingFrac;\n const toCopyInt = Math.trunc(toCopy);\n inputCopyRemainingFrac = toCopy - toCopyInt;\n\n // copy the inference window to the speech buffer\n const availableSpace = this.#speechBuffer.length - speechBufferIndex;\n const toCopyBuffer = Math.min(toCopyInt, availableSpace);\n if (toCopyBuffer > 0) {\n this.#speechBuffer.set(inputFrame.data.subarray(0, toCopyBuffer), speechBufferIndex);\n speechBufferIndex += toCopyBuffer;\n } else if (!this.#speechBufferMaxReached) {\n this.#speechBufferMaxReached = true;\n this.#logger.warn(\n 'maxBufferedSpeech reached, ignoring further data for the current speech input',\n );\n }\n\n const inferenceDuration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.#extraInferenceTime = Math.max(\n 0,\n this.#extraInferenceTime + inferenceDuration - windowDuration,\n );\n if (this.#extraInferenceTime > SLOW_INFERENCE_THRESHOLD) {\n this.#logger\n .child({ delay: this.#extraInferenceTime })\n .warn('inference is slower than realtime');\n }\n\n if (pubSpeaking) {\n pubSpeechDuration += windowDuration;\n } else {\n pubSilenceDuration += windowDuration;\n }\n\n this.queue.put({\n type: VADEventType.INFERENCE_DONE,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [\n new AudioFrame(\n inputFrame.data.subarray(0, toCopyInt),\n this.#inputSampleRate,\n 1,\n toCopyInt,\n ),\n ],\n speaking: pubSpeaking,\n rawAccumulatedSilence: silenceThresholdDuration,\n rawAccumulatedSpeech: speechThresholdDuration,\n });\n\n const resetWriteCursor = () => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n if (speechBufferIndex <= this.#prefixPaddingSamples) {\n return;\n }\n\n const paddingData = this.#speechBuffer.subarray(\n speechBufferIndex - this.#prefixPaddingSamples,\n speechBufferIndex,\n );\n this.#speechBuffer.set(paddingData, 0);\n speechBufferIndex = this.#prefixPaddingSamples;\n this.#speechBufferMaxReached = false;\n };\n\n const copySpeechBuffer = (): AudioFrame => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n return new AudioFrame(\n this.#speechBuffer.subarray(this.#prefixPaddingSamples, speechBufferIndex),\n this.#inputSampleRate,\n 1,\n speechBufferIndex,\n );\n };\n\n if (p > this.#opts.activationThreshold) {\n speechThresholdDuration += windowDuration;\n silenceThresholdDuration = 0;\n if (!pubSpeaking && speechThresholdDuration >= this.#opts.minSpeechDuration) {\n pubSpeaking = true;\n pubSilenceDuration = 0;\n pubSpeechDuration = speechThresholdDuration;\n\n this.queue.put({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n }\n } else {\n silenceThresholdDuration += windowDuration;\n speechThresholdDuration = 0;\n\n if (!pubSpeaking) {\n resetWriteCursor();\n }\n\n if (pubSpeaking && silenceThresholdDuration > this.#opts.minSilenceDuration) {\n pubSpeaking = false;\n pubSpeechDuration = 0;\n pubSilenceDuration = silenceThresholdDuration;\n\n this.queue.put({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n\n resetWriteCursor();\n }\n }\n\n inputFrames = [];\n inferenceFrames = [];\n\n if (inputFrame.data.length > toCopyInt) {\n const data = inputFrame.data.subarray(toCopyInt);\n inputFrames.push(\n new AudioFrame(data, this.#inputSampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n if (inferenceFrame.data.length > this.#model.windowSizeSamples) {\n const data = inferenceFrame.data.subarray(this.#model.windowSizeSamples);\n inferenceFrames.push(\n new AudioFrame(data, this.#opts.sampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n }\n }\n });\n }\n\n /**\n * Update the VAD options\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method allows you to update the VAD options after the VAD object has been created\n */\n updateOptions(opts: Partial<VADOptions>) {\n const oldMaxBufferedSpeech = this.#opts.maxBufferedSpeech;\n this.#opts = { ...this.#opts, ...opts };\n\n if (this.#inputSampleRate) {\n // Assert speech buffer exists\n if (this.#speechBuffer === null) throw new Error('speechBuffer is null');\n\n // Resize speech buffer\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n const resizedBuffer = new Int16Array(bufferSize);\n resizedBuffer.set(\n this.#speechBuffer.subarray(0, Math.min(this.#speechBuffer.length, bufferSize)),\n );\n this.#speechBuffer = resizedBuffer;\n\n // Determine if max has been reached\n if (this.#opts.maxBufferedSpeech > oldMaxBufferedSpeech) {\n this.#speechBufferMaxReached = false;\n }\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EACE;AAAA,EACA;AAAA,EACA,aAAa;AAAA,EACb,OAAO;AAAA,EACP;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY,gBAAgB,6BAA6B;AAGlE,SAAS,WAAW,2BAA2B;AAE/C,MAAM,2BAA2B;AAmBjC,MAAM,oBAAgC;AAAA,EACpC,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,uBAAuB;AAAA,EACvB,mBAAmB;AAAA;AAAA,EACnB,qBAAqB;AAAA,EACrB,YAAY;AAAA,EACZ,UAAU;AACZ;AAEO,MAAM,YAAY,QAAQ;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,SAA2B,MAAkB;AACvD,UAAM,EAAE,gBAAgB,GAAG,CAAC;AAC5B,SAAK,WAAW;AAChB,SAAK,QAAQ;AACb,SAAK,WAAW,CAAC;AAAA,EACnB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,cAAc,MAAiC;AAC7C,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,eAAW,UAAU,KAAK,UAAU;AAClC,aAAO,cAAc,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA4BA,aAAa,KAAK,OAA4B,CAAC,GAAiB;AAC9D,UAAM,aAAyB,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC/D,UAAM,UAAU,MAAM,oBAAoB,WAAW,QAAQ;AAC7D,WAAO,IAAI,IAAI,SAAS,UAAU;AAAA,EACpC;AAAA,EAEA,SAAoB;AAClB,UAAM,SAAS,IAAI;AAAA,MACjB;AAAA,MACA,KAAK;AAAA,MACL,IAAI,UAAU,KAAK,UAAU,KAAK,MAAM,UAAU;AAAA,IACpD;AACA,SAAK,SAAS,KAAK,MAAM;AACzB,WAAO;AAAA,EACT;AACF;AAEO,MAAM,kBAAkB,WAAW;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,aAAa,IAAI,UAAU,IAAI;AAAA,EAC/B,sBAAsB;AAAA,EACtB,UAAU,IAAI;AAAA,EAEd,YAAY,KAAU,MAAkB,OAAkB;AACxD,UAAM,GAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,mBAAmB;AACxB,SAAK,gBAAgB;AACrB,SAAK,0BAA0B;AAC/B,SAAK,wBAAwB;AAE7B,SAAK,QAAQ,IAAI,QAAQ,YAAY;AACnC,UAAI,gBAAgB,IAAI,aAAa,KAAK,OAAO,iBAAiB;AAGlE,UAAI,oBAAoB;AAGxB,UAAI,cAAc;AAClB,UAAI,oBAAoB;AACxB,UAAI,qBAAqB;AACzB,UAAI,mBAAmB;AACvB,UAAI,eAAe;AACnB,UAAI,0BAA0B;AAC9B,UAAI,2BAA2B;AAE/B,UAAI,cAAc,CAAC;AACnB,UAAI,kBAAgC,CAAC;AACrC,UAAI,YAAmC;AAGvC,UAAI,yBAAyB;AAE7B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,OAAO,UAAU,UAAU;AAC7B;AAAA,QACF;AAEA,YAAI,CAAC,KAAK,oBAAoB,CAAC,KAAK,eAAe;AACjD,eAAK,mBAAmB,MAAM;AAC9B,eAAK,wBAAwB,KAAK;AAAA,YAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,UAC/D;AACA,gBAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,eAAK,gBAAgB,IAAI,WAAW,UAAU;AAE9C,cAAI,KAAK,MAAM,eAAe,KAAK,kBAAkB;AAGnD,wBAAY,IAAI;AAAA,cACd,KAAK;AAAA,cACL,KAAK,MAAM;AAAA,cACX;AAAA,cACA,sBAAsB;AAAA;AAAA,YACxB;AAAA,UACF;AAAA,QACF,WAAW,MAAM,eAAe,KAAK,kBAAkB;AACrD,eAAK,QAAQ,MAAM,4DAA4D;AAC/E;AAAA,QACF;AAEA,oBAAY,KAAK,KAAK;AACtB,YAAI,WAAW;AACb,0BAAgB,KAAK,GAAG,UAAU,KAAK,KAAK,CAAC;AAAA,QAC/C,OAAO;AACL,0BAAgB,KAAK,KAAK;AAAA,QAC5B;AAEA,eAAO,MAAM;AACX,gBAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,gBAAM,4BAA4B,gBAC/B,IAAI,CAAC,MAAM,EAAE,iBAAiB,EAC9B,OAAO,CAAC,KAAK,MAAM,MAAM,GAAG,CAAC;AAEhC,cAAI,4BAA4B,KAAK,OAAO,mBAAmB;AAC7D;AAAA,UACF;AAEA,gBAAM,aAAa,YAAY,WAAW;AAC1C,gBAAM,iBAAiB,YAAY,eAAe;AAGlD,0BAAgB,aAAa;AAAA,YAC3B,eAAe,KAAK,SAAS,GAAG,KAAK,OAAO,iBAAiB;AAAA,YAC7D,CAAC,MAAM,IAAI;AAAA,UACb;AAEA,gBAAM,IAAI,MAAM,KAAK,OAClB,IAAI,aAAa,EACjB,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM,GAAG,IAAI,CAAC;AAEhD,gBAAM,iBAAkB,KAAK,OAAO,oBAAoB,KAAK,MAAM,aAAc;AACjF,8BAAoB,KAAK,OAAO;AAChC,0BAAgB;AAChB,gBAAM,kBAAkB,KAAK,mBAAmB,KAAK,OAAO;AAC5D,gBAAM,SAAS,KAAK,OAAO,oBAAoB,kBAAkB;AACjE,gBAAM,YAAY,KAAK,MAAM,MAAM;AACnC,mCAAyB,SAAS;AAGlC,gBAAM,iBAAiB,KAAK,cAAc,SAAS;AACnD,gBAAM,eAAe,KAAK,IAAI,WAAW,cAAc;AACvD,cAAI,eAAe,GAAG;AACpB,iBAAK,cAAc,IAAI,WAAW,KAAK,SAAS,GAAG,YAAY,GAAG,iBAAiB;AACnF,iCAAqB;AAAA,UACvB,WAAW,CAAC,KAAK,yBAAyB;AACxC,iBAAK,0BAA0B;AAC/B,iBAAK,QAAQ;AAAA,cACX;AAAA,YACF;AAAA,UACF;AAEA,gBAAM,oBAAoB,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACxF,eAAK,sBAAsB,KAAK;AAAA,YAC9B;AAAA,YACA,KAAK,sBAAsB,oBAAoB;AAAA,UACjD;AACA,cAAI,KAAK,sBAAsB,0BAA0B;AACvD,iBAAK,QACF,MAAM,EAAE,OAAO,KAAK,oBAAoB,CAAC,EACzC,KAAK,mCAAmC;AAAA,UAC7C;AAEA,cAAI,aAAa;AACf,iCAAqB;AAAA,UACvB,OAAO;AACL,kCAAsB;AAAA,UACxB;AAEA,eAAK,MAAM,IAAI;AAAA,YACb,MAAM,aAAa;AAAA,YACnB,cAAc;AAAA,YACd,WAAW;AAAA,YACX,iBAAiB;AAAA,YACjB,gBAAgB;AAAA,YAChB,aAAa;AAAA,YACb;AAAA,YACA,QAAQ;AAAA,cACN,IAAI;AAAA,gBACF,WAAW,KAAK,SAAS,GAAG,SAAS;AAAA,gBACrC,KAAK;AAAA,gBACL;AAAA,gBACA;AAAA,cACF;AAAA,YACF;AAAA,YACA,UAAU;AAAA,YACV,uBAAuB;AAAA,YACvB,sBAAsB;AAAA,UACxB,CAAC;AAED,gBAAM,mBAAmB,MAAM;AAC7B,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,gBAAI,qBAAqB,KAAK,uBAAuB;AACnD;AAAA,YACF;AAEA,kBAAM,cAAc,KAAK,cAAc;AAAA,cACrC,oBAAoB,KAAK;AAAA,cACzB;AAAA,YACF;AACA,iBAAK,cAAc,IAAI,aAAa,CAAC;AACrC,gCAAoB,KAAK;AACzB,iBAAK,0BAA0B;AAAA,UACjC;AAEA,gBAAM,mBAAmB,MAAkB;AACzC,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,mBAAO,IAAI;AAAA,cACT,KAAK,cAAc,SAAS,KAAK,uBAAuB,iBAAiB;AAAA,cACzE,KAAK;AAAA,cACL;AAAA,cACA;AAAA,YACF;AAAA,UACF;AAEA,cAAI,IAAI,KAAK,MAAM,qBAAqB;AACtC,uCAA2B;AAC3B,uCAA2B;AAC3B,gBAAI,CAAC,eAAe,2BAA2B,KAAK,MAAM,mBAAmB;AAC3E,4BAAc;AACd,mCAAqB;AACrB,kCAAoB;AAEpB,mBAAK,MAAM,IAAI;AAAA,gBACb,MAAM,aAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAAA,YACH;AAAA,UACF,OAAO;AACL,wCAA4B;AAC5B,sCAA0B;AAE1B,gBAAI,CAAC,aAAa;AAChB,+BAAiB;AAAA,YACnB;AAEA,gBAAI,eAAe,2BAA2B,KAAK,MAAM,oBAAoB;AAC3E,4BAAc;AACd,kCAAoB;AACpB,mCAAqB;AAErB,mBAAK,MAAM,IAAI;AAAA,gBACb,MAAM,aAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAED,+BAAiB;AAAA,YACnB;AAAA,UACF;AAEA,wBAAc,CAAC;AACf,4BAAkB,CAAC;AAEnB,cAAI,WAAW,KAAK,SAAS,WAAW;AACtC,kBAAM,OAAO,WAAW,KAAK,SAAS,SAAS;AAC/C,wBAAY;AAAA,cACV,IAAI,WAAW,MAAM,KAAK,kBAAkB,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AACA,cAAI,eAAe,KAAK,SAAS,KAAK,OAAO,mBAAmB;AAC9D,kBAAM,OAAO,eAAe,KAAK,SAAS,KAAK,OAAO,iBAAiB;AACvE,4BAAgB;AAAA,cACd,IAAI,WAAW,MAAM,KAAK,MAAM,YAAY,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,cAAc,MAA2B;AACvC,UAAM,uBAAuB,KAAK,MAAM;AACxC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QAAI,KAAK,kBAAkB;AAEzB,UAAI,KAAK,kBAAkB,KAAM,OAAM,IAAI,MAAM,sBAAsB;AAGvE,WAAK,wBAAwB,KAAK;AAAA,QAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,MAC/D;AACA,YAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,YAAM,gBAAgB,IAAI,WAAW,UAAU;AAC/C,oBAAc;AAAA,QACZ,KAAK,cAAc,SAAS,GAAG,KAAK,IAAI,KAAK,cAAc,QAAQ,UAAU,CAAC;AAAA,MAChF;AACA,WAAK,gBAAgB;AAGrB,UAAI,KAAK,MAAM,oBAAoB,sBAAsB;AACvD,aAAK,0BAA0B;AAAA,MACjC;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n ExpFilter,\n VADEventType,\n VADStream as baseStream,\n VAD as baseVAD,\n log,\n mergeFrames,\n} from '@livekit/agents';\nimport { AudioFrame, AudioResampler, AudioResamplerQuality } from '@livekit/rtc-node';\nimport type { InferenceSession } from 'onnxruntime-node';\nimport type { SampleRate } from './onnx_model.js';\nimport { OnnxModel, newInferenceSession } from './onnx_model.js';\n\nconst SLOW_INFERENCE_THRESHOLD = 200; // late by 200ms\n\nexport interface VADOptions {\n /** Minimum duration of speech to start a new speech chunk */\n minSpeechDuration: number;\n /** At the end of each speech, wait this duration before ending the speech */\n minSilenceDuration: number;\n /** Duration of padding to add to the beginning of each speech chunk */\n prefixPaddingDuration: number;\n /** Maximum duration of speech to keep in the buffer */\n maxBufferedSpeech: number;\n /** Maximum duration of speech to keep in the buffer*/\n activationThreshold: number;\n /** Sample rate for the inference (only 8KHz and 16KHz are supported) */\n sampleRate: SampleRate;\n /** Force the use of CPU for inference */\n forceCPU: boolean;\n}\n\nconst defaultVADOptions: VADOptions = {\n minSpeechDuration: 50,\n minSilenceDuration: 550,\n prefixPaddingDuration: 500,\n maxBufferedSpeech: 60000, // 60 seconds\n activationThreshold: 0.5,\n sampleRate: 16000,\n forceCPU: true,\n};\n\nexport class VAD extends baseVAD {\n #session: InferenceSession;\n #opts: VADOptions;\n #streams: VADStream[];\n label = 'silero.VAD';\n\n constructor(session: InferenceSession, opts: VADOptions) {\n super({ updateInterval: 32 });\n this.#session = session;\n this.#opts = opts;\n this.#streams = [];\n }\n\n /**\n * Updates the VAD options with new values.\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method will merge the provided options with existing options and update all active streams.\n * Only the properties specified in opts will be updated, other properties retain their current values.\n */\n updateOptions(opts: Partial<VADOptions>): void {\n this.#opts = { ...this.#opts, ...opts };\n for (const stream of this.#streams) {\n stream.updateOptions(this.#opts);\n }\n }\n\n /**\n * Load and initialize the Silero VAD model.\n *\n * This method loads the ONNX model and prepares it for inference. When options are not provided,\n * sane defaults are used.\n *\n * @remarks\n * This method may take time to load the model into memory.\n * It is recommended to call this method inside your prewarm mechanism.\n *\n * @example\n * ```ts\n * export default defineAgent({\n * prewarm: async (proc: JobProcess) => {\n * proc.userData.vad = await VAD.load();\n * },\n * entry: async (ctx: JobContext) => {\n * const vad = ctx.proc.userData.vad! as VAD;\n * // the rest of your agent logic\n * },\n * });\n * ```\n *\n * @param options -\n * @returns Promise\\<{@link VAD}\\>: An instance of the VAD class ready for streaming.\n */\n static async load(opts: Partial<VADOptions> = {}): Promise<VAD> {\n const mergedOpts: VADOptions = { ...defaultVADOptions, ...opts };\n const session = await newInferenceSession(mergedOpts.forceCPU);\n return new VAD(session, mergedOpts);\n }\n\n stream(): VADStream {\n const stream = new VADStream(\n this,\n this.#opts,\n new OnnxModel(this.#session, this.#opts.sampleRate),\n );\n this.#streams.push(stream);\n return stream;\n }\n}\n\nexport class VADStream extends baseStream {\n #opts: VADOptions;\n #model: OnnxModel;\n #inputSampleRate: number;\n #speechBuffer: Int16Array | null;\n #speechBufferMaxReached: boolean;\n #prefixPaddingSamples: number;\n #task: Promise<void>;\n #expFilter = new ExpFilter(0.35);\n #extraInferenceTime = 0;\n #logger = log();\n\n constructor(vad: VAD, opts: VADOptions, model: OnnxModel) {\n super(vad);\n this.#opts = opts;\n this.#model = model;\n this.#inputSampleRate = 0;\n this.#speechBuffer = null;\n this.#speechBufferMaxReached = false;\n this.#prefixPaddingSamples = 0;\n\n this.#task = new Promise(async () => {\n let inferenceData = new Float32Array(this.#model.windowSizeSamples);\n\n // a copy is exposed to the user in END_OF_SPEECH\n let speechBufferIndex = 0;\n\n // \"pub\" means public, these values are exposed to the users through events\n let pubSpeaking = false;\n let pubSpeechDuration = 0;\n let pubSilenceDuration = 0;\n let pubCurrentSample = 0;\n let pubTimestamp = 0;\n let speechThresholdDuration = 0;\n let silenceThresholdDuration = 0;\n\n let inputFrames = [];\n let inferenceFrames: AudioFrame[] = [];\n let resampler: AudioResampler | null = null;\n\n // used to avoid drift when the sampleRate ratio is not an integer\n let inputCopyRemainingFrac = 0.0;\n\n while (true) {\n const { done, value: frame } = await this.inputReader.read();\n if (done) {\n break;\n }\n\n if (typeof frame === 'symbol') {\n continue; // ignore flush sentinel for now\n }\n\n if (!this.#inputSampleRate || !this.#speechBuffer) {\n this.#inputSampleRate = frame.sampleRate;\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n this.#speechBuffer = new Int16Array(bufferSize);\n\n if (this.#opts.sampleRate !== this.#inputSampleRate) {\n // resampling needed: the input sample rate isn't the same as the model's\n // sample rate used for inference\n resampler = new AudioResampler(\n this.#inputSampleRate,\n this.#opts.sampleRate,\n 1,\n AudioResamplerQuality.QUICK, // VAD doesn't need high quality\n );\n }\n } else if (frame.sampleRate !== this.#inputSampleRate) {\n this.#logger.error('a frame with a different sample rate was already published');\n continue;\n }\n\n inputFrames.push(frame);\n if (resampler) {\n inferenceFrames.push(...resampler.push(frame));\n } else {\n inferenceFrames.push(frame);\n }\n\n while (true) {\n const startTime = process.hrtime.bigint();\n const availableInferenceSamples = inferenceFrames\n .map((x) => x.samplesPerChannel)\n .reduce((acc, x) => acc + x, 0);\n\n if (availableInferenceSamples < this.#model.windowSizeSamples) {\n break; // not enough samples to run inference\n }\n\n const inputFrame = mergeFrames(inputFrames);\n const inferenceFrame = mergeFrames(inferenceFrames);\n\n // convert data to f32\n inferenceData = Float32Array.from(\n inferenceFrame.data.subarray(0, this.#model.windowSizeSamples),\n (x) => x / 32767,\n );\n\n const p = await this.#model\n .run(inferenceData)\n .then((data) => this.#expFilter.apply(1, data));\n\n const windowDuration = (this.#model.windowSizeSamples / this.#opts.sampleRate) * 1000;\n pubCurrentSample += this.#model.windowSizeSamples;\n pubTimestamp += windowDuration;\n const resamplingRatio = this.#inputSampleRate / this.#model.sampleRate;\n const toCopy = this.#model.windowSizeSamples * resamplingRatio + inputCopyRemainingFrac;\n const toCopyInt = Math.trunc(toCopy);\n inputCopyRemainingFrac = toCopy - toCopyInt;\n\n // copy the inference window to the speech buffer\n const availableSpace = this.#speechBuffer.length - speechBufferIndex;\n const toCopyBuffer = Math.min(toCopyInt, availableSpace);\n if (toCopyBuffer > 0) {\n this.#speechBuffer.set(inputFrame.data.subarray(0, toCopyBuffer), speechBufferIndex);\n speechBufferIndex += toCopyBuffer;\n } else if (!this.#speechBufferMaxReached) {\n this.#speechBufferMaxReached = true;\n this.#logger.warn(\n 'maxBufferedSpeech reached, ignoring further data for the current speech input',\n );\n }\n\n const inferenceDuration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.#extraInferenceTime = Math.max(\n 0,\n this.#extraInferenceTime + inferenceDuration - windowDuration,\n );\n if (this.#extraInferenceTime > SLOW_INFERENCE_THRESHOLD) {\n this.#logger\n .child({ delay: this.#extraInferenceTime })\n .warn('inference is slower than realtime');\n }\n\n if (pubSpeaking) {\n pubSpeechDuration += windowDuration;\n } else {\n pubSilenceDuration += windowDuration;\n }\n\n this.outputWriter.write({\n type: VADEventType.INFERENCE_DONE,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [\n new AudioFrame(\n inputFrame.data.subarray(0, toCopyInt),\n this.#inputSampleRate,\n 1,\n toCopyInt,\n ),\n ],\n speaking: pubSpeaking,\n rawAccumulatedSilence: silenceThresholdDuration,\n rawAccumulatedSpeech: speechThresholdDuration,\n });\n\n const resetWriteCursor = () => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n if (speechBufferIndex <= this.#prefixPaddingSamples) {\n return;\n }\n\n const paddingData = this.#speechBuffer.subarray(\n speechBufferIndex - this.#prefixPaddingSamples,\n speechBufferIndex,\n );\n this.#speechBuffer.set(paddingData, 0);\n speechBufferIndex = this.#prefixPaddingSamples;\n this.#speechBufferMaxReached = false;\n };\n\n const copySpeechBuffer = (): AudioFrame => {\n if (!this.#speechBuffer) throw new Error('speechBuffer is empty');\n return new AudioFrame(\n this.#speechBuffer.subarray(this.#prefixPaddingSamples, speechBufferIndex),\n this.#inputSampleRate,\n 1,\n speechBufferIndex,\n );\n };\n\n if (p > this.#opts.activationThreshold) {\n speechThresholdDuration += windowDuration;\n silenceThresholdDuration = 0;\n if (!pubSpeaking && speechThresholdDuration >= this.#opts.minSpeechDuration) {\n pubSpeaking = true;\n pubSilenceDuration = 0;\n pubSpeechDuration = speechThresholdDuration;\n\n this.outputWriter.write({\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n }\n } else {\n silenceThresholdDuration += windowDuration;\n speechThresholdDuration = 0;\n\n if (!pubSpeaking) {\n resetWriteCursor();\n }\n\n if (pubSpeaking && silenceThresholdDuration > this.#opts.minSilenceDuration) {\n pubSpeaking = false;\n pubSpeechDuration = 0;\n pubSilenceDuration = silenceThresholdDuration;\n\n this.outputWriter.write({\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: pubCurrentSample,\n timestamp: pubTimestamp,\n silenceDuration: pubSilenceDuration,\n speechDuration: pubSpeechDuration,\n probability: p,\n inferenceDuration,\n frames: [copySpeechBuffer()],\n speaking: pubSpeaking,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n });\n\n resetWriteCursor();\n }\n }\n\n inputFrames = [];\n inferenceFrames = [];\n\n if (inputFrame.data.length > toCopyInt) {\n const data = inputFrame.data.subarray(toCopyInt);\n inputFrames.push(\n new AudioFrame(data, this.#inputSampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n if (inferenceFrame.data.length > this.#model.windowSizeSamples) {\n const data = inferenceFrame.data.subarray(this.#model.windowSizeSamples);\n inferenceFrames.push(\n new AudioFrame(data, this.#opts.sampleRate, 1, Math.trunc(data.length / 2)),\n );\n }\n }\n }\n });\n }\n\n /**\n * Update the VAD options\n *\n * @param opts - Partial options object containing the values to update\n * @remarks\n * This method allows you to update the VAD options after the VAD object has been created\n */\n updateOptions(opts: Partial<VADOptions>) {\n const oldMaxBufferedSpeech = this.#opts.maxBufferedSpeech;\n this.#opts = { ...this.#opts, ...opts };\n\n if (this.#inputSampleRate) {\n // Assert speech buffer exists\n if (this.#speechBuffer === null) throw new Error('speechBuffer is null');\n\n // Resize speech buffer\n this.#prefixPaddingSamples = Math.trunc(\n (this.#opts.prefixPaddingDuration * this.#inputSampleRate) / 1000,\n );\n const bufferSize =\n Math.trunc((this.#opts.maxBufferedSpeech * this.#inputSampleRate) / 1000) +\n this.#prefixPaddingSamples;\n const resizedBuffer = new Int16Array(bufferSize);\n resizedBuffer.set(\n this.#speechBuffer.subarray(0, Math.min(this.#speechBuffer.length, bufferSize)),\n );\n this.#speechBuffer = resizedBuffer;\n\n // Determine if max has been reached\n if (this.#opts.maxBufferedSpeech > oldMaxBufferedSpeech) {\n this.#speechBufferMaxReached = false;\n }\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EACE;AAAA,EACA;AAAA,EACA,aAAa;AAAA,EACb,OAAO;AAAA,EACP;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY,gBAAgB,6BAA6B;AAGlE,SAAS,WAAW,2BAA2B;AAE/C,MAAM,2BAA2B;AAmBjC,MAAM,oBAAgC;AAAA,EACpC,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,uBAAuB;AAAA,EACvB,mBAAmB;AAAA;AAAA,EACnB,qBAAqB;AAAA,EACrB,YAAY;AAAA,EACZ,UAAU;AACZ;AAEO,MAAM,YAAY,QAAQ;AAAA,EAC/B;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ;AAAA,EAER,YAAY,SAA2B,MAAkB;AACvD,UAAM,EAAE,gBAAgB,GAAG,CAAC;AAC5B,SAAK,WAAW;AAChB,SAAK,QAAQ;AACb,SAAK,WAAW,CAAC;AAAA,EACnB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,cAAc,MAAiC;AAC7C,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,eAAW,UAAU,KAAK,UAAU;AAClC,aAAO,cAAc,KAAK,KAAK;AAAA,IACjC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EA4BA,aAAa,KAAK,OAA4B,CAAC,GAAiB;AAC9D,UAAM,aAAyB,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAC/D,UAAM,UAAU,MAAM,oBAAoB,WAAW,QAAQ;AAC7D,WAAO,IAAI,IAAI,SAAS,UAAU;AAAA,EACpC;AAAA,EAEA,SAAoB;AAClB,UAAM,SAAS,IAAI;AAAA,MACjB;AAAA,MACA,KAAK;AAAA,MACL,IAAI,UAAU,KAAK,UAAU,KAAK,MAAM,UAAU;AAAA,IACpD;AACA,SAAK,SAAS,KAAK,MAAM;AACzB,WAAO;AAAA,EACT;AACF;AAEO,MAAM,kBAAkB,WAAW;AAAA,EACxC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,aAAa,IAAI,UAAU,IAAI;AAAA,EAC/B,sBAAsB;AAAA,EACtB,UAAU,IAAI;AAAA,EAEd,YAAY,KAAU,MAAkB,OAAkB;AACxD,UAAM,GAAG;AACT,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,mBAAmB;AACxB,SAAK,gBAAgB;AACrB,SAAK,0BAA0B;AAC/B,SAAK,wBAAwB;AAE7B,SAAK,QAAQ,IAAI,QAAQ,YAAY;AACnC,UAAI,gBAAgB,IAAI,aAAa,KAAK,OAAO,iBAAiB;AAGlE,UAAI,oBAAoB;AAGxB,UAAI,cAAc;AAClB,UAAI,oBAAoB;AACxB,UAAI,qBAAqB;AACzB,UAAI,mBAAmB;AACvB,UAAI,eAAe;AACnB,UAAI,0BAA0B;AAC9B,UAAI,2BAA2B;AAE/B,UAAI,cAAc,CAAC;AACnB,UAAI,kBAAgC,CAAC;AACrC,UAAI,YAAmC;AAGvC,UAAI,yBAAyB;AAE7B,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,OAAO,MAAM,IAAI,MAAM,KAAK,YAAY,KAAK;AAC3D,YAAI,MAAM;AACR;AAAA,QACF;AAEA,YAAI,OAAO,UAAU,UAAU;AAC7B;AAAA,QACF;AAEA,YAAI,CAAC,KAAK,oBAAoB,CAAC,KAAK,eAAe;AACjD,eAAK,mBAAmB,MAAM;AAC9B,eAAK,wBAAwB,KAAK;AAAA,YAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,UAC/D;AACA,gBAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,eAAK,gBAAgB,IAAI,WAAW,UAAU;AAE9C,cAAI,KAAK,MAAM,eAAe,KAAK,kBAAkB;AAGnD,wBAAY,IAAI;AAAA,cACd,KAAK;AAAA,cACL,KAAK,MAAM;AAAA,cACX;AAAA,cACA,sBAAsB;AAAA;AAAA,YACxB;AAAA,UACF;AAAA,QACF,WAAW,MAAM,eAAe,KAAK,kBAAkB;AACrD,eAAK,QAAQ,MAAM,4DAA4D;AAC/E;AAAA,QACF;AAEA,oBAAY,KAAK,KAAK;AACtB,YAAI,WAAW;AACb,0BAAgB,KAAK,GAAG,UAAU,KAAK,KAAK,CAAC;AAAA,QAC/C,OAAO;AACL,0BAAgB,KAAK,KAAK;AAAA,QAC5B;AAEA,eAAO,MAAM;AACX,gBAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,gBAAM,4BAA4B,gBAC/B,IAAI,CAAC,MAAM,EAAE,iBAAiB,EAC9B,OAAO,CAAC,KAAK,MAAM,MAAM,GAAG,CAAC;AAEhC,cAAI,4BAA4B,KAAK,OAAO,mBAAmB;AAC7D;AAAA,UACF;AAEA,gBAAM,aAAa,YAAY,WAAW;AAC1C,gBAAM,iBAAiB,YAAY,eAAe;AAGlD,0BAAgB,aAAa;AAAA,YAC3B,eAAe,KAAK,SAAS,GAAG,KAAK,OAAO,iBAAiB;AAAA,YAC7D,CAAC,MAAM,IAAI;AAAA,UACb;AAEA,gBAAM,IAAI,MAAM,KAAK,OAClB,IAAI,aAAa,EACjB,KAAK,CAAC,SAAS,KAAK,WAAW,MAAM,GAAG,IAAI,CAAC;AAEhD,gBAAM,iBAAkB,KAAK,OAAO,oBAAoB,KAAK,MAAM,aAAc;AACjF,8BAAoB,KAAK,OAAO;AAChC,0BAAgB;AAChB,gBAAM,kBAAkB,KAAK,mBAAmB,KAAK,OAAO;AAC5D,gBAAM,SAAS,KAAK,OAAO,oBAAoB,kBAAkB;AACjE,gBAAM,YAAY,KAAK,MAAM,MAAM;AACnC,mCAAyB,SAAS;AAGlC,gBAAM,iBAAiB,KAAK,cAAc,SAAS;AACnD,gBAAM,eAAe,KAAK,IAAI,WAAW,cAAc;AACvD,cAAI,eAAe,GAAG;AACpB,iBAAK,cAAc,IAAI,WAAW,KAAK,SAAS,GAAG,YAAY,GAAG,iBAAiB;AACnF,iCAAqB;AAAA,UACvB,WAAW,CAAC,KAAK,yBAAyB;AACxC,iBAAK,0BAA0B;AAC/B,iBAAK,QAAQ;AAAA,cACX;AAAA,YACF;AAAA,UACF;AAEA,gBAAM,oBAAoB,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACxF,eAAK,sBAAsB,KAAK;AAAA,YAC9B;AAAA,YACA,KAAK,sBAAsB,oBAAoB;AAAA,UACjD;AACA,cAAI,KAAK,sBAAsB,0BAA0B;AACvD,iBAAK,QACF,MAAM,EAAE,OAAO,KAAK,oBAAoB,CAAC,EACzC,KAAK,mCAAmC;AAAA,UAC7C;AAEA,cAAI,aAAa;AACf,iCAAqB;AAAA,UACvB,OAAO;AACL,kCAAsB;AAAA,UACxB;AAEA,eAAK,aAAa,MAAM;AAAA,YACtB,MAAM,aAAa;AAAA,YACnB,cAAc;AAAA,YACd,WAAW;AAAA,YACX,iBAAiB;AAAA,YACjB,gBAAgB;AAAA,YAChB,aAAa;AAAA,YACb;AAAA,YACA,QAAQ;AAAA,cACN,IAAI;AAAA,gBACF,WAAW,KAAK,SAAS,GAAG,SAAS;AAAA,gBACrC,KAAK;AAAA,gBACL;AAAA,gBACA;AAAA,cACF;AAAA,YACF;AAAA,YACA,UAAU;AAAA,YACV,uBAAuB;AAAA,YACvB,sBAAsB;AAAA,UACxB,CAAC;AAED,gBAAM,mBAAmB,MAAM;AAC7B,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,gBAAI,qBAAqB,KAAK,uBAAuB;AACnD;AAAA,YACF;AAEA,kBAAM,cAAc,KAAK,cAAc;AAAA,cACrC,oBAAoB,KAAK;AAAA,cACzB;AAAA,YACF;AACA,iBAAK,cAAc,IAAI,aAAa,CAAC;AACrC,gCAAoB,KAAK;AACzB,iBAAK,0BAA0B;AAAA,UACjC;AAEA,gBAAM,mBAAmB,MAAkB;AACzC,gBAAI,CAAC,KAAK,cAAe,OAAM,IAAI,MAAM,uBAAuB;AAChE,mBAAO,IAAI;AAAA,cACT,KAAK,cAAc,SAAS,KAAK,uBAAuB,iBAAiB;AAAA,cACzE,KAAK;AAAA,cACL;AAAA,cACA;AAAA,YACF;AAAA,UACF;AAEA,cAAI,IAAI,KAAK,MAAM,qBAAqB;AACtC,uCAA2B;AAC3B,uCAA2B;AAC3B,gBAAI,CAAC,eAAe,2BAA2B,KAAK,MAAM,mBAAmB;AAC3E,4BAAc;AACd,mCAAqB;AACrB,kCAAoB;AAEpB,mBAAK,aAAa,MAAM;AAAA,gBACtB,MAAM,aAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAAA,YACH;AAAA,UACF,OAAO;AACL,wCAA4B;AAC5B,sCAA0B;AAE1B,gBAAI,CAAC,aAAa;AAChB,+BAAiB;AAAA,YACnB;AAEA,gBAAI,eAAe,2BAA2B,KAAK,MAAM,oBAAoB;AAC3E,4BAAc;AACd,kCAAoB;AACpB,mCAAqB;AAErB,mBAAK,aAAa,MAAM;AAAA,gBACtB,MAAM,aAAa;AAAA,gBACnB,cAAc;AAAA,gBACd,WAAW;AAAA,gBACX,iBAAiB;AAAA,gBACjB,gBAAgB;AAAA,gBAChB,aAAa;AAAA,gBACb;AAAA,gBACA,QAAQ,CAAC,iBAAiB,CAAC;AAAA,gBAC3B,UAAU;AAAA,gBACV,uBAAuB;AAAA,gBACvB,sBAAsB;AAAA,cACxB,CAAC;AAED,+BAAiB;AAAA,YACnB;AAAA,UACF;AAEA,wBAAc,CAAC;AACf,4BAAkB,CAAC;AAEnB,cAAI,WAAW,KAAK,SAAS,WAAW;AACtC,kBAAM,OAAO,WAAW,KAAK,SAAS,SAAS;AAC/C,wBAAY;AAAA,cACV,IAAI,WAAW,MAAM,KAAK,kBAAkB,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AACA,cAAI,eAAe,KAAK,SAAS,KAAK,OAAO,mBAAmB;AAC9D,kBAAM,OAAO,eAAe,KAAK,SAAS,KAAK,OAAO,iBAAiB;AACvE,4BAAgB;AAAA,cACd,IAAI,WAAW,MAAM,KAAK,MAAM,YAAY,GAAG,KAAK,MAAM,KAAK,SAAS,CAAC,CAAC;AAAA,YAC5E;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,cAAc,MAA2B;AACvC,UAAM,uBAAuB,KAAK,MAAM;AACxC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAEtC,QAAI,KAAK,kBAAkB;AAEzB,UAAI,KAAK,kBAAkB,KAAM,OAAM,IAAI,MAAM,sBAAsB;AAGvE,WAAK,wBAAwB,KAAK;AAAA,QAC/B,KAAK,MAAM,wBAAwB,KAAK,mBAAoB;AAAA,MAC/D;AACA,YAAM,aACJ,KAAK,MAAO,KAAK,MAAM,oBAAoB,KAAK,mBAAoB,GAAI,IACxE,KAAK;AACP,YAAM,gBAAgB,IAAI,WAAW,UAAU;AAC/C,oBAAc;AAAA,QACZ,KAAK,cAAc,SAAS,GAAG,KAAK,IAAI,KAAK,cAAc,QAAQ,UAAU,CAAC;AAAA,MAChF;AACA,WAAK,gBAAgB;AAGrB,UAAI,KAAK,MAAM,oBAAoB,sBAAsB;AACvD,aAAK,0BAA0B;AAAA,MACjC;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-silero",
3
- "version": "0.5.9",
3
+ "version": "1.0.0-next.0",
4
4
  "description": "Silero voice activity detection LiveKit Node Agents",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -25,8 +25,8 @@
25
25
  "README.md"
26
26
  ],
27
27
  "devDependencies": {
28
- "@livekit/agents": "^x",
29
- "@livekit/rtc-node": "^0.13.11",
28
+ "@livekit/agents": "^1.0.0-next.0",
29
+ "@livekit/rtc-node": "^0.13.12",
30
30
  "@microsoft/api-extractor": "^7.35.0",
31
31
  "@types/ws": "^8.5.10",
32
32
  "onnxruntime-common": ">=1.19.0 <1.22.0",
@@ -38,8 +38,8 @@
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
- "@livekit/rtc-node": "^0.13.11",
42
- "@livekit/agents": "^0.7.9x"
41
+ "@livekit/rtc-node": "^0.13.12",
42
+ "@livekit/agents": "^1.0.0-next.01.0.0-next.0"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\" && cp src/silero_vad.onnx dist/",
package/src/index.ts CHANGED
@@ -1,4 +1,18 @@
1
- // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import { Plugin } from '@livekit/agents';
5
+
4
6
  export { VAD, VADStream } from './vad.js';
7
+
8
+ class SileroPlugin extends Plugin {
9
+ constructor() {
10
+ super({
11
+ title: 'silero',
12
+ version: '0.5.6',
13
+ package: '@livekit/agents-plugin-silero',
14
+ });
15
+ }
16
+ }
17
+
18
+ Plugin.registerPlugin(new SileroPlugin());
package/src/vad.ts CHANGED
@@ -157,7 +157,12 @@ export class VADStream extends baseStream {
157
157
  // used to avoid drift when the sampleRate ratio is not an integer
158
158
  let inputCopyRemainingFrac = 0.0;
159
159
 
160
- for await (const frame of this.input) {
160
+ while (true) {
161
+ const { done, value: frame } = await this.inputReader.read();
162
+ if (done) {
163
+ break;
164
+ }
165
+
161
166
  if (typeof frame === 'symbol') {
162
167
  continue; // ignore flush sentinel for now
163
168
  }
@@ -255,7 +260,7 @@ export class VADStream extends baseStream {
255
260
  pubSilenceDuration += windowDuration;
256
261
  }
257
262
 
258
- this.queue.put({
263
+ this.outputWriter.write({
259
264
  type: VADEventType.INFERENCE_DONE,
260
265
  samplesIndex: pubCurrentSample,
261
266
  timestamp: pubTimestamp,
@@ -309,7 +314,7 @@ export class VADStream extends baseStream {
309
314
  pubSilenceDuration = 0;
310
315
  pubSpeechDuration = speechThresholdDuration;
311
316
 
312
- this.queue.put({
317
+ this.outputWriter.write({
313
318
  type: VADEventType.START_OF_SPEECH,
314
319
  samplesIndex: pubCurrentSample,
315
320
  timestamp: pubTimestamp,
@@ -336,7 +341,7 @@ export class VADStream extends baseStream {
336
341
  pubSpeechDuration = 0;
337
342
  pubSilenceDuration = silenceThresholdDuration;
338
343
 
339
- this.queue.put({
344
+ this.outputWriter.write({
340
345
  type: VADEventType.END_OF_SPEECH,
341
346
  samplesIndex: pubCurrentSample,
342
347
  timestamp: pubTimestamp,