@guidekit/vad 0.1.0-beta.1 → 0.1.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.cjs +3 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +3 -3
- package/dist/index.js.map +1 -1
- package/package.json +33 -12
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 GuideKit
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/index.cjs
CHANGED
|
@@ -38,7 +38,7 @@ __export(index_exports, {
|
|
|
38
38
|
});
|
|
39
39
|
module.exports = __toCommonJS(index_exports);
|
|
40
40
|
var ort = __toESM(require("onnxruntime-web"), 1);
|
|
41
|
-
var VAD_VERSION = "0.1.0";
|
|
41
|
+
var VAD_VERSION = "0.1.0-beta.2";
|
|
42
42
|
var LOG_PREFIX = "[GuideKit:VAD]";
|
|
43
43
|
var DEFAULT_MODEL_URL = "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/silero_vad_v5.onnx";
|
|
44
44
|
var CACHE_NAME = `guidekit-vad-v${VAD_VERSION}`;
|
|
@@ -230,9 +230,9 @@ var SileroVAD = class {
|
|
|
230
230
|
this._resetStates();
|
|
231
231
|
this._isCalibrating = true;
|
|
232
232
|
this._calibrationSamples = [];
|
|
233
|
-
this._calibrationFramesNeeded = Math.ceil(
|
|
233
|
+
this._calibrationFramesNeeded = Math.max(1, Math.floor(
|
|
234
234
|
CALIBRATION_DURATION_MS / 1e3 * this._sampleRate / FRAME_SIZE
|
|
235
|
-
);
|
|
235
|
+
));
|
|
236
236
|
this._log("Calibrating noise floor for", this._calibrationFramesNeeded, "frames");
|
|
237
237
|
this._setupAudioPipeline(stream);
|
|
238
238
|
}
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// @guidekit/vad — Silero VAD ONNX model wrapper for voice activity detection\nimport * as ort from 'onnxruntime-web';\n\nexport const VAD_VERSION = '0.1.0';\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nconst LOG_PREFIX = '[GuideKit:VAD]';\n\n/** Default CDN URL for the Silero VAD ONNX model (v5). */\nconst DEFAULT_MODEL_URL =\n 'https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/silero_vad_v5.onnx';\n\n/** Cache API key used for persisting the downloaded ONNX model. */\nconst CACHE_NAME = `guidekit-vad-v${VAD_VERSION}`;\nconst CACHE_MODEL_KEY = 'model.onnx';\n\n/** Silero VAD frame size: 512 samples at 16 kHz = 32 ms per frame. */\nconst FRAME_SIZE = 512;\n\n/** Target sample rate for VAD processing. */\nconst TARGET_SAMPLE_RATE = 16000;\n\n/** Duration (in ms) of audio collected for noise floor calibration. */\nconst CALIBRATION_DURATION_MS = 500;\n\n/** Hidden/cell state size for Silero VAD v5 LSTM. */\nconst STATE_SIZE = 128;\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport interface VADOptions {\n /** Speech probability threshold (0-1). Default: 0.5 */\n threshold?: number;\n /** Minimum speech duration in ms to trigger start. Default: 300 */\n minSpeechDurationMs?: number;\n /** Silence duration in ms after speech to trigger end. Default: 500 */\n silenceDurationMs?: number;\n /** Sample rate. Default: 16000 */\n sampleRate?: number;\n /** Enable debug logging. Default: false */\n debug?: boolean;\n /** Custom URL for the Silero VAD ONNX model file. 
*/\n modelUrl?: string;\n}\n\nexport interface VADEvent {\n type: 'speech-start' | 'speech-end' | 'vad-ready';\n timestamp: number;\n /** Speech probability (0-1) at the moment of the event. */\n probability?: number;\n}\n\ntype VADEventType = VADEvent['type'];\ntype VADCallback = (event: VADEvent) => void;\n\n// ---------------------------------------------------------------------------\n// Utility: Cache API helpers\n// ---------------------------------------------------------------------------\n\nasync function loadModelFromCache(): Promise<ArrayBuffer | null> {\n if (typeof caches === 'undefined') return null;\n try {\n const cache = await caches.open(CACHE_NAME);\n const response = await cache.match(CACHE_MODEL_KEY);\n return response ? response.arrayBuffer() : null;\n } catch {\n // Cache API may be unavailable in certain contexts (e.g. opaque origins).\n return null;\n }\n}\n\nasync function saveModelToCache(data: ArrayBuffer): Promise<void> {\n if (typeof caches === 'undefined') return;\n try {\n const cache = await caches.open(CACHE_NAME);\n await cache.put(CACHE_MODEL_KEY, new Response(data));\n } catch {\n // Silently ignore cache write failures.\n }\n}\n\n// ---------------------------------------------------------------------------\n// Utility: Resampler\n// ---------------------------------------------------------------------------\n\n/**\n * Simple linear-interpolation resampler from `inputRate` to `outputRate`.\n * Adequate for VAD where perceptual audio quality is irrelevant.\n */\nfunction resample(\n input: Float32Array,\n inputRate: number,\n outputRate: number,\n): Float32Array {\n if (inputRate === outputRate) return input;\n const ratio = inputRate / outputRate;\n const outputLength = Math.round(input.length / ratio);\n const output = new Float32Array(outputLength);\n for (let i = 0; i < outputLength; i++) {\n const srcIndex = i * ratio;\n const srcFloor = Math.floor(srcIndex);\n const srcCeil = Math.min(srcFloor + 1, input.length - 1);\n 
const frac = srcIndex - srcFloor;\n output[i] = (input[srcFloor] as number) * (1 - frac) + (input[srcCeil] as number) * frac;\n }\n return output;\n}\n\n// ---------------------------------------------------------------------------\n// SileroVAD\n// ---------------------------------------------------------------------------\n\nexport class SileroVAD {\n // Options (resolved with defaults)\n private readonly _threshold: number;\n private readonly _minSpeechDurationMs: number;\n private readonly _silenceDurationMs: number;\n private readonly _sampleRate: number;\n private readonly _debug: boolean;\n private readonly _modelUrl: string;\n\n // ONNX Runtime state\n private _session: ort.InferenceSession | null = null;\n private _h: ort.Tensor | null = null;\n private _c: ort.Tensor | null = null;\n\n // Audio pipeline\n private _audioContext: AudioContext | null = null;\n private _ownsAudioContext = false;\n private _sourceNode: MediaStreamAudioSourceNode | null = null;\n private _workletNode: AudioWorkletNode | ScriptProcessorNode | null = null;\n private _stream: MediaStream | null = null;\n\n // Frame buffer for accumulating resampled samples into FRAME_SIZE chunks\n private _frameBuffer: Float32Array = new Float32Array(0);\n private _frameBufferOffset = 0;\n\n // State tracking\n private _isReady = false;\n private _isSpeaking = false;\n private _isStarted = false;\n private _isDestroyed = false;\n\n // Duration tracking (in frames)\n private _consecutiveSpeechFrames = 0;\n private _consecutiveSilenceFrames = 0;\n private _frameDurationMs: number;\n private _minSpeechFrames: number;\n private _silenceFrames: number;\n\n // Noise floor calibration\n private _isCalibrating = false;\n private _calibrationSamples: number[] = [];\n private _calibrationFramesNeeded = 0;\n private _calibratedThreshold: number;\n\n // Event listeners\n private _listeners: Map<VADEventType, Set<VADCallback>> = new Map();\n\n // Processing lock to serialise frame inference\n private 
_processingPromise: Promise<void> = Promise.resolve();\n\n constructor(options?: VADOptions) {\n this._threshold = options?.threshold ?? 0.5;\n this._minSpeechDurationMs = options?.minSpeechDurationMs ?? 300;\n this._silenceDurationMs = options?.silenceDurationMs ?? 500;\n this._sampleRate = options?.sampleRate ?? TARGET_SAMPLE_RATE;\n this._debug = options?.debug ?? false;\n this._modelUrl = options?.modelUrl ?? DEFAULT_MODEL_URL;\n this._calibratedThreshold = this._threshold;\n\n // Pre-compute frame-duration-based counters\n this._frameDurationMs = (FRAME_SIZE / this._sampleRate) * 1000;\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._log('Created with options', {\n threshold: this._threshold,\n minSpeechDurationMs: this._minSpeechDurationMs,\n silenceDurationMs: this._silenceDurationMs,\n sampleRate: this._sampleRate,\n modelUrl: this._modelUrl,\n });\n }\n\n // -------------------------------------------------------------------------\n // Public API\n // -------------------------------------------------------------------------\n\n /** Load the ONNX model. Uses Cache API for persistence across sessions. */\n async init(): Promise<void> {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot init after destroy`);\n }\n if (this._isReady) {\n this._log('Already initialised — skipping');\n return;\n }\n\n this._log('Initialising...');\n\n // 1. 
Attempt to load model bytes from cache, falling back to network.\n let modelBuffer = await loadModelFromCache();\n if (modelBuffer) {\n this._log('Loaded model from Cache API');\n } else {\n this._log('Fetching model from', this._modelUrl);\n const response = await fetch(this._modelUrl);\n if (!response.ok) {\n throw new Error(\n `${LOG_PREFIX} Failed to fetch model: ${response.status} ${response.statusText}`,\n );\n }\n modelBuffer = await response.arrayBuffer();\n this._log('Model fetched, size:', modelBuffer.byteLength, 'bytes');\n\n // Persist to Cache API for next time.\n await saveModelToCache(modelBuffer);\n this._log('Model saved to Cache API');\n }\n\n // 2. Create ONNX InferenceSession.\n this._session = await ort.InferenceSession.create(modelBuffer, {\n executionProviders: ['wasm'],\n graphOptimizationLevel: 'all',\n });\n\n // 3. Initialise LSTM hidden/cell state tensors (zeros).\n this._resetStates();\n\n this._isReady = true;\n this._log('Model loaded and ready');\n\n this._emit({\n type: 'vad-ready',\n timestamp: Date.now(),\n });\n }\n\n /**\n * Process a single audio frame (512 samples at 16 kHz).\n * Returns the speech probability (0-1).\n */\n async processFrame(audioData: Float32Array): Promise<number> {\n if (!this._session) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (audioData.length !== FRAME_SIZE) {\n throw new Error(\n `${LOG_PREFIX} Expected ${FRAME_SIZE} samples, got ${audioData.length}`,\n );\n }\n\n const inputTensor = new ort.Tensor('float32', audioData, [1, FRAME_SIZE]);\n const srTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(this._sampleRate)]), [1]);\n\n const feeds: Record<string, ort.Tensor> = {\n input: inputTensor,\n sr: srTensor,\n h: this._h!,\n c: this._c!,\n };\n\n const results = await this._session.run(feeds);\n\n // Update LSTM hidden/cell states for the next frame.\n this._h = results['hn'] as ort.Tensor;\n this._c = results['cn'] as ort.Tensor;\n\n const probability = (results['output'] as ort.Tensor).data[0] as number;\n return probability;\n }\n\n /** Start VAD processing on a MediaStream (typically from getUserMedia). */\n start(stream: MediaStream): void {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot start after destroy`);\n }\n if (!this._isReady) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (this._isStarted) {\n this._log('Already started — stopping previous session first');\n this.stop();\n }\n\n this._log('Starting VAD on MediaStream');\n this._stream = stream;\n this._isStarted = true;\n\n // Reset speech tracking state.\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBuffer = new Float32Array(FRAME_SIZE);\n this._frameBufferOffset = 0;\n\n // Reset LSTM states for a fresh stream.\n this._resetStates();\n\n // Begin noise floor calibration.\n this._isCalibrating = true;\n this._calibrationSamples = [];\n this._calibrationFramesNeeded = Math.ceil(\n (CALIBRATION_DURATION_MS / 1000) * this._sampleRate / FRAME_SIZE,\n );\n this._log('Calibrating noise floor for', this._calibrationFramesNeeded, 'frames');\n\n // Build the audio processing pipeline.\n this._setupAudioPipeline(stream);\n }\n\n /** Stop VAD processing and release audio resources (but keep the model). */\n stop(): void {\n if (!this._isStarted) return;\n\n this._log('Stopping VAD');\n\n // Tear down audio nodes.\n this._teardownAudioPipeline();\n\n // If we were speaking, emit speech-end.\n if (this._isSpeaking) {\n this._isSpeaking = false;\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability: 0,\n });\n }\n\n // Reset state.\n this._isStarted = false;\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBufferOffset = 0;\n this._isCalibrating = false;\n this._calibrationSamples = [];\n this._processingPromise = Promise.resolve();\n\n this._log('VAD stopped');\n }\n\n /** Register a callback for speech-start events. Returns an unsubscribe function. */\n onSpeechStart(callback: VADCallback): () => void {\n return this._on('speech-start', callback);\n }\n\n /** Register a callback for speech-end events. Returns an unsubscribe function. 
*/\n onSpeechEnd(callback: VADCallback): () => void {\n return this._on('speech-end', callback);\n }\n\n /** Register a callback for vad-ready events. Returns an unsubscribe function. */\n onReady(callback: VADCallback): () => void {\n return this._on('vad-ready', callback);\n }\n\n /** Whether the ONNX model is loaded and ready. */\n get isReady(): boolean {\n return this._isReady;\n }\n\n /** Whether speech is currently detected. */\n get isSpeaking(): boolean {\n return this._isSpeaking;\n }\n\n /** The MediaStream currently being processed, or null. */\n get stream(): MediaStream | null {\n return this._stream;\n }\n\n /** Release ONNX model session and all audio resources. */\n async destroy(): Promise<void> {\n if (this._isDestroyed) return;\n this._log('Destroying...');\n\n this.stop();\n\n if (this._session) {\n await this._session.release();\n this._session = null;\n }\n\n // Dispose tensors.\n this._h?.dispose();\n this._c?.dispose();\n this._h = null;\n this._c = null;\n\n this._isReady = false;\n this._isDestroyed = true;\n this._listeners.clear();\n\n this._log('Destroyed');\n }\n\n // -------------------------------------------------------------------------\n // Private: Event system\n // -------------------------------------------------------------------------\n\n private _on(type: VADEventType, callback: VADCallback): () => void {\n let set = this._listeners.get(type);\n if (!set) {\n set = new Set();\n this._listeners.set(type, set);\n }\n set.add(callback);\n return () => {\n set!.delete(callback);\n };\n }\n\n private _emit(event: VADEvent): void {\n const set = this._listeners.get(event.type);\n if (!set) return;\n for (const cb of set) {\n try {\n cb(event);\n } catch (err) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Error in ${event.type} callback:`, err);\n }\n }\n }\n\n // -------------------------------------------------------------------------\n // Private: Audio pipeline\n // 
-------------------------------------------------------------------------\n\n private _setupAudioPipeline(stream: MediaStream): void {\n // Determine the incoming sample rate.\n const tracks = stream.getAudioTracks();\n const trackSettings = tracks[0]?.getSettings();\n const inputSampleRate = trackSettings?.sampleRate ?? 48000;\n\n this._log('Input sample rate:', inputSampleRate);\n\n // Create AudioContext at the input sample rate so we don't double-resample.\n // SSR guard: AudioContext may not exist.\n if (typeof AudioContext === 'undefined' && typeof webkitAudioContext === 'undefined') {\n throw new Error(`${LOG_PREFIX} AudioContext is not available in this environment`);\n }\n\n const AudioContextClass =\n typeof AudioContext !== 'undefined'\n ? AudioContext\n : // eslint-disable-next-line @typescript-eslint/no-explicit-any\n (globalThis as any).webkitAudioContext as typeof AudioContext;\n\n this._audioContext = new AudioContextClass({ sampleRate: inputSampleRate });\n this._ownsAudioContext = true;\n\n this._sourceNode = this._audioContext.createMediaStreamSource(stream);\n\n // Try AudioWorklet first, fall back to ScriptProcessorNode.\n this._setupScriptProcessor(inputSampleRate);\n }\n\n /**\n * ScriptProcessorNode fallback (works everywhere, including Safari).\n * We use a buffer size of 4096 which gives ~85 ms of audio at 48 kHz.\n */\n private _setupScriptProcessor(inputSampleRate: number): void {\n if (!this._audioContext || !this._sourceNode) return;\n\n // Buffer size must be a power of 2: 256, 512, 1024, 2048, 4096, 8192, 16384.\n const bufferSize = 4096;\n const processor = this._audioContext.createScriptProcessor(bufferSize, 1, 1);\n\n processor.onaudioprocess = (event: AudioProcessingEvent) => {\n if (!this._isStarted) return;\n\n const inputData = event.inputBuffer.getChannelData(0);\n\n // Resample to target rate if needed.\n const resampled =\n inputSampleRate !== this._sampleRate\n ? 
resample(inputData, inputSampleRate, this._sampleRate)\n : new Float32Array(inputData);\n\n // Feed resampled audio into frame-sized chunks.\n this._feedAudio(resampled);\n };\n\n this._sourceNode.connect(processor);\n processor.connect(this._audioContext.destination);\n this._workletNode = processor;\n\n this._log('Audio pipeline set up (ScriptProcessorNode)');\n }\n\n /**\n * Accumulate resampled audio into FRAME_SIZE chunks and process each full frame.\n */\n private _feedAudio(samples: Float32Array): void {\n let offset = 0;\n\n while (offset < samples.length) {\n const remaining = FRAME_SIZE - this._frameBufferOffset;\n const available = samples.length - offset;\n const toCopy = Math.min(remaining, available);\n\n this._frameBuffer.set(\n samples.subarray(offset, offset + toCopy),\n this._frameBufferOffset,\n );\n this._frameBufferOffset += toCopy;\n offset += toCopy;\n\n if (this._frameBufferOffset === FRAME_SIZE) {\n const frame = new Float32Array(this._frameBuffer);\n this._frameBufferOffset = 0;\n\n // Serialise inference calls to avoid overlapping ONNX sessions.\n this._processingPromise = this._processingPromise.then(() =>\n this._handleFrame(frame),\n );\n }\n }\n }\n\n /**\n * Process a single FRAME_SIZE frame: run inference and update speech state.\n */\n private async _handleFrame(frame: Float32Array): Promise<void> {\n if (!this._isStarted || !this._session) return;\n\n let probability: number;\n try {\n probability = await this.processFrame(frame);\n } catch (err) {\n if (this._debug) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Inference error:`, err);\n }\n return;\n }\n\n // Noise floor calibration phase.\n if (this._isCalibrating) {\n this._calibrationSamples.push(probability);\n\n if (this._calibrationSamples.length >= this._calibrationFramesNeeded) {\n this._finishCalibration();\n }\n return;\n }\n\n // Speech state machine.\n const isSpeechFrame = probability >= this._calibratedThreshold;\n\n if (isSpeechFrame) 
{\n this._consecutiveSpeechFrames++;\n this._consecutiveSilenceFrames = 0;\n\n if (!this._isSpeaking && this._consecutiveSpeechFrames >= this._minSpeechFrames) {\n this._isSpeaking = true;\n this._log('Speech started, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-start',\n timestamp: Date.now(),\n probability,\n });\n }\n } else {\n this._consecutiveSilenceFrames++;\n // Do NOT reset _consecutiveSpeechFrames here — only reset when speech-end fires.\n\n if (this._isSpeaking && this._consecutiveSilenceFrames >= this._silenceFrames) {\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._log('Speech ended, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability,\n });\n }\n }\n }\n\n private _finishCalibration(): void {\n if (this._calibrationSamples.length === 0) {\n this._isCalibrating = false;\n return;\n }\n\n // Compute average noise floor probability.\n const sum = this._calibrationSamples.reduce((a, b) => a + b, 0);\n const avgNoise = sum / this._calibrationSamples.length;\n\n // If the ambient noise floor is high, nudge the threshold above it.\n // We add a margin so we don't constantly trigger on background noise.\n const NOISE_MARGIN = 0.15;\n if (avgNoise + NOISE_MARGIN > this._threshold) {\n this._calibratedThreshold = Math.min(avgNoise + NOISE_MARGIN, 0.95);\n this._log(\n 'Noise floor is high. 
Adjusted threshold from',\n this._threshold.toFixed(3),\n 'to',\n this._calibratedThreshold.toFixed(3),\n '(avg noise:',\n avgNoise.toFixed(3) + ')',\n );\n } else {\n this._calibratedThreshold = this._threshold;\n this._log('Noise floor OK, avg:', avgNoise.toFixed(3), '— keeping threshold at', this._threshold.toFixed(3));\n }\n\n // Recompute frame counters in case threshold changed min speech behaviour.\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._isCalibrating = false;\n this._calibrationSamples = [];\n }\n\n private _teardownAudioPipeline(): void {\n if (this._workletNode) {\n try {\n this._workletNode.disconnect();\n } catch {\n // Ignore disconnect errors.\n }\n if ('onaudioprocess' in this._workletNode) {\n (this._workletNode as ScriptProcessorNode).onaudioprocess = null;\n }\n this._workletNode = null;\n }\n\n if (this._sourceNode) {\n try {\n this._sourceNode.disconnect();\n } catch {\n // Ignore.\n }\n this._sourceNode = null;\n }\n\n if (this._audioContext && this._ownsAudioContext) {\n try {\n void this._audioContext.close();\n } catch {\n // Ignore.\n }\n this._audioContext = null;\n this._ownsAudioContext = false;\n }\n\n this._stream = null;\n }\n\n // -------------------------------------------------------------------------\n // Private: ONNX state helpers\n // -------------------------------------------------------------------------\n\n /** Reset the LSTM hidden and cell states to zeros. 
*/\n private _resetStates(): void {\n // Dispose any existing tensors to free memory.\n this._h?.dispose();\n this._c?.dispose();\n\n const zeros = new Float32Array(2 * STATE_SIZE).fill(0);\n this._h = new ort.Tensor('float32', zeros.slice(0, STATE_SIZE), [2, 1, 64]);\n this._c = new ort.Tensor('float32', zeros.slice(STATE_SIZE), [2, 1, 64]);\n }\n\n // -------------------------------------------------------------------------\n // Private: Logging\n // -------------------------------------------------------------------------\n\n private _log(...args: unknown[]): void {\n if (!this._debug) return;\n // eslint-disable-next-line no-console\n console.log(LOG_PREFIX, ...args);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Convenience factory\n// ---------------------------------------------------------------------------\n\n/**\n * Create and initialise a SileroVAD instance in one call.\n *\n * ```ts\n * const vad = await createVAD({ debug: true });\n * vad.onSpeechStart(() => console.log('speaking'));\n * vad.start(stream);\n * ```\n */\nexport async function createVAD(options?: VADOptions): Promise<SileroVAD> {\n const vad = new SileroVAD(options);\n await vad.init();\n return vad;\n}\n\n// Re-export the frame size constant so consumers can align their buffers.\nexport { FRAME_SIZE, TARGET_SAMPLE_RATE };\n\n// Type-only declaration for environments that provide webkitAudioContext.\ndeclare global {\n // eslint-disable-next-line no-var\n var webkitAudioContext: typeof AudioContext | 
undefined;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,UAAqB;AAEd,IAAM,cAAc;AAM3B,IAAM,aAAa;AAGnB,IAAM,oBACJ;AAGF,IAAM,aAAa,iBAAiB,WAAW;AAC/C,IAAM,kBAAkB;AAGxB,IAAM,aAAa;AAGnB,IAAM,qBAAqB;AAG3B,IAAM,0BAA0B;AAGhC,IAAM,aAAa;AAmCnB,eAAe,qBAAkD;AAC/D,MAAI,OAAO,WAAW,YAAa,QAAO;AAC1C,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,WAAW,MAAM,MAAM,MAAM,eAAe;AAClD,WAAO,WAAW,SAAS,YAAY,IAAI;AAAA,EAC7C,QAAQ;AAEN,WAAO;AAAA,EACT;AACF;AAEA,eAAe,iBAAiB,MAAkC;AAChE,MAAI,OAAO,WAAW,YAAa;AACnC,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,MAAM,IAAI,iBAAiB,IAAI,SAAS,IAAI,CAAC;AAAA,EACrD,QAAQ;AAAA,EAER;AACF;AAUA,SAAS,SACP,OACA,WACA,YACc;AACd,MAAI,cAAc,WAAY,QAAO;AACrC,QAAM,QAAQ,YAAY;AAC1B,QAAM,eAAe,KAAK,MAAM,MAAM,SAAS,KAAK;AACpD,QAAM,SAAS,IAAI,aAAa,YAAY;AAC5C,WAAS,IAAI,GAAG,IAAI,cAAc,KAAK;AACrC,UAAM,WAAW,IAAI;AACrB,UAAM,WAAW,KAAK,MAAM,QAAQ;AACpC,UAAM,UAAU,KAAK,IAAI,WAAW,GAAG,MAAM,SAAS,CAAC;AACvD,UAAM,OAAO,WAAW;AACxB,WAAO,CAAC,IAAK,MAAM,QAAQ,KAAgB,IAAI,QAAS,MAAM,OAAO,IAAe;AAAA,EACtF;AACA,SAAO;AACT;AAMO,IAAM,YAAN,MAAgB;AAAA;AAAA,EAEJ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGT,WAAwC;AAAA,EACxC,KAAwB;AAAA,EACxB,KAAwB;AAAA;AAAA,EAGxB,gBAAqC;AAAA,EACrC,oBAAoB;AAAA,EACpB,cAAiD;AAAA,EACjD,eAA8D;AAAA,EAC9D,UAA8B;AAAA;AAAA,EAG9B,eAA6B,IAAI,aAAa,CAAC;AAAA,EAC/C,qBAAqB;AAAA;AAAA,EAGrB,WAAW;AAAA,EACX,cAAc;AAAA,EACd,aAAa;AAAA,EACb,eAAe;AAAA;AAAA,EAGf,2BAA2B;AAAA,EAC3B,4BAA4B;AAAA,EAC5B;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,iBAAiB;AAAA,EACjB,sBAAgC,CAAC;AAAA,EACjC,2BAA2B;AAAA,EAC3B;AAAA;AAAA,EAGA,aAAkD,oBAAI,IAAI;AAAA;AAAA,EAG1D,qBAAoC,QAAQ,QAAQ;AAAA,EAE5D,YAAY,SAAsB;AAChC,SAAK,aAAa,SAAS,aAAa;AACxC,SAAK,uBAAuB,SAAS,uBAAuB;AAC5D,SAAK,qBAAqB,SAAS,qBAAqB;AACxD,SAAK,cAAc,SAAS,cAAc;AAC1C,SAAK,SAAS,SAAS,SAAS;AAChC,SAAK,YAAY,SAAS,YAAY;AACtC,SAAK,uBAAuB,KAAK;AAGjC,SAAK,mBAAoB,aAAa,KAAK,cAAe;AAC1D,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,KAAK,wBAAwB;AAAA,MAChC,WAAW,
KAAK;AAAA,MAChB,qBAAqB,KAAK;AAAA,MAC1B,mBAAmB,KAAK;AAAA,MACxB,YAAY,KAAK;AAAA,MACjB,UAAU,KAAK;AAAA,IACjB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,OAAsB;AAC1B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,4BAA4B;AAAA,IAC3D;AACA,QAAI,KAAK,UAAU;AACjB,WAAK,KAAK,qCAAgC;AAC1C;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB;AAG3B,QAAI,cAAc,MAAM,mBAAmB;AAC3C,QAAI,aAAa;AACf,WAAK,KAAK,6BAA6B;AAAA,IACzC,OAAO;AACL,WAAK,KAAK,uBAAuB,KAAK,SAAS;AAC/C,YAAM,WAAW,MAAM,MAAM,KAAK,SAAS;AAC3C,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,IAAI;AAAA,UACR,GAAG,UAAU,2BAA2B,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,QAChF;AAAA,MACF;AACA,oBAAc,MAAM,SAAS,YAAY;AACzC,WAAK,KAAK,wBAAwB,YAAY,YAAY,OAAO;AAGjE,YAAM,iBAAiB,WAAW;AAClC,WAAK,KAAK,0BAA0B;AAAA,IACtC;AAGA,SAAK,WAAW,MAAU,qBAAiB,OAAO,aAAa;AAAA,MAC7D,oBAAoB,CAAC,MAAM;AAAA,MAC3B,wBAAwB;AAAA,IAC1B,CAAC;AAGD,SAAK,aAAa;AAElB,SAAK,WAAW;AAChB,SAAK,KAAK,wBAAwB;AAElC,SAAK,MAAM;AAAA,MACT,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,IACtB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,aAAa,WAA0C;AAC3D,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,UAAU,WAAW,YAAY;AACnC,YAAM,IAAI;AAAA,QACR,GAAG,UAAU,aAAa,UAAU,iBAAiB,UAAU,MAAM;AAAA,MACvE;AAAA,IACF;AAEA,UAAM,cAAc,IAAQ,WAAO,WAAW,WAAW,CAAC,GAAG,UAAU,CAAC;AACxE,UAAM,WAAW,IAAQ,WAAO,SAAS,cAAc,KAAK,CAAC,OAAO,KAAK,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;AAE5F,UAAM,QAAoC;AAAA,MACxC,OAAO;AAAA,MACP,IAAI;AAAA,MACJ,GAAG,KAAK;AAAA,MACR,GAAG,KAAK;AAAA,IACV;AAEA,UAAM,UAAU,MAAM,KAAK,SAAS,IAAI,KAAK;AAG7C,SAAK,KAAK,QAAQ,IAAI;AACtB,SAAK,KAAK,QAAQ,IAAI;AAEtB,UAAM,cAAe,QAAQ,QAAQ,EAAiB,KAAK,CAAC;AAC5D,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,MAAM,QAA2B;AAC/B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,6BAA6B;AAAA,IAC5D;AACA,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,KAAK,YAAY;AACnB,WAAK,KAAK,wDAAmD;AAC7D,WAAK,KAAK;AAAA,IACZ;AAEA,SAAK,KAAK,6BAA6B;AACvC,SAAK,UAAU;AACf,SAAK,aAAa;AAGlB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,eAAe,IAAI,aAAa,UAAU;AAC/C,SAAK,qBAAqB;AAG1B,SAAK,aAAa;AAGlB,SAAK,iBAAiB;AACtB,SAAK,s
BAAsB,CAAC;AAC5B,SAAK,2BAA2B,KAAK;AAAA,MAClC,0BAA0B,MAAQ,KAAK,cAAc;AAAA,IACxD;AACA,SAAK,KAAK,+BAA+B,KAAK,0BAA0B,QAAQ;AAGhF,SAAK,oBAAoB,MAAM;AAAA,EACjC;AAAA;AAAA,EAGA,OAAa;AACX,QAAI,CAAC,KAAK,WAAY;AAEtB,SAAK,KAAK,cAAc;AAGxB,SAAK,uBAAuB;AAG5B,QAAI,KAAK,aAAa;AACpB,WAAK,cAAc;AACnB,WAAK,MAAM;AAAA,QACT,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAGA,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,qBAAqB;AAC1B,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAC5B,SAAK,qBAAqB,QAAQ,QAAQ;AAE1C,SAAK,KAAK,aAAa;AAAA,EACzB;AAAA;AAAA,EAGA,cAAc,UAAmC;AAC/C,WAAO,KAAK,IAAI,gBAAgB,QAAQ;AAAA,EAC1C;AAAA;AAAA,EAGA,YAAY,UAAmC;AAC7C,WAAO,KAAK,IAAI,cAAc,QAAQ;AAAA,EACxC;AAAA;AAAA,EAGA,QAAQ,UAAmC;AACzC,WAAO,KAAK,IAAI,aAAa,QAAQ;AAAA,EACvC;AAAA;AAAA,EAGA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAA6B;AAC/B,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAyB;AAC7B,QAAI,KAAK,aAAc;AACvB,SAAK,KAAK,eAAe;AAEzB,SAAK,KAAK;AAEV,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,QAAQ;AAC5B,WAAK,WAAW;AAAA,IAClB;AAGA,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AACjB,SAAK,KAAK;AACV,SAAK,KAAK;AAEV,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,WAAW,MAAM;AAEtB,SAAK,KAAK,WAAW;AAAA,EACvB;AAAA;AAAA;AAAA;AAAA,EAMQ,IAAI,MAAoB,UAAmC;AACjE,QAAI,MAAM,KAAK,WAAW,IAAI,IAAI;AAClC,QAAI,CAAC,KAAK;AACR,YAAM,oBAAI,IAAI;AACd,WAAK,WAAW,IAAI,MAAM,GAAG;AAAA,IAC/B;AACA,QAAI,IAAI,QAAQ;AAChB,WAAO,MAAM;AACX,UAAK,OAAO,QAAQ;AAAA,IACtB;AAAA,EACF;AAAA,EAEQ,MAAM,OAAuB;AACnC,UAAM,MAAM,KAAK,WAAW,IAAI,MAAM,IAAI;AAC1C,QAAI,CAAC,IAAK;AACV,eAAW,MAAM,KAAK;AACpB,UAAI;AACF,WAAG,KAAK;AAAA,MACV,SAAS,KAAK;AAEZ,gBAAQ,MAAM,GAAG,UAAU,aAAa,MAAM,IAAI,cAAc,GAAG;AAAA,MACrE;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAMQ,oBAAoB,QAA2B;AAErD,UAAM,SAAS,OAAO,eAAe;AACrC,UAAM,gBAAgB,OAAO,CAAC,GAAG,YAAY;AAC7C,UAAM,kBAAkB,eAAe,cAAc;AAErD,SAAK,KAAK,sBAAsB,eAAe;AAI/C,QAAI,OAAO,iBAAiB,eAAe,OAAO,uBAAuB,aAAa;AACpF,YAAM,IAAI,MAAM,GAAG,UAAU,oDAAoD;AAAA,IACnF;AAEA,UAAM,oBACJ,OAAO,iBAAiB,cACpB;AAAA;AAAA,MAEC,
WAAmB;AAAA;AAE1B,SAAK,gBAAgB,IAAI,kBAAkB,EAAE,YAAY,gBAAgB,CAAC;AAC1E,SAAK,oBAAoB;AAEzB,SAAK,cAAc,KAAK,cAAc,wBAAwB,MAAM;AAGpE,SAAK,sBAAsB,eAAe;AAAA,EAC5C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,sBAAsB,iBAA+B;AAC3D,QAAI,CAAC,KAAK,iBAAiB,CAAC,KAAK,YAAa;AAG9C,UAAM,aAAa;AACnB,UAAM,YAAY,KAAK,cAAc,sBAAsB,YAAY,GAAG,CAAC;AAE3E,cAAU,iBAAiB,CAAC,UAAgC;AAC1D,UAAI,CAAC,KAAK,WAAY;AAEtB,YAAM,YAAY,MAAM,YAAY,eAAe,CAAC;AAGpD,YAAM,YACJ,oBAAoB,KAAK,cACrB,SAAS,WAAW,iBAAiB,KAAK,WAAW,IACrD,IAAI,aAAa,SAAS;AAGhC,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,SAAK,YAAY,QAAQ,SAAS;AAClC,cAAU,QAAQ,KAAK,cAAc,WAAW;AAChD,SAAK,eAAe;AAEpB,SAAK,KAAK,6CAA6C;AAAA,EACzD;AAAA;AAAA;AAAA;AAAA,EAKQ,WAAW,SAA6B;AAC9C,QAAI,SAAS;AAEb,WAAO,SAAS,QAAQ,QAAQ;AAC9B,YAAM,YAAY,aAAa,KAAK;AACpC,YAAM,YAAY,QAAQ,SAAS;AACnC,YAAM,SAAS,KAAK,IAAI,WAAW,SAAS;AAE5C,WAAK,aAAa;AAAA,QAChB,QAAQ,SAAS,QAAQ,SAAS,MAAM;AAAA,QACxC,KAAK;AAAA,MACP;AACA,WAAK,sBAAsB;AAC3B,gBAAU;AAEV,UAAI,KAAK,uBAAuB,YAAY;AAC1C,cAAM,QAAQ,IAAI,aAAa,KAAK,YAAY;AAChD,aAAK,qBAAqB;AAG1B,aAAK,qBAAqB,KAAK,mBAAmB;AAAA,UAAK,MACrD,KAAK,aAAa,KAAK;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,aAAa,OAAoC;AAC7D,QAAI,CAAC,KAAK,cAAc,CAAC,KAAK,SAAU;AAExC,QAAI;AACJ,QAAI;AACF,oBAAc,MAAM,KAAK,aAAa,KAAK;AAAA,IAC7C,SAAS,KAAK;AACZ,UAAI,KAAK,QAAQ;AAEf,gBAAQ,MAAM,GAAG,UAAU,qBAAqB,GAAG;AAAA,MACrD;AACA;AAAA,IACF;AAGA,QAAI,KAAK,gBAAgB;AACvB,WAAK,oBAAoB,KAAK,WAAW;AAEzC,UAAI,KAAK,oBAAoB,UAAU,KAAK,0BAA0B;AACpE,aAAK,mBAAmB;AAAA,MAC1B;AACA;AAAA,IACF;AAGA,UAAM,gBAAgB,eAAe,KAAK;AAE1C,QAAI,eAAe;AACjB,WAAK;AACL,WAAK,4BAA4B;AAEjC,UAAI,CAAC,KAAK,eAAe,KAAK,4BAA4B,KAAK,kBAAkB;AAC/E,aAAK,cAAc;AACnB,aAAK,KAAK,gCAAgC,YAAY,QAAQ,CAAC,CAAC;AAChE,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,OAAO;AACL,WAAK;AAGL,UAAI,KAAK,eAAe,KAAK,6BAA6B,KAAK,gBAAgB;AAC7E,aAAK,cAAc;AACnB,aAAK,2BAA2B;AAChC,aAAK,KAAK,8BAA8B,YAAY,QAAQ,CAAC,CAAC;AAC9D,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAA2B;AACjC,QAAI
,KAAK,oBAAoB,WAAW,GAAG;AACzC,WAAK,iBAAiB;AACtB;AAAA,IACF;AAGA,UAAM,MAAM,KAAK,oBAAoB,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC;AAC9D,UAAM,WAAW,MAAM,KAAK,oBAAoB;AAIhD,UAAM,eAAe;AACrB,QAAI,WAAW,eAAe,KAAK,YAAY;AAC7C,WAAK,uBAAuB,KAAK,IAAI,WAAW,cAAc,IAAI;AAClE,WAAK;AAAA,QACH;AAAA,QACA,KAAK,WAAW,QAAQ,CAAC;AAAA,QACzB;AAAA,QACA,KAAK,qBAAqB,QAAQ,CAAC;AAAA,QACnC;AAAA,QACA,SAAS,QAAQ,CAAC,IAAI;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,uBAAuB,KAAK;AACjC,WAAK,KAAK,wBAAwB,SAAS,QAAQ,CAAC,GAAG,+BAA0B,KAAK,WAAW,QAAQ,CAAC,CAAC;AAAA,IAC7G;AAGA,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAAA,EAC9B;AAAA,EAEQ,yBAA+B;AACrC,QAAI,KAAK,cAAc;AACrB,UAAI;AACF,aAAK,aAAa,WAAW;AAAA,MAC/B,QAAQ;AAAA,MAER;AACA,UAAI,oBAAoB,KAAK,cAAc;AACzC,QAAC,KAAK,aAAqC,iBAAiB;AAAA,MAC9D;AACA,WAAK,eAAe;AAAA,IACtB;AAEA,QAAI,KAAK,aAAa;AACpB,UAAI;AACF,aAAK,YAAY,WAAW;AAAA,MAC9B,QAAQ;AAAA,MAER;AACA,WAAK,cAAc;AAAA,IACrB;AAEA,QAAI,KAAK,iBAAiB,KAAK,mBAAmB;AAChD,UAAI;AACF,aAAK,KAAK,cAAc,MAAM;AAAA,MAChC,QAAQ;AAAA,MAER;AACA,WAAK,gBAAgB;AACrB,WAAK,oBAAoB;AAAA,IAC3B;AAEA,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA;AAAA,EAOQ,eAAqB;AAE3B,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AAEjB,UAAM,QAAQ,IAAI,aAAa,IAAI,UAAU,EAAE,KAAK,CAAC;AACrD,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,GAAG,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAC1E,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAAA,EACzE;AAAA;AAAA;AAAA;AAAA,EAMQ,QAAQ,MAAuB;AACrC,QAAI,CAAC,KAAK,OAAQ;AAElB,YAAQ,IAAI,YAAY,GAAG,IAAI;AAAA,EACjC;AACF;AAeA,eAAsB,UAAU,SAA0C;AACxE,QAAM,MAAM,IAAI,UAAU,OAAO;AACjC,QAAM,IAAI,KAAK;AACf,SAAO;AACT;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// @guidekit/vad — Silero VAD ONNX model wrapper for voice activity detection\nimport * as ort from 'onnxruntime-web';\n\nexport const VAD_VERSION = '0.1.0-beta.2';\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nconst LOG_PREFIX = '[GuideKit:VAD]';\n\n/** Default CDN URL for the Silero VAD ONNX model (v5). */\nconst DEFAULT_MODEL_URL =\n 'https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/silero_vad_v5.onnx';\n\n/** Cache API key used for persisting the downloaded ONNX model. */\nconst CACHE_NAME = `guidekit-vad-v${VAD_VERSION}`;\nconst CACHE_MODEL_KEY = 'model.onnx';\n\n/** Silero VAD frame size: 512 samples at 16 kHz = 32 ms per frame. */\nconst FRAME_SIZE = 512;\n\n/** Target sample rate for VAD processing. */\nconst TARGET_SAMPLE_RATE = 16000;\n\n/** Duration (in ms) of audio collected for noise floor calibration. */\nconst CALIBRATION_DURATION_MS = 500;\n\n/** Hidden/cell state size for Silero VAD v5 LSTM. */\nconst STATE_SIZE = 128;\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport interface VADOptions {\n /** Speech probability threshold (0-1). Default: 0.5 */\n threshold?: number;\n /** Minimum speech duration in ms to trigger start. Default: 300 */\n minSpeechDurationMs?: number;\n /** Silence duration in ms after speech to trigger end. Default: 500 */\n silenceDurationMs?: number;\n /** Sample rate. Default: 16000 */\n sampleRate?: number;\n /** Enable debug logging. Default: false */\n debug?: boolean;\n /** Custom URL for the Silero VAD ONNX model file. 
*/\n modelUrl?: string;\n}\n\nexport interface VADEvent {\n type: 'speech-start' | 'speech-end' | 'vad-ready';\n timestamp: number;\n /** Speech probability (0-1) at the moment of the event. */\n probability?: number;\n}\n\ntype VADEventType = VADEvent['type'];\ntype VADCallback = (event: VADEvent) => void;\n\n// ---------------------------------------------------------------------------\n// Utility: Cache API helpers\n// ---------------------------------------------------------------------------\n\nasync function loadModelFromCache(): Promise<ArrayBuffer | null> {\n if (typeof caches === 'undefined') return null;\n try {\n const cache = await caches.open(CACHE_NAME);\n const response = await cache.match(CACHE_MODEL_KEY);\n return response ? response.arrayBuffer() : null;\n } catch {\n // Cache API may be unavailable in certain contexts (e.g. opaque origins).\n return null;\n }\n}\n\nasync function saveModelToCache(data: ArrayBuffer): Promise<void> {\n if (typeof caches === 'undefined') return;\n try {\n const cache = await caches.open(CACHE_NAME);\n await cache.put(CACHE_MODEL_KEY, new Response(data));\n } catch {\n // Silently ignore cache write failures.\n }\n}\n\n// ---------------------------------------------------------------------------\n// Utility: Resampler\n// ---------------------------------------------------------------------------\n\n/**\n * Simple linear-interpolation resampler from `inputRate` to `outputRate`.\n * Adequate for VAD where perceptual audio quality is irrelevant.\n */\nfunction resample(\n input: Float32Array,\n inputRate: number,\n outputRate: number,\n): Float32Array {\n if (inputRate === outputRate) return input;\n const ratio = inputRate / outputRate;\n const outputLength = Math.round(input.length / ratio);\n const output = new Float32Array(outputLength);\n for (let i = 0; i < outputLength; i++) {\n const srcIndex = i * ratio;\n const srcFloor = Math.floor(srcIndex);\n const srcCeil = Math.min(srcFloor + 1, input.length - 1);\n 
const frac = srcIndex - srcFloor;\n output[i] = (input[srcFloor] as number) * (1 - frac) + (input[srcCeil] as number) * frac;\n }\n return output;\n}\n\n// ---------------------------------------------------------------------------\n// SileroVAD\n// ---------------------------------------------------------------------------\n\nexport class SileroVAD {\n // Options (resolved with defaults)\n private readonly _threshold: number;\n private readonly _minSpeechDurationMs: number;\n private readonly _silenceDurationMs: number;\n private readonly _sampleRate: number;\n private readonly _debug: boolean;\n private readonly _modelUrl: string;\n\n // ONNX Runtime state\n private _session: ort.InferenceSession | null = null;\n private _h: ort.Tensor | null = null;\n private _c: ort.Tensor | null = null;\n\n // Audio pipeline\n private _audioContext: AudioContext | null = null;\n private _ownsAudioContext = false;\n private _sourceNode: MediaStreamAudioSourceNode | null = null;\n private _workletNode: AudioWorkletNode | ScriptProcessorNode | null = null;\n private _stream: MediaStream | null = null;\n\n // Frame buffer for accumulating resampled samples into FRAME_SIZE chunks\n private _frameBuffer: Float32Array = new Float32Array(0);\n private _frameBufferOffset = 0;\n\n // State tracking\n private _isReady = false;\n private _isSpeaking = false;\n private _isStarted = false;\n private _isDestroyed = false;\n\n // Duration tracking (in frames)\n private _consecutiveSpeechFrames = 0;\n private _consecutiveSilenceFrames = 0;\n private _frameDurationMs: number;\n private _minSpeechFrames: number;\n private _silenceFrames: number;\n\n // Noise floor calibration\n private _isCalibrating = false;\n private _calibrationSamples: number[] = [];\n private _calibrationFramesNeeded = 0;\n private _calibratedThreshold: number;\n\n // Event listeners\n private _listeners: Map<VADEventType, Set<VADCallback>> = new Map();\n\n // Processing lock to serialise frame inference\n private 
_processingPromise: Promise<void> = Promise.resolve();\n\n constructor(options?: VADOptions) {\n this._threshold = options?.threshold ?? 0.5;\n this._minSpeechDurationMs = options?.minSpeechDurationMs ?? 300;\n this._silenceDurationMs = options?.silenceDurationMs ?? 500;\n this._sampleRate = options?.sampleRate ?? TARGET_SAMPLE_RATE;\n this._debug = options?.debug ?? false;\n this._modelUrl = options?.modelUrl ?? DEFAULT_MODEL_URL;\n this._calibratedThreshold = this._threshold;\n\n // Pre-compute frame-duration-based counters\n this._frameDurationMs = (FRAME_SIZE / this._sampleRate) * 1000;\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._log('Created with options', {\n threshold: this._threshold,\n minSpeechDurationMs: this._minSpeechDurationMs,\n silenceDurationMs: this._silenceDurationMs,\n sampleRate: this._sampleRate,\n modelUrl: this._modelUrl,\n });\n }\n\n // -------------------------------------------------------------------------\n // Public API\n // -------------------------------------------------------------------------\n\n /** Load the ONNX model. Uses Cache API for persistence across sessions. */\n async init(): Promise<void> {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot init after destroy`);\n }\n if (this._isReady) {\n this._log('Already initialised — skipping');\n return;\n }\n\n this._log('Initialising...');\n\n // 1. 
Attempt to load model bytes from cache, falling back to network.\n let modelBuffer = await loadModelFromCache();\n if (modelBuffer) {\n this._log('Loaded model from Cache API');\n } else {\n this._log('Fetching model from', this._modelUrl);\n const response = await fetch(this._modelUrl);\n if (!response.ok) {\n throw new Error(\n `${LOG_PREFIX} Failed to fetch model: ${response.status} ${response.statusText}`,\n );\n }\n modelBuffer = await response.arrayBuffer();\n this._log('Model fetched, size:', modelBuffer.byteLength, 'bytes');\n\n // Persist to Cache API for next time.\n await saveModelToCache(modelBuffer);\n this._log('Model saved to Cache API');\n }\n\n // 2. Create ONNX InferenceSession.\n this._session = await ort.InferenceSession.create(modelBuffer, {\n executionProviders: ['wasm'],\n graphOptimizationLevel: 'all',\n });\n\n // 3. Initialise LSTM hidden/cell state tensors (zeros).\n this._resetStates();\n\n this._isReady = true;\n this._log('Model loaded and ready');\n\n this._emit({\n type: 'vad-ready',\n timestamp: Date.now(),\n });\n }\n\n /**\n * Process a single audio frame (512 samples at 16 kHz).\n * Returns the speech probability (0-1).\n */\n async processFrame(audioData: Float32Array): Promise<number> {\n if (!this._session) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (audioData.length !== FRAME_SIZE) {\n throw new Error(\n `${LOG_PREFIX} Expected ${FRAME_SIZE} samples, got ${audioData.length}`,\n );\n }\n\n const inputTensor = new ort.Tensor('float32', audioData, [1, FRAME_SIZE]);\n const srTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(this._sampleRate)]), [1]);\n\n const feeds: Record<string, ort.Tensor> = {\n input: inputTensor,\n sr: srTensor,\n h: this._h!,\n c: this._c!,\n };\n\n const results = await this._session.run(feeds);\n\n // Update LSTM hidden/cell states for the next frame.\n this._h = results['hn'] as ort.Tensor;\n this._c = results['cn'] as ort.Tensor;\n\n const probability = (results['output'] as ort.Tensor).data[0] as number;\n return probability;\n }\n\n /** Start VAD processing on a MediaStream (typically from getUserMedia). */\n start(stream: MediaStream): void {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot start after destroy`);\n }\n if (!this._isReady) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (this._isStarted) {\n this._log('Already started — stopping previous session first');\n this.stop();\n }\n\n this._log('Starting VAD on MediaStream');\n this._stream = stream;\n this._isStarted = true;\n\n // Reset speech tracking state.\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBuffer = new Float32Array(FRAME_SIZE);\n this._frameBufferOffset = 0;\n\n // Reset LSTM states for a fresh stream.\n this._resetStates();\n\n // Begin noise floor calibration.\n this._isCalibrating = true;\n this._calibrationSamples = [];\n this._calibrationFramesNeeded = Math.max(1, Math.floor(\n (CALIBRATION_DURATION_MS / 1000) * this._sampleRate / FRAME_SIZE,\n ));\n this._log('Calibrating noise floor for', this._calibrationFramesNeeded, 'frames');\n\n // Build the audio processing pipeline.\n this._setupAudioPipeline(stream);\n }\n\n /** Stop VAD processing and release audio resources (but keep the model). */\n stop(): void {\n if (!this._isStarted) return;\n\n this._log('Stopping VAD');\n\n // Tear down audio nodes.\n this._teardownAudioPipeline();\n\n // If we were speaking, emit speech-end.\n if (this._isSpeaking) {\n this._isSpeaking = false;\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability: 0,\n });\n }\n\n // Reset state.\n this._isStarted = false;\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBufferOffset = 0;\n this._isCalibrating = false;\n this._calibrationSamples = [];\n this._processingPromise = Promise.resolve();\n\n this._log('VAD stopped');\n }\n\n /** Register a callback for speech-start events. Returns an unsubscribe function. */\n onSpeechStart(callback: VADCallback): () => void {\n return this._on('speech-start', callback);\n }\n\n /** Register a callback for speech-end events. Returns an unsubscribe function. 
*/\n onSpeechEnd(callback: VADCallback): () => void {\n return this._on('speech-end', callback);\n }\n\n /** Register a callback for vad-ready events. Returns an unsubscribe function. */\n onReady(callback: VADCallback): () => void {\n return this._on('vad-ready', callback);\n }\n\n /** Whether the ONNX model is loaded and ready. */\n get isReady(): boolean {\n return this._isReady;\n }\n\n /** Whether speech is currently detected. */\n get isSpeaking(): boolean {\n return this._isSpeaking;\n }\n\n /** The MediaStream currently being processed, or null. */\n get stream(): MediaStream | null {\n return this._stream;\n }\n\n /** Release ONNX model session and all audio resources. */\n async destroy(): Promise<void> {\n if (this._isDestroyed) return;\n this._log('Destroying...');\n\n this.stop();\n\n if (this._session) {\n await this._session.release();\n this._session = null;\n }\n\n // Dispose tensors.\n this._h?.dispose();\n this._c?.dispose();\n this._h = null;\n this._c = null;\n\n this._isReady = false;\n this._isDestroyed = true;\n this._listeners.clear();\n\n this._log('Destroyed');\n }\n\n // -------------------------------------------------------------------------\n // Private: Event system\n // -------------------------------------------------------------------------\n\n private _on(type: VADEventType, callback: VADCallback): () => void {\n let set = this._listeners.get(type);\n if (!set) {\n set = new Set();\n this._listeners.set(type, set);\n }\n set.add(callback);\n return () => {\n set!.delete(callback);\n };\n }\n\n private _emit(event: VADEvent): void {\n const set = this._listeners.get(event.type);\n if (!set) return;\n for (const cb of set) {\n try {\n cb(event);\n } catch (err) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Error in ${event.type} callback:`, err);\n }\n }\n }\n\n // -------------------------------------------------------------------------\n // Private: Audio pipeline\n // 
-------------------------------------------------------------------------\n\n private _setupAudioPipeline(stream: MediaStream): void {\n // Determine the incoming sample rate.\n const tracks = stream.getAudioTracks();\n const trackSettings = tracks[0]?.getSettings();\n const inputSampleRate = trackSettings?.sampleRate ?? 48000;\n\n this._log('Input sample rate:', inputSampleRate);\n\n // Create AudioContext at the input sample rate so we don't double-resample.\n // SSR guard: AudioContext may not exist.\n if (typeof AudioContext === 'undefined' && typeof webkitAudioContext === 'undefined') {\n throw new Error(`${LOG_PREFIX} AudioContext is not available in this environment`);\n }\n\n const AudioContextClass =\n typeof AudioContext !== 'undefined'\n ? AudioContext\n : // eslint-disable-next-line @typescript-eslint/no-explicit-any\n (globalThis as any).webkitAudioContext as typeof AudioContext;\n\n this._audioContext = new AudioContextClass({ sampleRate: inputSampleRate });\n this._ownsAudioContext = true;\n\n this._sourceNode = this._audioContext.createMediaStreamSource(stream);\n\n // Try AudioWorklet first, fall back to ScriptProcessorNode.\n this._setupScriptProcessor(inputSampleRate);\n }\n\n /**\n * ScriptProcessorNode fallback (works everywhere, including Safari).\n * We use a buffer size of 4096 which gives ~85 ms of audio at 48 kHz.\n */\n private _setupScriptProcessor(inputSampleRate: number): void {\n if (!this._audioContext || !this._sourceNode) return;\n\n // Buffer size must be a power of 2: 256, 512, 1024, 2048, 4096, 8192, 16384.\n const bufferSize = 4096;\n const processor = this._audioContext.createScriptProcessor(bufferSize, 1, 1);\n\n processor.onaudioprocess = (event: AudioProcessingEvent) => {\n if (!this._isStarted) return;\n\n const inputData = event.inputBuffer.getChannelData(0);\n\n // Resample to target rate if needed.\n const resampled =\n inputSampleRate !== this._sampleRate\n ? 
resample(inputData, inputSampleRate, this._sampleRate)\n : new Float32Array(inputData);\n\n // Feed resampled audio into frame-sized chunks.\n this._feedAudio(resampled);\n };\n\n this._sourceNode.connect(processor);\n processor.connect(this._audioContext.destination);\n this._workletNode = processor;\n\n this._log('Audio pipeline set up (ScriptProcessorNode)');\n }\n\n /**\n * Accumulate resampled audio into FRAME_SIZE chunks and process each full frame.\n */\n private _feedAudio(samples: Float32Array): void {\n let offset = 0;\n\n while (offset < samples.length) {\n const remaining = FRAME_SIZE - this._frameBufferOffset;\n const available = samples.length - offset;\n const toCopy = Math.min(remaining, available);\n\n this._frameBuffer.set(\n samples.subarray(offset, offset + toCopy),\n this._frameBufferOffset,\n );\n this._frameBufferOffset += toCopy;\n offset += toCopy;\n\n if (this._frameBufferOffset === FRAME_SIZE) {\n const frame = new Float32Array(this._frameBuffer);\n this._frameBufferOffset = 0;\n\n // Serialise inference calls to avoid overlapping ONNX sessions.\n this._processingPromise = this._processingPromise.then(() =>\n this._handleFrame(frame),\n );\n }\n }\n }\n\n /**\n * Process a single FRAME_SIZE frame: run inference and update speech state.\n */\n private async _handleFrame(frame: Float32Array): Promise<void> {\n if (!this._isStarted || !this._session) return;\n\n let probability: number;\n try {\n probability = await this.processFrame(frame);\n } catch (err) {\n if (this._debug) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Inference error:`, err);\n }\n return;\n }\n\n // Noise floor calibration phase.\n if (this._isCalibrating) {\n this._calibrationSamples.push(probability);\n\n if (this._calibrationSamples.length >= this._calibrationFramesNeeded) {\n this._finishCalibration();\n }\n return;\n }\n\n // Speech state machine.\n const isSpeechFrame = probability >= this._calibratedThreshold;\n\n if (isSpeechFrame) 
{\n this._consecutiveSpeechFrames++;\n this._consecutiveSilenceFrames = 0;\n\n if (!this._isSpeaking && this._consecutiveSpeechFrames >= this._minSpeechFrames) {\n this._isSpeaking = true;\n this._log('Speech started, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-start',\n timestamp: Date.now(),\n probability,\n });\n }\n } else {\n this._consecutiveSilenceFrames++;\n // Do NOT reset _consecutiveSpeechFrames here — only reset when speech-end fires.\n\n if (this._isSpeaking && this._consecutiveSilenceFrames >= this._silenceFrames) {\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._log('Speech ended, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability,\n });\n }\n }\n }\n\n private _finishCalibration(): void {\n if (this._calibrationSamples.length === 0) {\n this._isCalibrating = false;\n return;\n }\n\n // Compute average noise floor probability.\n const sum = this._calibrationSamples.reduce((a, b) => a + b, 0);\n const avgNoise = sum / this._calibrationSamples.length;\n\n // If the ambient noise floor is high, nudge the threshold above it.\n // We add a margin so we don't constantly trigger on background noise.\n const NOISE_MARGIN = 0.15;\n if (avgNoise + NOISE_MARGIN > this._threshold) {\n this._calibratedThreshold = Math.min(avgNoise + NOISE_MARGIN, 0.95);\n this._log(\n 'Noise floor is high. 
Adjusted threshold from',\n this._threshold.toFixed(3),\n 'to',\n this._calibratedThreshold.toFixed(3),\n '(avg noise:',\n avgNoise.toFixed(3) + ')',\n );\n } else {\n this._calibratedThreshold = this._threshold;\n this._log('Noise floor OK, avg:', avgNoise.toFixed(3), '— keeping threshold at', this._threshold.toFixed(3));\n }\n\n // Recompute frame counters in case threshold changed min speech behaviour.\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._isCalibrating = false;\n this._calibrationSamples = [];\n }\n\n private _teardownAudioPipeline(): void {\n if (this._workletNode) {\n try {\n this._workletNode.disconnect();\n } catch {\n // Ignore disconnect errors.\n }\n if ('onaudioprocess' in this._workletNode) {\n (this._workletNode as ScriptProcessorNode).onaudioprocess = null;\n }\n this._workletNode = null;\n }\n\n if (this._sourceNode) {\n try {\n this._sourceNode.disconnect();\n } catch {\n // Ignore.\n }\n this._sourceNode = null;\n }\n\n if (this._audioContext && this._ownsAudioContext) {\n try {\n void this._audioContext.close();\n } catch {\n // Ignore.\n }\n this._audioContext = null;\n this._ownsAudioContext = false;\n }\n\n this._stream = null;\n }\n\n // -------------------------------------------------------------------------\n // Private: ONNX state helpers\n // -------------------------------------------------------------------------\n\n /** Reset the LSTM hidden and cell states to zeros. 
*/\n private _resetStates(): void {\n // Dispose any existing tensors to free memory.\n this._h?.dispose();\n this._c?.dispose();\n\n const zeros = new Float32Array(2 * STATE_SIZE).fill(0);\n this._h = new ort.Tensor('float32', zeros.slice(0, STATE_SIZE), [2, 1, 64]);\n this._c = new ort.Tensor('float32', zeros.slice(STATE_SIZE), [2, 1, 64]);\n }\n\n // -------------------------------------------------------------------------\n // Private: Logging\n // -------------------------------------------------------------------------\n\n private _log(...args: unknown[]): void {\n if (!this._debug) return;\n // eslint-disable-next-line no-console\n console.log(LOG_PREFIX, ...args);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Convenience factory\n// ---------------------------------------------------------------------------\n\n/**\n * Create and initialise a SileroVAD instance in one call.\n *\n * ```ts\n * const vad = await createVAD({ debug: true });\n * vad.onSpeechStart(() => console.log('speaking'));\n * vad.start(stream);\n * ```\n */\nexport async function createVAD(options?: VADOptions): Promise<SileroVAD> {\n const vad = new SileroVAD(options);\n await vad.init();\n return vad;\n}\n\n// Re-export the frame size constant so consumers can align their buffers.\nexport { FRAME_SIZE, TARGET_SAMPLE_RATE };\n\n// Type-only declaration for environments that provide webkitAudioContext.\ndeclare global {\n // eslint-disable-next-line no-var\n var webkitAudioContext: typeof AudioContext | 
undefined;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,UAAqB;AAEd,IAAM,cAAc;AAM3B,IAAM,aAAa;AAGnB,IAAM,oBACJ;AAGF,IAAM,aAAa,iBAAiB,WAAW;AAC/C,IAAM,kBAAkB;AAGxB,IAAM,aAAa;AAGnB,IAAM,qBAAqB;AAG3B,IAAM,0BAA0B;AAGhC,IAAM,aAAa;AAmCnB,eAAe,qBAAkD;AAC/D,MAAI,OAAO,WAAW,YAAa,QAAO;AAC1C,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,WAAW,MAAM,MAAM,MAAM,eAAe;AAClD,WAAO,WAAW,SAAS,YAAY,IAAI;AAAA,EAC7C,QAAQ;AAEN,WAAO;AAAA,EACT;AACF;AAEA,eAAe,iBAAiB,MAAkC;AAChE,MAAI,OAAO,WAAW,YAAa;AACnC,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,MAAM,IAAI,iBAAiB,IAAI,SAAS,IAAI,CAAC;AAAA,EACrD,QAAQ;AAAA,EAER;AACF;AAUA,SAAS,SACP,OACA,WACA,YACc;AACd,MAAI,cAAc,WAAY,QAAO;AACrC,QAAM,QAAQ,YAAY;AAC1B,QAAM,eAAe,KAAK,MAAM,MAAM,SAAS,KAAK;AACpD,QAAM,SAAS,IAAI,aAAa,YAAY;AAC5C,WAAS,IAAI,GAAG,IAAI,cAAc,KAAK;AACrC,UAAM,WAAW,IAAI;AACrB,UAAM,WAAW,KAAK,MAAM,QAAQ;AACpC,UAAM,UAAU,KAAK,IAAI,WAAW,GAAG,MAAM,SAAS,CAAC;AACvD,UAAM,OAAO,WAAW;AACxB,WAAO,CAAC,IAAK,MAAM,QAAQ,KAAgB,IAAI,QAAS,MAAM,OAAO,IAAe;AAAA,EACtF;AACA,SAAO;AACT;AAMO,IAAM,YAAN,MAAgB;AAAA;AAAA,EAEJ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGT,WAAwC;AAAA,EACxC,KAAwB;AAAA,EACxB,KAAwB;AAAA;AAAA,EAGxB,gBAAqC;AAAA,EACrC,oBAAoB;AAAA,EACpB,cAAiD;AAAA,EACjD,eAA8D;AAAA,EAC9D,UAA8B;AAAA;AAAA,EAG9B,eAA6B,IAAI,aAAa,CAAC;AAAA,EAC/C,qBAAqB;AAAA;AAAA,EAGrB,WAAW;AAAA,EACX,cAAc;AAAA,EACd,aAAa;AAAA,EACb,eAAe;AAAA;AAAA,EAGf,2BAA2B;AAAA,EAC3B,4BAA4B;AAAA,EAC5B;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,iBAAiB;AAAA,EACjB,sBAAgC,CAAC;AAAA,EACjC,2BAA2B;AAAA,EAC3B;AAAA;AAAA,EAGA,aAAkD,oBAAI,IAAI;AAAA;AAAA,EAG1D,qBAAoC,QAAQ,QAAQ;AAAA,EAE5D,YAAY,SAAsB;AAChC,SAAK,aAAa,SAAS,aAAa;AACxC,SAAK,uBAAuB,SAAS,uBAAuB;AAC5D,SAAK,qBAAqB,SAAS,qBAAqB;AACxD,SAAK,cAAc,SAAS,cAAc;AAC1C,SAAK,SAAS,SAAS,SAAS;AAChC,SAAK,YAAY,SAAS,YAAY;AACtC,SAAK,uBAAuB,KAAK;AAGjC,SAAK,mBAAoB,aAAa,KAAK,cAAe;AAC1D,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,KAAK,wBAAwB;AAAA,MAChC,WAAW,
KAAK;AAAA,MAChB,qBAAqB,KAAK;AAAA,MAC1B,mBAAmB,KAAK;AAAA,MACxB,YAAY,KAAK;AAAA,MACjB,UAAU,KAAK;AAAA,IACjB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,OAAsB;AAC1B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,4BAA4B;AAAA,IAC3D;AACA,QAAI,KAAK,UAAU;AACjB,WAAK,KAAK,qCAAgC;AAC1C;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB;AAG3B,QAAI,cAAc,MAAM,mBAAmB;AAC3C,QAAI,aAAa;AACf,WAAK,KAAK,6BAA6B;AAAA,IACzC,OAAO;AACL,WAAK,KAAK,uBAAuB,KAAK,SAAS;AAC/C,YAAM,WAAW,MAAM,MAAM,KAAK,SAAS;AAC3C,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,IAAI;AAAA,UACR,GAAG,UAAU,2BAA2B,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,QAChF;AAAA,MACF;AACA,oBAAc,MAAM,SAAS,YAAY;AACzC,WAAK,KAAK,wBAAwB,YAAY,YAAY,OAAO;AAGjE,YAAM,iBAAiB,WAAW;AAClC,WAAK,KAAK,0BAA0B;AAAA,IACtC;AAGA,SAAK,WAAW,MAAU,qBAAiB,OAAO,aAAa;AAAA,MAC7D,oBAAoB,CAAC,MAAM;AAAA,MAC3B,wBAAwB;AAAA,IAC1B,CAAC;AAGD,SAAK,aAAa;AAElB,SAAK,WAAW;AAChB,SAAK,KAAK,wBAAwB;AAElC,SAAK,MAAM;AAAA,MACT,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,IACtB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,aAAa,WAA0C;AAC3D,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,UAAU,WAAW,YAAY;AACnC,YAAM,IAAI;AAAA,QACR,GAAG,UAAU,aAAa,UAAU,iBAAiB,UAAU,MAAM;AAAA,MACvE;AAAA,IACF;AAEA,UAAM,cAAc,IAAQ,WAAO,WAAW,WAAW,CAAC,GAAG,UAAU,CAAC;AACxE,UAAM,WAAW,IAAQ,WAAO,SAAS,cAAc,KAAK,CAAC,OAAO,KAAK,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;AAE5F,UAAM,QAAoC;AAAA,MACxC,OAAO;AAAA,MACP,IAAI;AAAA,MACJ,GAAG,KAAK;AAAA,MACR,GAAG,KAAK;AAAA,IACV;AAEA,UAAM,UAAU,MAAM,KAAK,SAAS,IAAI,KAAK;AAG7C,SAAK,KAAK,QAAQ,IAAI;AACtB,SAAK,KAAK,QAAQ,IAAI;AAEtB,UAAM,cAAe,QAAQ,QAAQ,EAAiB,KAAK,CAAC;AAC5D,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,MAAM,QAA2B;AAC/B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,6BAA6B;AAAA,IAC5D;AACA,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,KAAK,YAAY;AACnB,WAAK,KAAK,wDAAmD;AAC7D,WAAK,KAAK;AAAA,IACZ;AAEA,SAAK,KAAK,6BAA6B;AACvC,SAAK,UAAU;AACf,SAAK,aAAa;AAGlB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,eAAe,IAAI,aAAa,UAAU;AAC/C,SAAK,qBAAqB;AAG1B,SAAK,aAAa;AAGlB,SAAK,iBAAiB;AACtB,SAAK,s
BAAsB,CAAC;AAC5B,SAAK,2BAA2B,KAAK,IAAI,GAAG,KAAK;AAAA,MAC9C,0BAA0B,MAAQ,KAAK,cAAc;AAAA,IACxD,CAAC;AACD,SAAK,KAAK,+BAA+B,KAAK,0BAA0B,QAAQ;AAGhF,SAAK,oBAAoB,MAAM;AAAA,EACjC;AAAA;AAAA,EAGA,OAAa;AACX,QAAI,CAAC,KAAK,WAAY;AAEtB,SAAK,KAAK,cAAc;AAGxB,SAAK,uBAAuB;AAG5B,QAAI,KAAK,aAAa;AACpB,WAAK,cAAc;AACnB,WAAK,MAAM;AAAA,QACT,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAGA,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,qBAAqB;AAC1B,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAC5B,SAAK,qBAAqB,QAAQ,QAAQ;AAE1C,SAAK,KAAK,aAAa;AAAA,EACzB;AAAA;AAAA,EAGA,cAAc,UAAmC;AAC/C,WAAO,KAAK,IAAI,gBAAgB,QAAQ;AAAA,EAC1C;AAAA;AAAA,EAGA,YAAY,UAAmC;AAC7C,WAAO,KAAK,IAAI,cAAc,QAAQ;AAAA,EACxC;AAAA;AAAA,EAGA,QAAQ,UAAmC;AACzC,WAAO,KAAK,IAAI,aAAa,QAAQ;AAAA,EACvC;AAAA;AAAA,EAGA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAA6B;AAC/B,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAyB;AAC7B,QAAI,KAAK,aAAc;AACvB,SAAK,KAAK,eAAe;AAEzB,SAAK,KAAK;AAEV,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,QAAQ;AAC5B,WAAK,WAAW;AAAA,IAClB;AAGA,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AACjB,SAAK,KAAK;AACV,SAAK,KAAK;AAEV,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,WAAW,MAAM;AAEtB,SAAK,KAAK,WAAW;AAAA,EACvB;AAAA;AAAA;AAAA;AAAA,EAMQ,IAAI,MAAoB,UAAmC;AACjE,QAAI,MAAM,KAAK,WAAW,IAAI,IAAI;AAClC,QAAI,CAAC,KAAK;AACR,YAAM,oBAAI,IAAI;AACd,WAAK,WAAW,IAAI,MAAM,GAAG;AAAA,IAC/B;AACA,QAAI,IAAI,QAAQ;AAChB,WAAO,MAAM;AACX,UAAK,OAAO,QAAQ;AAAA,IACtB;AAAA,EACF;AAAA,EAEQ,MAAM,OAAuB;AACnC,UAAM,MAAM,KAAK,WAAW,IAAI,MAAM,IAAI;AAC1C,QAAI,CAAC,IAAK;AACV,eAAW,MAAM,KAAK;AACpB,UAAI;AACF,WAAG,KAAK;AAAA,MACV,SAAS,KAAK;AAEZ,gBAAQ,MAAM,GAAG,UAAU,aAAa,MAAM,IAAI,cAAc,GAAG;AAAA,MACrE;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAMQ,oBAAoB,QAA2B;AAErD,UAAM,SAAS,OAAO,eAAe;AACrC,UAAM,gBAAgB,OAAO,CAAC,GAAG,YAAY;AAC7C,UAAM,kBAAkB,eAAe,cAAc;AAErD,SAAK,KAAK,sBAAsB,eAAe;AAI/C,QAAI,OAAO,iBAAiB,eAAe,OAAO,uBAAuB,aAAa;AACpF,YAAM,IAAI,MAAM,GAAG,UAAU,oDAAoD;AAAA,IACnF;AAEA,UAAM,oBACJ,OAAO,iBAAiB,c
ACpB;AAAA;AAAA,MAEC,WAAmB;AAAA;AAE1B,SAAK,gBAAgB,IAAI,kBAAkB,EAAE,YAAY,gBAAgB,CAAC;AAC1E,SAAK,oBAAoB;AAEzB,SAAK,cAAc,KAAK,cAAc,wBAAwB,MAAM;AAGpE,SAAK,sBAAsB,eAAe;AAAA,EAC5C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,sBAAsB,iBAA+B;AAC3D,QAAI,CAAC,KAAK,iBAAiB,CAAC,KAAK,YAAa;AAG9C,UAAM,aAAa;AACnB,UAAM,YAAY,KAAK,cAAc,sBAAsB,YAAY,GAAG,CAAC;AAE3E,cAAU,iBAAiB,CAAC,UAAgC;AAC1D,UAAI,CAAC,KAAK,WAAY;AAEtB,YAAM,YAAY,MAAM,YAAY,eAAe,CAAC;AAGpD,YAAM,YACJ,oBAAoB,KAAK,cACrB,SAAS,WAAW,iBAAiB,KAAK,WAAW,IACrD,IAAI,aAAa,SAAS;AAGhC,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,SAAK,YAAY,QAAQ,SAAS;AAClC,cAAU,QAAQ,KAAK,cAAc,WAAW;AAChD,SAAK,eAAe;AAEpB,SAAK,KAAK,6CAA6C;AAAA,EACzD;AAAA;AAAA;AAAA;AAAA,EAKQ,WAAW,SAA6B;AAC9C,QAAI,SAAS;AAEb,WAAO,SAAS,QAAQ,QAAQ;AAC9B,YAAM,YAAY,aAAa,KAAK;AACpC,YAAM,YAAY,QAAQ,SAAS;AACnC,YAAM,SAAS,KAAK,IAAI,WAAW,SAAS;AAE5C,WAAK,aAAa;AAAA,QAChB,QAAQ,SAAS,QAAQ,SAAS,MAAM;AAAA,QACxC,KAAK;AAAA,MACP;AACA,WAAK,sBAAsB;AAC3B,gBAAU;AAEV,UAAI,KAAK,uBAAuB,YAAY;AAC1C,cAAM,QAAQ,IAAI,aAAa,KAAK,YAAY;AAChD,aAAK,qBAAqB;AAG1B,aAAK,qBAAqB,KAAK,mBAAmB;AAAA,UAAK,MACrD,KAAK,aAAa,KAAK;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,aAAa,OAAoC;AAC7D,QAAI,CAAC,KAAK,cAAc,CAAC,KAAK,SAAU;AAExC,QAAI;AACJ,QAAI;AACF,oBAAc,MAAM,KAAK,aAAa,KAAK;AAAA,IAC7C,SAAS,KAAK;AACZ,UAAI,KAAK,QAAQ;AAEf,gBAAQ,MAAM,GAAG,UAAU,qBAAqB,GAAG;AAAA,MACrD;AACA;AAAA,IACF;AAGA,QAAI,KAAK,gBAAgB;AACvB,WAAK,oBAAoB,KAAK,WAAW;AAEzC,UAAI,KAAK,oBAAoB,UAAU,KAAK,0BAA0B;AACpE,aAAK,mBAAmB;AAAA,MAC1B;AACA;AAAA,IACF;AAGA,UAAM,gBAAgB,eAAe,KAAK;AAE1C,QAAI,eAAe;AACjB,WAAK;AACL,WAAK,4BAA4B;AAEjC,UAAI,CAAC,KAAK,eAAe,KAAK,4BAA4B,KAAK,kBAAkB;AAC/E,aAAK,cAAc;AACnB,aAAK,KAAK,gCAAgC,YAAY,QAAQ,CAAC,CAAC;AAChE,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,OAAO;AACL,WAAK;AAGL,UAAI,KAAK,eAAe,KAAK,6BAA6B,KAAK,gBAAgB;AAC7E,aAAK,cAAc;AACnB,aAAK,2BAA2B;AAChC,aAAK,KAAK,8BAA8B,YAAY,QAAQ,CAAC,CAAC;AAC9D,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAAA,EA
EQ,qBAA2B;AACjC,QAAI,KAAK,oBAAoB,WAAW,GAAG;AACzC,WAAK,iBAAiB;AACtB;AAAA,IACF;AAGA,UAAM,MAAM,KAAK,oBAAoB,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC;AAC9D,UAAM,WAAW,MAAM,KAAK,oBAAoB;AAIhD,UAAM,eAAe;AACrB,QAAI,WAAW,eAAe,KAAK,YAAY;AAC7C,WAAK,uBAAuB,KAAK,IAAI,WAAW,cAAc,IAAI;AAClE,WAAK;AAAA,QACH;AAAA,QACA,KAAK,WAAW,QAAQ,CAAC;AAAA,QACzB;AAAA,QACA,KAAK,qBAAqB,QAAQ,CAAC;AAAA,QACnC;AAAA,QACA,SAAS,QAAQ,CAAC,IAAI;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,uBAAuB,KAAK;AACjC,WAAK,KAAK,wBAAwB,SAAS,QAAQ,CAAC,GAAG,+BAA0B,KAAK,WAAW,QAAQ,CAAC,CAAC;AAAA,IAC7G;AAGA,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAAA,EAC9B;AAAA,EAEQ,yBAA+B;AACrC,QAAI,KAAK,cAAc;AACrB,UAAI;AACF,aAAK,aAAa,WAAW;AAAA,MAC/B,QAAQ;AAAA,MAER;AACA,UAAI,oBAAoB,KAAK,cAAc;AACzC,QAAC,KAAK,aAAqC,iBAAiB;AAAA,MAC9D;AACA,WAAK,eAAe;AAAA,IACtB;AAEA,QAAI,KAAK,aAAa;AACpB,UAAI;AACF,aAAK,YAAY,WAAW;AAAA,MAC9B,QAAQ;AAAA,MAER;AACA,WAAK,cAAc;AAAA,IACrB;AAEA,QAAI,KAAK,iBAAiB,KAAK,mBAAmB;AAChD,UAAI;AACF,aAAK,KAAK,cAAc,MAAM;AAAA,MAChC,QAAQ;AAAA,MAER;AACA,WAAK,gBAAgB;AACrB,WAAK,oBAAoB;AAAA,IAC3B;AAEA,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA;AAAA,EAOQ,eAAqB;AAE3B,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AAEjB,UAAM,QAAQ,IAAI,aAAa,IAAI,UAAU,EAAE,KAAK,CAAC;AACrD,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,GAAG,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAC1E,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAAA,EACzE;AAAA;AAAA;AAAA;AAAA,EAMQ,QAAQ,MAAuB;AACrC,QAAI,CAAC,KAAK,OAAQ;AAElB,YAAQ,IAAI,YAAY,GAAG,IAAI;AAAA,EACjC;AACF;AAeA,eAAsB,UAAU,SAA0C;AACxE,QAAM,MAAM,IAAI,UAAU,OAAO;AACjC,QAAM,IAAI,KAAK;AACf,SAAO;AACT;","names":[]}
|
package/dist/index.d.cts
CHANGED
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
2
|
import * as ort from "onnxruntime-web";
|
|
3
|
-
var VAD_VERSION = "0.1.0";
|
|
3
|
+
var VAD_VERSION = "0.1.0-beta.2";
|
|
4
4
|
var LOG_PREFIX = "[GuideKit:VAD]";
|
|
5
5
|
var DEFAULT_MODEL_URL = "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/silero_vad_v5.onnx";
|
|
6
6
|
var CACHE_NAME = `guidekit-vad-v${VAD_VERSION}`;
|
|
@@ -192,9 +192,9 @@ var SileroVAD = class {
|
|
|
192
192
|
this._resetStates();
|
|
193
193
|
this._isCalibrating = true;
|
|
194
194
|
this._calibrationSamples = [];
|
|
195
|
-
this._calibrationFramesNeeded = Math.
|
|
195
|
+
this._calibrationFramesNeeded = Math.max(1, Math.floor(
|
|
196
196
|
CALIBRATION_DURATION_MS / 1e3 * this._sampleRate / FRAME_SIZE
|
|
197
|
-
);
|
|
197
|
+
));
|
|
198
198
|
this._log("Calibrating noise floor for", this._calibrationFramesNeeded, "frames");
|
|
199
199
|
this._setupAudioPipeline(stream);
|
|
200
200
|
}
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// @guidekit/vad — Silero VAD ONNX model wrapper for voice activity detection\nimport * as ort from 'onnxruntime-web';\n\nexport const VAD_VERSION = '0.1.0';\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nconst LOG_PREFIX = '[GuideKit:VAD]';\n\n/** Default CDN URL for the Silero VAD ONNX model (v5). */\nconst DEFAULT_MODEL_URL =\n 'https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/silero_vad_v5.onnx';\n\n/** Cache API key used for persisting the downloaded ONNX model. */\nconst CACHE_NAME = `guidekit-vad-v${VAD_VERSION}`;\nconst CACHE_MODEL_KEY = 'model.onnx';\n\n/** Silero VAD frame size: 512 samples at 16 kHz = 32 ms per frame. */\nconst FRAME_SIZE = 512;\n\n/** Target sample rate for VAD processing. */\nconst TARGET_SAMPLE_RATE = 16000;\n\n/** Duration (in ms) of audio collected for noise floor calibration. */\nconst CALIBRATION_DURATION_MS = 500;\n\n/** Hidden/cell state size for Silero VAD v5 LSTM. */\nconst STATE_SIZE = 128;\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport interface VADOptions {\n /** Speech probability threshold (0-1). Default: 0.5 */\n threshold?: number;\n /** Minimum speech duration in ms to trigger start. Default: 300 */\n minSpeechDurationMs?: number;\n /** Silence duration in ms after speech to trigger end. Default: 500 */\n silenceDurationMs?: number;\n /** Sample rate. Default: 16000 */\n sampleRate?: number;\n /** Enable debug logging. Default: false */\n debug?: boolean;\n /** Custom URL for the Silero VAD ONNX model file. 
*/\n modelUrl?: string;\n}\n\nexport interface VADEvent {\n type: 'speech-start' | 'speech-end' | 'vad-ready';\n timestamp: number;\n /** Speech probability (0-1) at the moment of the event. */\n probability?: number;\n}\n\ntype VADEventType = VADEvent['type'];\ntype VADCallback = (event: VADEvent) => void;\n\n// ---------------------------------------------------------------------------\n// Utility: Cache API helpers\n// ---------------------------------------------------------------------------\n\nasync function loadModelFromCache(): Promise<ArrayBuffer | null> {\n if (typeof caches === 'undefined') return null;\n try {\n const cache = await caches.open(CACHE_NAME);\n const response = await cache.match(CACHE_MODEL_KEY);\n return response ? response.arrayBuffer() : null;\n } catch {\n // Cache API may be unavailable in certain contexts (e.g. opaque origins).\n return null;\n }\n}\n\nasync function saveModelToCache(data: ArrayBuffer): Promise<void> {\n if (typeof caches === 'undefined') return;\n try {\n const cache = await caches.open(CACHE_NAME);\n await cache.put(CACHE_MODEL_KEY, new Response(data));\n } catch {\n // Silently ignore cache write failures.\n }\n}\n\n// ---------------------------------------------------------------------------\n// Utility: Resampler\n// ---------------------------------------------------------------------------\n\n/**\n * Simple linear-interpolation resampler from `inputRate` to `outputRate`.\n * Adequate for VAD where perceptual audio quality is irrelevant.\n */\nfunction resample(\n input: Float32Array,\n inputRate: number,\n outputRate: number,\n): Float32Array {\n if (inputRate === outputRate) return input;\n const ratio = inputRate / outputRate;\n const outputLength = Math.round(input.length / ratio);\n const output = new Float32Array(outputLength);\n for (let i = 0; i < outputLength; i++) {\n const srcIndex = i * ratio;\n const srcFloor = Math.floor(srcIndex);\n const srcCeil = Math.min(srcFloor + 1, input.length - 1);\n 
const frac = srcIndex - srcFloor;\n output[i] = (input[srcFloor] as number) * (1 - frac) + (input[srcCeil] as number) * frac;\n }\n return output;\n}\n\n// ---------------------------------------------------------------------------\n// SileroVAD\n// ---------------------------------------------------------------------------\n\nexport class SileroVAD {\n // Options (resolved with defaults)\n private readonly _threshold: number;\n private readonly _minSpeechDurationMs: number;\n private readonly _silenceDurationMs: number;\n private readonly _sampleRate: number;\n private readonly _debug: boolean;\n private readonly _modelUrl: string;\n\n // ONNX Runtime state\n private _session: ort.InferenceSession | null = null;\n private _h: ort.Tensor | null = null;\n private _c: ort.Tensor | null = null;\n\n // Audio pipeline\n private _audioContext: AudioContext | null = null;\n private _ownsAudioContext = false;\n private _sourceNode: MediaStreamAudioSourceNode | null = null;\n private _workletNode: AudioWorkletNode | ScriptProcessorNode | null = null;\n private _stream: MediaStream | null = null;\n\n // Frame buffer for accumulating resampled samples into FRAME_SIZE chunks\n private _frameBuffer: Float32Array = new Float32Array(0);\n private _frameBufferOffset = 0;\n\n // State tracking\n private _isReady = false;\n private _isSpeaking = false;\n private _isStarted = false;\n private _isDestroyed = false;\n\n // Duration tracking (in frames)\n private _consecutiveSpeechFrames = 0;\n private _consecutiveSilenceFrames = 0;\n private _frameDurationMs: number;\n private _minSpeechFrames: number;\n private _silenceFrames: number;\n\n // Noise floor calibration\n private _isCalibrating = false;\n private _calibrationSamples: number[] = [];\n private _calibrationFramesNeeded = 0;\n private _calibratedThreshold: number;\n\n // Event listeners\n private _listeners: Map<VADEventType, Set<VADCallback>> = new Map();\n\n // Processing lock to serialise frame inference\n private 
_processingPromise: Promise<void> = Promise.resolve();\n\n constructor(options?: VADOptions) {\n this._threshold = options?.threshold ?? 0.5;\n this._minSpeechDurationMs = options?.minSpeechDurationMs ?? 300;\n this._silenceDurationMs = options?.silenceDurationMs ?? 500;\n this._sampleRate = options?.sampleRate ?? TARGET_SAMPLE_RATE;\n this._debug = options?.debug ?? false;\n this._modelUrl = options?.modelUrl ?? DEFAULT_MODEL_URL;\n this._calibratedThreshold = this._threshold;\n\n // Pre-compute frame-duration-based counters\n this._frameDurationMs = (FRAME_SIZE / this._sampleRate) * 1000;\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._log('Created with options', {\n threshold: this._threshold,\n minSpeechDurationMs: this._minSpeechDurationMs,\n silenceDurationMs: this._silenceDurationMs,\n sampleRate: this._sampleRate,\n modelUrl: this._modelUrl,\n });\n }\n\n // -------------------------------------------------------------------------\n // Public API\n // -------------------------------------------------------------------------\n\n /** Load the ONNX model. Uses Cache API for persistence across sessions. */\n async init(): Promise<void> {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot init after destroy`);\n }\n if (this._isReady) {\n this._log('Already initialised — skipping');\n return;\n }\n\n this._log('Initialising...');\n\n // 1. 
Attempt to load model bytes from cache, falling back to network.\n let modelBuffer = await loadModelFromCache();\n if (modelBuffer) {\n this._log('Loaded model from Cache API');\n } else {\n this._log('Fetching model from', this._modelUrl);\n const response = await fetch(this._modelUrl);\n if (!response.ok) {\n throw new Error(\n `${LOG_PREFIX} Failed to fetch model: ${response.status} ${response.statusText}`,\n );\n }\n modelBuffer = await response.arrayBuffer();\n this._log('Model fetched, size:', modelBuffer.byteLength, 'bytes');\n\n // Persist to Cache API for next time.\n await saveModelToCache(modelBuffer);\n this._log('Model saved to Cache API');\n }\n\n // 2. Create ONNX InferenceSession.\n this._session = await ort.InferenceSession.create(modelBuffer, {\n executionProviders: ['wasm'],\n graphOptimizationLevel: 'all',\n });\n\n // 3. Initialise LSTM hidden/cell state tensors (zeros).\n this._resetStates();\n\n this._isReady = true;\n this._log('Model loaded and ready');\n\n this._emit({\n type: 'vad-ready',\n timestamp: Date.now(),\n });\n }\n\n /**\n * Process a single audio frame (512 samples at 16 kHz).\n * Returns the speech probability (0-1).\n */\n async processFrame(audioData: Float32Array): Promise<number> {\n if (!this._session) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (audioData.length !== FRAME_SIZE) {\n throw new Error(\n `${LOG_PREFIX} Expected ${FRAME_SIZE} samples, got ${audioData.length}`,\n );\n }\n\n const inputTensor = new ort.Tensor('float32', audioData, [1, FRAME_SIZE]);\n const srTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(this._sampleRate)]), [1]);\n\n const feeds: Record<string, ort.Tensor> = {\n input: inputTensor,\n sr: srTensor,\n h: this._h!,\n c: this._c!,\n };\n\n const results = await this._session.run(feeds);\n\n // Update LSTM hidden/cell states for the next frame.\n this._h = results['hn'] as ort.Tensor;\n this._c = results['cn'] as ort.Tensor;\n\n const probability = (results['output'] as ort.Tensor).data[0] as number;\n return probability;\n }\n\n /** Start VAD processing on a MediaStream (typically from getUserMedia). */\n start(stream: MediaStream): void {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot start after destroy`);\n }\n if (!this._isReady) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (this._isStarted) {\n this._log('Already started — stopping previous session first');\n this.stop();\n }\n\n this._log('Starting VAD on MediaStream');\n this._stream = stream;\n this._isStarted = true;\n\n // Reset speech tracking state.\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBuffer = new Float32Array(FRAME_SIZE);\n this._frameBufferOffset = 0;\n\n // Reset LSTM states for a fresh stream.\n this._resetStates();\n\n // Begin noise floor calibration.\n this._isCalibrating = true;\n this._calibrationSamples = [];\n this._calibrationFramesNeeded = Math.ceil(\n (CALIBRATION_DURATION_MS / 1000) * this._sampleRate / FRAME_SIZE,\n );\n this._log('Calibrating noise floor for', this._calibrationFramesNeeded, 'frames');\n\n // Build the audio processing pipeline.\n this._setupAudioPipeline(stream);\n }\n\n /** Stop VAD processing and release audio resources (but keep the model). */\n stop(): void {\n if (!this._isStarted) return;\n\n this._log('Stopping VAD');\n\n // Tear down audio nodes.\n this._teardownAudioPipeline();\n\n // If we were speaking, emit speech-end.\n if (this._isSpeaking) {\n this._isSpeaking = false;\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability: 0,\n });\n }\n\n // Reset state.\n this._isStarted = false;\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBufferOffset = 0;\n this._isCalibrating = false;\n this._calibrationSamples = [];\n this._processingPromise = Promise.resolve();\n\n this._log('VAD stopped');\n }\n\n /** Register a callback for speech-start events. Returns an unsubscribe function. */\n onSpeechStart(callback: VADCallback): () => void {\n return this._on('speech-start', callback);\n }\n\n /** Register a callback for speech-end events. Returns an unsubscribe function. 
*/\n onSpeechEnd(callback: VADCallback): () => void {\n return this._on('speech-end', callback);\n }\n\n /** Register a callback for vad-ready events. Returns an unsubscribe function. */\n onReady(callback: VADCallback): () => void {\n return this._on('vad-ready', callback);\n }\n\n /** Whether the ONNX model is loaded and ready. */\n get isReady(): boolean {\n return this._isReady;\n }\n\n /** Whether speech is currently detected. */\n get isSpeaking(): boolean {\n return this._isSpeaking;\n }\n\n /** The MediaStream currently being processed, or null. */\n get stream(): MediaStream | null {\n return this._stream;\n }\n\n /** Release ONNX model session and all audio resources. */\n async destroy(): Promise<void> {\n if (this._isDestroyed) return;\n this._log('Destroying...');\n\n this.stop();\n\n if (this._session) {\n await this._session.release();\n this._session = null;\n }\n\n // Dispose tensors.\n this._h?.dispose();\n this._c?.dispose();\n this._h = null;\n this._c = null;\n\n this._isReady = false;\n this._isDestroyed = true;\n this._listeners.clear();\n\n this._log('Destroyed');\n }\n\n // -------------------------------------------------------------------------\n // Private: Event system\n // -------------------------------------------------------------------------\n\n private _on(type: VADEventType, callback: VADCallback): () => void {\n let set = this._listeners.get(type);\n if (!set) {\n set = new Set();\n this._listeners.set(type, set);\n }\n set.add(callback);\n return () => {\n set!.delete(callback);\n };\n }\n\n private _emit(event: VADEvent): void {\n const set = this._listeners.get(event.type);\n if (!set) return;\n for (const cb of set) {\n try {\n cb(event);\n } catch (err) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Error in ${event.type} callback:`, err);\n }\n }\n }\n\n // -------------------------------------------------------------------------\n // Private: Audio pipeline\n // 
-------------------------------------------------------------------------\n\n private _setupAudioPipeline(stream: MediaStream): void {\n // Determine the incoming sample rate.\n const tracks = stream.getAudioTracks();\n const trackSettings = tracks[0]?.getSettings();\n const inputSampleRate = trackSettings?.sampleRate ?? 48000;\n\n this._log('Input sample rate:', inputSampleRate);\n\n // Create AudioContext at the input sample rate so we don't double-resample.\n // SSR guard: AudioContext may not exist.\n if (typeof AudioContext === 'undefined' && typeof webkitAudioContext === 'undefined') {\n throw new Error(`${LOG_PREFIX} AudioContext is not available in this environment`);\n }\n\n const AudioContextClass =\n typeof AudioContext !== 'undefined'\n ? AudioContext\n : // eslint-disable-next-line @typescript-eslint/no-explicit-any\n (globalThis as any).webkitAudioContext as typeof AudioContext;\n\n this._audioContext = new AudioContextClass({ sampleRate: inputSampleRate });\n this._ownsAudioContext = true;\n\n this._sourceNode = this._audioContext.createMediaStreamSource(stream);\n\n // Try AudioWorklet first, fall back to ScriptProcessorNode.\n this._setupScriptProcessor(inputSampleRate);\n }\n\n /**\n * ScriptProcessorNode fallback (works everywhere, including Safari).\n * We use a buffer size of 4096 which gives ~85 ms of audio at 48 kHz.\n */\n private _setupScriptProcessor(inputSampleRate: number): void {\n if (!this._audioContext || !this._sourceNode) return;\n\n // Buffer size must be a power of 2: 256, 512, 1024, 2048, 4096, 8192, 16384.\n const bufferSize = 4096;\n const processor = this._audioContext.createScriptProcessor(bufferSize, 1, 1);\n\n processor.onaudioprocess = (event: AudioProcessingEvent) => {\n if (!this._isStarted) return;\n\n const inputData = event.inputBuffer.getChannelData(0);\n\n // Resample to target rate if needed.\n const resampled =\n inputSampleRate !== this._sampleRate\n ? 
resample(inputData, inputSampleRate, this._sampleRate)\n : new Float32Array(inputData);\n\n // Feed resampled audio into frame-sized chunks.\n this._feedAudio(resampled);\n };\n\n this._sourceNode.connect(processor);\n processor.connect(this._audioContext.destination);\n this._workletNode = processor;\n\n this._log('Audio pipeline set up (ScriptProcessorNode)');\n }\n\n /**\n * Accumulate resampled audio into FRAME_SIZE chunks and process each full frame.\n */\n private _feedAudio(samples: Float32Array): void {\n let offset = 0;\n\n while (offset < samples.length) {\n const remaining = FRAME_SIZE - this._frameBufferOffset;\n const available = samples.length - offset;\n const toCopy = Math.min(remaining, available);\n\n this._frameBuffer.set(\n samples.subarray(offset, offset + toCopy),\n this._frameBufferOffset,\n );\n this._frameBufferOffset += toCopy;\n offset += toCopy;\n\n if (this._frameBufferOffset === FRAME_SIZE) {\n const frame = new Float32Array(this._frameBuffer);\n this._frameBufferOffset = 0;\n\n // Serialise inference calls to avoid overlapping ONNX sessions.\n this._processingPromise = this._processingPromise.then(() =>\n this._handleFrame(frame),\n );\n }\n }\n }\n\n /**\n * Process a single FRAME_SIZE frame: run inference and update speech state.\n */\n private async _handleFrame(frame: Float32Array): Promise<void> {\n if (!this._isStarted || !this._session) return;\n\n let probability: number;\n try {\n probability = await this.processFrame(frame);\n } catch (err) {\n if (this._debug) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Inference error:`, err);\n }\n return;\n }\n\n // Noise floor calibration phase.\n if (this._isCalibrating) {\n this._calibrationSamples.push(probability);\n\n if (this._calibrationSamples.length >= this._calibrationFramesNeeded) {\n this._finishCalibration();\n }\n return;\n }\n\n // Speech state machine.\n const isSpeechFrame = probability >= this._calibratedThreshold;\n\n if (isSpeechFrame) 
{\n this._consecutiveSpeechFrames++;\n this._consecutiveSilenceFrames = 0;\n\n if (!this._isSpeaking && this._consecutiveSpeechFrames >= this._minSpeechFrames) {\n this._isSpeaking = true;\n this._log('Speech started, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-start',\n timestamp: Date.now(),\n probability,\n });\n }\n } else {\n this._consecutiveSilenceFrames++;\n // Do NOT reset _consecutiveSpeechFrames here — only reset when speech-end fires.\n\n if (this._isSpeaking && this._consecutiveSilenceFrames >= this._silenceFrames) {\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._log('Speech ended, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability,\n });\n }\n }\n }\n\n private _finishCalibration(): void {\n if (this._calibrationSamples.length === 0) {\n this._isCalibrating = false;\n return;\n }\n\n // Compute average noise floor probability.\n const sum = this._calibrationSamples.reduce((a, b) => a + b, 0);\n const avgNoise = sum / this._calibrationSamples.length;\n\n // If the ambient noise floor is high, nudge the threshold above it.\n // We add a margin so we don't constantly trigger on background noise.\n const NOISE_MARGIN = 0.15;\n if (avgNoise + NOISE_MARGIN > this._threshold) {\n this._calibratedThreshold = Math.min(avgNoise + NOISE_MARGIN, 0.95);\n this._log(\n 'Noise floor is high. 
Adjusted threshold from',\n this._threshold.toFixed(3),\n 'to',\n this._calibratedThreshold.toFixed(3),\n '(avg noise:',\n avgNoise.toFixed(3) + ')',\n );\n } else {\n this._calibratedThreshold = this._threshold;\n this._log('Noise floor OK, avg:', avgNoise.toFixed(3), '— keeping threshold at', this._threshold.toFixed(3));\n }\n\n // Recompute frame counters in case threshold changed min speech behaviour.\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._isCalibrating = false;\n this._calibrationSamples = [];\n }\n\n private _teardownAudioPipeline(): void {\n if (this._workletNode) {\n try {\n this._workletNode.disconnect();\n } catch {\n // Ignore disconnect errors.\n }\n if ('onaudioprocess' in this._workletNode) {\n (this._workletNode as ScriptProcessorNode).onaudioprocess = null;\n }\n this._workletNode = null;\n }\n\n if (this._sourceNode) {\n try {\n this._sourceNode.disconnect();\n } catch {\n // Ignore.\n }\n this._sourceNode = null;\n }\n\n if (this._audioContext && this._ownsAudioContext) {\n try {\n void this._audioContext.close();\n } catch {\n // Ignore.\n }\n this._audioContext = null;\n this._ownsAudioContext = false;\n }\n\n this._stream = null;\n }\n\n // -------------------------------------------------------------------------\n // Private: ONNX state helpers\n // -------------------------------------------------------------------------\n\n /** Reset the LSTM hidden and cell states to zeros. 
*/\n private _resetStates(): void {\n // Dispose any existing tensors to free memory.\n this._h?.dispose();\n this._c?.dispose();\n\n const zeros = new Float32Array(2 * STATE_SIZE).fill(0);\n this._h = new ort.Tensor('float32', zeros.slice(0, STATE_SIZE), [2, 1, 64]);\n this._c = new ort.Tensor('float32', zeros.slice(STATE_SIZE), [2, 1, 64]);\n }\n\n // -------------------------------------------------------------------------\n // Private: Logging\n // -------------------------------------------------------------------------\n\n private _log(...args: unknown[]): void {\n if (!this._debug) return;\n // eslint-disable-next-line no-console\n console.log(LOG_PREFIX, ...args);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Convenience factory\n// ---------------------------------------------------------------------------\n\n/**\n * Create and initialise a SileroVAD instance in one call.\n *\n * ```ts\n * const vad = await createVAD({ debug: true });\n * vad.onSpeechStart(() => console.log('speaking'));\n * vad.start(stream);\n * ```\n */\nexport async function createVAD(options?: VADOptions): Promise<SileroVAD> {\n const vad = new SileroVAD(options);\n await vad.init();\n return vad;\n}\n\n// Re-export the frame size constant so consumers can align their buffers.\nexport { FRAME_SIZE, TARGET_SAMPLE_RATE };\n\n// Type-only declaration for environments that provide webkitAudioContext.\ndeclare global {\n // eslint-disable-next-line no-var\n var webkitAudioContext: typeof AudioContext | 
undefined;\n}\n"],"mappings":";AACA,YAAY,SAAS;AAEd,IAAM,cAAc;AAM3B,IAAM,aAAa;AAGnB,IAAM,oBACJ;AAGF,IAAM,aAAa,iBAAiB,WAAW;AAC/C,IAAM,kBAAkB;AAGxB,IAAM,aAAa;AAGnB,IAAM,qBAAqB;AAG3B,IAAM,0BAA0B;AAGhC,IAAM,aAAa;AAmCnB,eAAe,qBAAkD;AAC/D,MAAI,OAAO,WAAW,YAAa,QAAO;AAC1C,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,WAAW,MAAM,MAAM,MAAM,eAAe;AAClD,WAAO,WAAW,SAAS,YAAY,IAAI;AAAA,EAC7C,QAAQ;AAEN,WAAO;AAAA,EACT;AACF;AAEA,eAAe,iBAAiB,MAAkC;AAChE,MAAI,OAAO,WAAW,YAAa;AACnC,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,MAAM,IAAI,iBAAiB,IAAI,SAAS,IAAI,CAAC;AAAA,EACrD,QAAQ;AAAA,EAER;AACF;AAUA,SAAS,SACP,OACA,WACA,YACc;AACd,MAAI,cAAc,WAAY,QAAO;AACrC,QAAM,QAAQ,YAAY;AAC1B,QAAM,eAAe,KAAK,MAAM,MAAM,SAAS,KAAK;AACpD,QAAM,SAAS,IAAI,aAAa,YAAY;AAC5C,WAAS,IAAI,GAAG,IAAI,cAAc,KAAK;AACrC,UAAM,WAAW,IAAI;AACrB,UAAM,WAAW,KAAK,MAAM,QAAQ;AACpC,UAAM,UAAU,KAAK,IAAI,WAAW,GAAG,MAAM,SAAS,CAAC;AACvD,UAAM,OAAO,WAAW;AACxB,WAAO,CAAC,IAAK,MAAM,QAAQ,KAAgB,IAAI,QAAS,MAAM,OAAO,IAAe;AAAA,EACtF;AACA,SAAO;AACT;AAMO,IAAM,YAAN,MAAgB;AAAA;AAAA,EAEJ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGT,WAAwC;AAAA,EACxC,KAAwB;AAAA,EACxB,KAAwB;AAAA;AAAA,EAGxB,gBAAqC;AAAA,EACrC,oBAAoB;AAAA,EACpB,cAAiD;AAAA,EACjD,eAA8D;AAAA,EAC9D,UAA8B;AAAA;AAAA,EAG9B,eAA6B,IAAI,aAAa,CAAC;AAAA,EAC/C,qBAAqB;AAAA;AAAA,EAGrB,WAAW;AAAA,EACX,cAAc;AAAA,EACd,aAAa;AAAA,EACb,eAAe;AAAA;AAAA,EAGf,2BAA2B;AAAA,EAC3B,4BAA4B;AAAA,EAC5B;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,iBAAiB;AAAA,EACjB,sBAAgC,CAAC;AAAA,EACjC,2BAA2B;AAAA,EAC3B;AAAA;AAAA,EAGA,aAAkD,oBAAI,IAAI;AAAA;AAAA,EAG1D,qBAAoC,QAAQ,QAAQ;AAAA,EAE5D,YAAY,SAAsB;AAChC,SAAK,aAAa,SAAS,aAAa;AACxC,SAAK,uBAAuB,SAAS,uBAAuB;AAC5D,SAAK,qBAAqB,SAAS,qBAAqB;AACxD,SAAK,cAAc,SAAS,cAAc;AAC1C,SAAK,SAAS,SAAS,SAAS;AAChC,SAAK,YAAY,SAAS,YAAY;AACtC,SAAK,uBAAuB,KAAK;AAGjC,SAAK,mBAAoB,aAAa,KAAK,cAAe;AAC1D,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,KAAK,wBAAwB;AAAA,MAChC,WAAW,KAAK;AAAA,MAChB,qBAAqB,KAAK;AAAA,MAC1B,mBAAmB,KAAK;AAAA,MACxB,YAAY,KAA
K;AAAA,MACjB,UAAU,KAAK;AAAA,IACjB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,OAAsB;AAC1B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,4BAA4B;AAAA,IAC3D;AACA,QAAI,KAAK,UAAU;AACjB,WAAK,KAAK,qCAAgC;AAC1C;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB;AAG3B,QAAI,cAAc,MAAM,mBAAmB;AAC3C,QAAI,aAAa;AACf,WAAK,KAAK,6BAA6B;AAAA,IACzC,OAAO;AACL,WAAK,KAAK,uBAAuB,KAAK,SAAS;AAC/C,YAAM,WAAW,MAAM,MAAM,KAAK,SAAS;AAC3C,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,IAAI;AAAA,UACR,GAAG,UAAU,2BAA2B,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,QAChF;AAAA,MACF;AACA,oBAAc,MAAM,SAAS,YAAY;AACzC,WAAK,KAAK,wBAAwB,YAAY,YAAY,OAAO;AAGjE,YAAM,iBAAiB,WAAW;AAClC,WAAK,KAAK,0BAA0B;AAAA,IACtC;AAGA,SAAK,WAAW,MAAU,qBAAiB,OAAO,aAAa;AAAA,MAC7D,oBAAoB,CAAC,MAAM;AAAA,MAC3B,wBAAwB;AAAA,IAC1B,CAAC;AAGD,SAAK,aAAa;AAElB,SAAK,WAAW;AAChB,SAAK,KAAK,wBAAwB;AAElC,SAAK,MAAM;AAAA,MACT,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,IACtB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,aAAa,WAA0C;AAC3D,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,UAAU,WAAW,YAAY;AACnC,YAAM,IAAI;AAAA,QACR,GAAG,UAAU,aAAa,UAAU,iBAAiB,UAAU,MAAM;AAAA,MACvE;AAAA,IACF;AAEA,UAAM,cAAc,IAAQ,WAAO,WAAW,WAAW,CAAC,GAAG,UAAU,CAAC;AACxE,UAAM,WAAW,IAAQ,WAAO,SAAS,cAAc,KAAK,CAAC,OAAO,KAAK,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;AAE5F,UAAM,QAAoC;AAAA,MACxC,OAAO;AAAA,MACP,IAAI;AAAA,MACJ,GAAG,KAAK;AAAA,MACR,GAAG,KAAK;AAAA,IACV;AAEA,UAAM,UAAU,MAAM,KAAK,SAAS,IAAI,KAAK;AAG7C,SAAK,KAAK,QAAQ,IAAI;AACtB,SAAK,KAAK,QAAQ,IAAI;AAEtB,UAAM,cAAe,QAAQ,QAAQ,EAAiB,KAAK,CAAC;AAC5D,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,MAAM,QAA2B;AAC/B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,6BAA6B;AAAA,IAC5D;AACA,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,KAAK,YAAY;AACnB,WAAK,KAAK,wDAAmD;AAC7D,WAAK,KAAK;AAAA,IACZ;AAEA,SAAK,KAAK,6BAA6B;AACvC,SAAK,UAAU;AACf,SAAK,aAAa;AAGlB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,eAAe,IAAI,aAAa,UAAU;AAC/C,SAAK,qBAAqB;AAG1B,SAAK,aAAa;AAGlB,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAC5B,SAAK,2BAA2B,KAAK;AAAA,MAClC,0BAA0B,MAAQ,KAAK,cAAc;AAA
A,IACxD;AACA,SAAK,KAAK,+BAA+B,KAAK,0BAA0B,QAAQ;AAGhF,SAAK,oBAAoB,MAAM;AAAA,EACjC;AAAA;AAAA,EAGA,OAAa;AACX,QAAI,CAAC,KAAK,WAAY;AAEtB,SAAK,KAAK,cAAc;AAGxB,SAAK,uBAAuB;AAG5B,QAAI,KAAK,aAAa;AACpB,WAAK,cAAc;AACnB,WAAK,MAAM;AAAA,QACT,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAGA,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,qBAAqB;AAC1B,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAC5B,SAAK,qBAAqB,QAAQ,QAAQ;AAE1C,SAAK,KAAK,aAAa;AAAA,EACzB;AAAA;AAAA,EAGA,cAAc,UAAmC;AAC/C,WAAO,KAAK,IAAI,gBAAgB,QAAQ;AAAA,EAC1C;AAAA;AAAA,EAGA,YAAY,UAAmC;AAC7C,WAAO,KAAK,IAAI,cAAc,QAAQ;AAAA,EACxC;AAAA;AAAA,EAGA,QAAQ,UAAmC;AACzC,WAAO,KAAK,IAAI,aAAa,QAAQ;AAAA,EACvC;AAAA;AAAA,EAGA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAA6B;AAC/B,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAyB;AAC7B,QAAI,KAAK,aAAc;AACvB,SAAK,KAAK,eAAe;AAEzB,SAAK,KAAK;AAEV,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,QAAQ;AAC5B,WAAK,WAAW;AAAA,IAClB;AAGA,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AACjB,SAAK,KAAK;AACV,SAAK,KAAK;AAEV,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,WAAW,MAAM;AAEtB,SAAK,KAAK,WAAW;AAAA,EACvB;AAAA;AAAA;AAAA;AAAA,EAMQ,IAAI,MAAoB,UAAmC;AACjE,QAAI,MAAM,KAAK,WAAW,IAAI,IAAI;AAClC,QAAI,CAAC,KAAK;AACR,YAAM,oBAAI,IAAI;AACd,WAAK,WAAW,IAAI,MAAM,GAAG;AAAA,IAC/B;AACA,QAAI,IAAI,QAAQ;AAChB,WAAO,MAAM;AACX,UAAK,OAAO,QAAQ;AAAA,IACtB;AAAA,EACF;AAAA,EAEQ,MAAM,OAAuB;AACnC,UAAM,MAAM,KAAK,WAAW,IAAI,MAAM,IAAI;AAC1C,QAAI,CAAC,IAAK;AACV,eAAW,MAAM,KAAK;AACpB,UAAI;AACF,WAAG,KAAK;AAAA,MACV,SAAS,KAAK;AAEZ,gBAAQ,MAAM,GAAG,UAAU,aAAa,MAAM,IAAI,cAAc,GAAG;AAAA,MACrE;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAMQ,oBAAoB,QAA2B;AAErD,UAAM,SAAS,OAAO,eAAe;AACrC,UAAM,gBAAgB,OAAO,CAAC,GAAG,YAAY;AAC7C,UAAM,kBAAkB,eAAe,cAAc;AAErD,SAAK,KAAK,sBAAsB,eAAe;AAI/C,QAAI,OAAO,iBAAiB,eAAe,OAAO,uBAAuB,aAAa;AACpF,YAAM,IAAI,MAAM,GAAG,UAAU,oDAAoD;AAAA,IACnF;AAEA,UAAM,oBACJ,OAAO,iBAAiB,cACpB;AAAA;AAAA,MAEC,WAAmB;AAAA;AAE1B,SAAK,gBAAgB,IAAI,kBAAkB,EAAE,YAAY,gBAAgB,CAAC;AAC1E,S
AAK,oBAAoB;AAEzB,SAAK,cAAc,KAAK,cAAc,wBAAwB,MAAM;AAGpE,SAAK,sBAAsB,eAAe;AAAA,EAC5C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,sBAAsB,iBAA+B;AAC3D,QAAI,CAAC,KAAK,iBAAiB,CAAC,KAAK,YAAa;AAG9C,UAAM,aAAa;AACnB,UAAM,YAAY,KAAK,cAAc,sBAAsB,YAAY,GAAG,CAAC;AAE3E,cAAU,iBAAiB,CAAC,UAAgC;AAC1D,UAAI,CAAC,KAAK,WAAY;AAEtB,YAAM,YAAY,MAAM,YAAY,eAAe,CAAC;AAGpD,YAAM,YACJ,oBAAoB,KAAK,cACrB,SAAS,WAAW,iBAAiB,KAAK,WAAW,IACrD,IAAI,aAAa,SAAS;AAGhC,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,SAAK,YAAY,QAAQ,SAAS;AAClC,cAAU,QAAQ,KAAK,cAAc,WAAW;AAChD,SAAK,eAAe;AAEpB,SAAK,KAAK,6CAA6C;AAAA,EACzD;AAAA;AAAA;AAAA;AAAA,EAKQ,WAAW,SAA6B;AAC9C,QAAI,SAAS;AAEb,WAAO,SAAS,QAAQ,QAAQ;AAC9B,YAAM,YAAY,aAAa,KAAK;AACpC,YAAM,YAAY,QAAQ,SAAS;AACnC,YAAM,SAAS,KAAK,IAAI,WAAW,SAAS;AAE5C,WAAK,aAAa;AAAA,QAChB,QAAQ,SAAS,QAAQ,SAAS,MAAM;AAAA,QACxC,KAAK;AAAA,MACP;AACA,WAAK,sBAAsB;AAC3B,gBAAU;AAEV,UAAI,KAAK,uBAAuB,YAAY;AAC1C,cAAM,QAAQ,IAAI,aAAa,KAAK,YAAY;AAChD,aAAK,qBAAqB;AAG1B,aAAK,qBAAqB,KAAK,mBAAmB;AAAA,UAAK,MACrD,KAAK,aAAa,KAAK;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,aAAa,OAAoC;AAC7D,QAAI,CAAC,KAAK,cAAc,CAAC,KAAK,SAAU;AAExC,QAAI;AACJ,QAAI;AACF,oBAAc,MAAM,KAAK,aAAa,KAAK;AAAA,IAC7C,SAAS,KAAK;AACZ,UAAI,KAAK,QAAQ;AAEf,gBAAQ,MAAM,GAAG,UAAU,qBAAqB,GAAG;AAAA,MACrD;AACA;AAAA,IACF;AAGA,QAAI,KAAK,gBAAgB;AACvB,WAAK,oBAAoB,KAAK,WAAW;AAEzC,UAAI,KAAK,oBAAoB,UAAU,KAAK,0BAA0B;AACpE,aAAK,mBAAmB;AAAA,MAC1B;AACA;AAAA,IACF;AAGA,UAAM,gBAAgB,eAAe,KAAK;AAE1C,QAAI,eAAe;AACjB,WAAK;AACL,WAAK,4BAA4B;AAEjC,UAAI,CAAC,KAAK,eAAe,KAAK,4BAA4B,KAAK,kBAAkB;AAC/E,aAAK,cAAc;AACnB,aAAK,KAAK,gCAAgC,YAAY,QAAQ,CAAC,CAAC;AAChE,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,OAAO;AACL,WAAK;AAGL,UAAI,KAAK,eAAe,KAAK,6BAA6B,KAAK,gBAAgB;AAC7E,aAAK,cAAc;AACnB,aAAK,2BAA2B;AAChC,aAAK,KAAK,8BAA8B,YAAY,QAAQ,CAAC,CAAC;AAC9D,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAA2B;AACjC,QAAI,KAAK,oBAAoB,WAAW,GAAG;AACzC,WAAK,iBAAiB;AACtB;AAAA,IACF;AAGA,UAAM,MAA
M,KAAK,oBAAoB,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC;AAC9D,UAAM,WAAW,MAAM,KAAK,oBAAoB;AAIhD,UAAM,eAAe;AACrB,QAAI,WAAW,eAAe,KAAK,YAAY;AAC7C,WAAK,uBAAuB,KAAK,IAAI,WAAW,cAAc,IAAI;AAClE,WAAK;AAAA,QACH;AAAA,QACA,KAAK,WAAW,QAAQ,CAAC;AAAA,QACzB;AAAA,QACA,KAAK,qBAAqB,QAAQ,CAAC;AAAA,QACnC;AAAA,QACA,SAAS,QAAQ,CAAC,IAAI;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,uBAAuB,KAAK;AACjC,WAAK,KAAK,wBAAwB,SAAS,QAAQ,CAAC,GAAG,+BAA0B,KAAK,WAAW,QAAQ,CAAC,CAAC;AAAA,IAC7G;AAGA,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAAA,EAC9B;AAAA,EAEQ,yBAA+B;AACrC,QAAI,KAAK,cAAc;AACrB,UAAI;AACF,aAAK,aAAa,WAAW;AAAA,MAC/B,QAAQ;AAAA,MAER;AACA,UAAI,oBAAoB,KAAK,cAAc;AACzC,QAAC,KAAK,aAAqC,iBAAiB;AAAA,MAC9D;AACA,WAAK,eAAe;AAAA,IACtB;AAEA,QAAI,KAAK,aAAa;AACpB,UAAI;AACF,aAAK,YAAY,WAAW;AAAA,MAC9B,QAAQ;AAAA,MAER;AACA,WAAK,cAAc;AAAA,IACrB;AAEA,QAAI,KAAK,iBAAiB,KAAK,mBAAmB;AAChD,UAAI;AACF,aAAK,KAAK,cAAc,MAAM;AAAA,MAChC,QAAQ;AAAA,MAER;AACA,WAAK,gBAAgB;AACrB,WAAK,oBAAoB;AAAA,IAC3B;AAEA,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA;AAAA,EAOQ,eAAqB;AAE3B,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AAEjB,UAAM,QAAQ,IAAI,aAAa,IAAI,UAAU,EAAE,KAAK,CAAC;AACrD,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,GAAG,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAC1E,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAAA,EACzE;AAAA;AAAA;AAAA;AAAA,EAMQ,QAAQ,MAAuB;AACrC,QAAI,CAAC,KAAK,OAAQ;AAElB,YAAQ,IAAI,YAAY,GAAG,IAAI;AAAA,EACjC;AACF;AAeA,eAAsB,UAAU,SAA0C;AACxE,QAAM,MAAM,IAAI,UAAU,OAAO;AACjC,QAAM,IAAI,KAAK;AACf,SAAO;AACT;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// @guidekit/vad — Silero VAD ONNX model wrapper for voice activity detection\nimport * as ort from 'onnxruntime-web';\n\nexport const VAD_VERSION = '0.1.0-beta.2';\n\n// ---------------------------------------------------------------------------\n// Constants\n// ---------------------------------------------------------------------------\n\nconst LOG_PREFIX = '[GuideKit:VAD]';\n\n/** Default CDN URL for the Silero VAD ONNX model (v5). */\nconst DEFAULT_MODEL_URL =\n 'https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/silero_vad_v5.onnx';\n\n/** Cache API key used for persisting the downloaded ONNX model. */\nconst CACHE_NAME = `guidekit-vad-v${VAD_VERSION}`;\nconst CACHE_MODEL_KEY = 'model.onnx';\n\n/** Silero VAD frame size: 512 samples at 16 kHz = 32 ms per frame. */\nconst FRAME_SIZE = 512;\n\n/** Target sample rate for VAD processing. */\nconst TARGET_SAMPLE_RATE = 16000;\n\n/** Duration (in ms) of audio collected for noise floor calibration. */\nconst CALIBRATION_DURATION_MS = 500;\n\n/** Hidden/cell state size for Silero VAD v5 LSTM. */\nconst STATE_SIZE = 128;\n\n// ---------------------------------------------------------------------------\n// Types\n// ---------------------------------------------------------------------------\n\nexport interface VADOptions {\n /** Speech probability threshold (0-1). Default: 0.5 */\n threshold?: number;\n /** Minimum speech duration in ms to trigger start. Default: 300 */\n minSpeechDurationMs?: number;\n /** Silence duration in ms after speech to trigger end. Default: 500 */\n silenceDurationMs?: number;\n /** Sample rate. Default: 16000 */\n sampleRate?: number;\n /** Enable debug logging. Default: false */\n debug?: boolean;\n /** Custom URL for the Silero VAD ONNX model file. 
*/\n modelUrl?: string;\n}\n\nexport interface VADEvent {\n type: 'speech-start' | 'speech-end' | 'vad-ready';\n timestamp: number;\n /** Speech probability (0-1) at the moment of the event. */\n probability?: number;\n}\n\ntype VADEventType = VADEvent['type'];\ntype VADCallback = (event: VADEvent) => void;\n\n// ---------------------------------------------------------------------------\n// Utility: Cache API helpers\n// ---------------------------------------------------------------------------\n\nasync function loadModelFromCache(): Promise<ArrayBuffer | null> {\n if (typeof caches === 'undefined') return null;\n try {\n const cache = await caches.open(CACHE_NAME);\n const response = await cache.match(CACHE_MODEL_KEY);\n return response ? response.arrayBuffer() : null;\n } catch {\n // Cache API may be unavailable in certain contexts (e.g. opaque origins).\n return null;\n }\n}\n\nasync function saveModelToCache(data: ArrayBuffer): Promise<void> {\n if (typeof caches === 'undefined') return;\n try {\n const cache = await caches.open(CACHE_NAME);\n await cache.put(CACHE_MODEL_KEY, new Response(data));\n } catch {\n // Silently ignore cache write failures.\n }\n}\n\n// ---------------------------------------------------------------------------\n// Utility: Resampler\n// ---------------------------------------------------------------------------\n\n/**\n * Simple linear-interpolation resampler from `inputRate` to `outputRate`.\n * Adequate for VAD where perceptual audio quality is irrelevant.\n */\nfunction resample(\n input: Float32Array,\n inputRate: number,\n outputRate: number,\n): Float32Array {\n if (inputRate === outputRate) return input;\n const ratio = inputRate / outputRate;\n const outputLength = Math.round(input.length / ratio);\n const output = new Float32Array(outputLength);\n for (let i = 0; i < outputLength; i++) {\n const srcIndex = i * ratio;\n const srcFloor = Math.floor(srcIndex);\n const srcCeil = Math.min(srcFloor + 1, input.length - 1);\n 
const frac = srcIndex - srcFloor;\n output[i] = (input[srcFloor] as number) * (1 - frac) + (input[srcCeil] as number) * frac;\n }\n return output;\n}\n\n// ---------------------------------------------------------------------------\n// SileroVAD\n// ---------------------------------------------------------------------------\n\nexport class SileroVAD {\n // Options (resolved with defaults)\n private readonly _threshold: number;\n private readonly _minSpeechDurationMs: number;\n private readonly _silenceDurationMs: number;\n private readonly _sampleRate: number;\n private readonly _debug: boolean;\n private readonly _modelUrl: string;\n\n // ONNX Runtime state\n private _session: ort.InferenceSession | null = null;\n private _h: ort.Tensor | null = null;\n private _c: ort.Tensor | null = null;\n\n // Audio pipeline\n private _audioContext: AudioContext | null = null;\n private _ownsAudioContext = false;\n private _sourceNode: MediaStreamAudioSourceNode | null = null;\n private _workletNode: AudioWorkletNode | ScriptProcessorNode | null = null;\n private _stream: MediaStream | null = null;\n\n // Frame buffer for accumulating resampled samples into FRAME_SIZE chunks\n private _frameBuffer: Float32Array = new Float32Array(0);\n private _frameBufferOffset = 0;\n\n // State tracking\n private _isReady = false;\n private _isSpeaking = false;\n private _isStarted = false;\n private _isDestroyed = false;\n\n // Duration tracking (in frames)\n private _consecutiveSpeechFrames = 0;\n private _consecutiveSilenceFrames = 0;\n private _frameDurationMs: number;\n private _minSpeechFrames: number;\n private _silenceFrames: number;\n\n // Noise floor calibration\n private _isCalibrating = false;\n private _calibrationSamples: number[] = [];\n private _calibrationFramesNeeded = 0;\n private _calibratedThreshold: number;\n\n // Event listeners\n private _listeners: Map<VADEventType, Set<VADCallback>> = new Map();\n\n // Processing lock to serialise frame inference\n private 
_processingPromise: Promise<void> = Promise.resolve();\n\n constructor(options?: VADOptions) {\n this._threshold = options?.threshold ?? 0.5;\n this._minSpeechDurationMs = options?.minSpeechDurationMs ?? 300;\n this._silenceDurationMs = options?.silenceDurationMs ?? 500;\n this._sampleRate = options?.sampleRate ?? TARGET_SAMPLE_RATE;\n this._debug = options?.debug ?? false;\n this._modelUrl = options?.modelUrl ?? DEFAULT_MODEL_URL;\n this._calibratedThreshold = this._threshold;\n\n // Pre-compute frame-duration-based counters\n this._frameDurationMs = (FRAME_SIZE / this._sampleRate) * 1000;\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._log('Created with options', {\n threshold: this._threshold,\n minSpeechDurationMs: this._minSpeechDurationMs,\n silenceDurationMs: this._silenceDurationMs,\n sampleRate: this._sampleRate,\n modelUrl: this._modelUrl,\n });\n }\n\n // -------------------------------------------------------------------------\n // Public API\n // -------------------------------------------------------------------------\n\n /** Load the ONNX model. Uses Cache API for persistence across sessions. */\n async init(): Promise<void> {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot init after destroy`);\n }\n if (this._isReady) {\n this._log('Already initialised — skipping');\n return;\n }\n\n this._log('Initialising...');\n\n // 1. 
Attempt to load model bytes from cache, falling back to network.\n let modelBuffer = await loadModelFromCache();\n if (modelBuffer) {\n this._log('Loaded model from Cache API');\n } else {\n this._log('Fetching model from', this._modelUrl);\n const response = await fetch(this._modelUrl);\n if (!response.ok) {\n throw new Error(\n `${LOG_PREFIX} Failed to fetch model: ${response.status} ${response.statusText}`,\n );\n }\n modelBuffer = await response.arrayBuffer();\n this._log('Model fetched, size:', modelBuffer.byteLength, 'bytes');\n\n // Persist to Cache API for next time.\n await saveModelToCache(modelBuffer);\n this._log('Model saved to Cache API');\n }\n\n // 2. Create ONNX InferenceSession.\n this._session = await ort.InferenceSession.create(modelBuffer, {\n executionProviders: ['wasm'],\n graphOptimizationLevel: 'all',\n });\n\n // 3. Initialise LSTM hidden/cell state tensors (zeros).\n this._resetStates();\n\n this._isReady = true;\n this._log('Model loaded and ready');\n\n this._emit({\n type: 'vad-ready',\n timestamp: Date.now(),\n });\n }\n\n /**\n * Process a single audio frame (512 samples at 16 kHz).\n * Returns the speech probability (0-1).\n */\n async processFrame(audioData: Float32Array): Promise<number> {\n if (!this._session) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (audioData.length !== FRAME_SIZE) {\n throw new Error(\n `${LOG_PREFIX} Expected ${FRAME_SIZE} samples, got ${audioData.length}`,\n );\n }\n\n const inputTensor = new ort.Tensor('float32', audioData, [1, FRAME_SIZE]);\n const srTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(this._sampleRate)]), [1]);\n\n const feeds: Record<string, ort.Tensor> = {\n input: inputTensor,\n sr: srTensor,\n h: this._h!,\n c: this._c!,\n };\n\n const results = await this._session.run(feeds);\n\n // Update LSTM hidden/cell states for the next frame.\n this._h = results['hn'] as ort.Tensor;\n this._c = results['cn'] as ort.Tensor;\n\n const probability = (results['output'] as ort.Tensor).data[0] as number;\n return probability;\n }\n\n /** Start VAD processing on a MediaStream (typically from getUserMedia). */\n start(stream: MediaStream): void {\n if (this._isDestroyed) {\n throw new Error(`${LOG_PREFIX} Cannot start after destroy`);\n }\n if (!this._isReady) {\n throw new Error(`${LOG_PREFIX} Model not loaded. 
Call init() first.`);\n }\n if (this._isStarted) {\n this._log('Already started — stopping previous session first');\n this.stop();\n }\n\n this._log('Starting VAD on MediaStream');\n this._stream = stream;\n this._isStarted = true;\n\n // Reset speech tracking state.\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBuffer = new Float32Array(FRAME_SIZE);\n this._frameBufferOffset = 0;\n\n // Reset LSTM states for a fresh stream.\n this._resetStates();\n\n // Begin noise floor calibration.\n this._isCalibrating = true;\n this._calibrationSamples = [];\n this._calibrationFramesNeeded = Math.max(1, Math.floor(\n (CALIBRATION_DURATION_MS / 1000) * this._sampleRate / FRAME_SIZE,\n ));\n this._log('Calibrating noise floor for', this._calibrationFramesNeeded, 'frames');\n\n // Build the audio processing pipeline.\n this._setupAudioPipeline(stream);\n }\n\n /** Stop VAD processing and release audio resources (but keep the model). */\n stop(): void {\n if (!this._isStarted) return;\n\n this._log('Stopping VAD');\n\n // Tear down audio nodes.\n this._teardownAudioPipeline();\n\n // If we were speaking, emit speech-end.\n if (this._isSpeaking) {\n this._isSpeaking = false;\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability: 0,\n });\n }\n\n // Reset state.\n this._isStarted = false;\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._consecutiveSilenceFrames = 0;\n this._frameBufferOffset = 0;\n this._isCalibrating = false;\n this._calibrationSamples = [];\n this._processingPromise = Promise.resolve();\n\n this._log('VAD stopped');\n }\n\n /** Register a callback for speech-start events. Returns an unsubscribe function. */\n onSpeechStart(callback: VADCallback): () => void {\n return this._on('speech-start', callback);\n }\n\n /** Register a callback for speech-end events. Returns an unsubscribe function. 
*/\n onSpeechEnd(callback: VADCallback): () => void {\n return this._on('speech-end', callback);\n }\n\n /** Register a callback for vad-ready events. Returns an unsubscribe function. */\n onReady(callback: VADCallback): () => void {\n return this._on('vad-ready', callback);\n }\n\n /** Whether the ONNX model is loaded and ready. */\n get isReady(): boolean {\n return this._isReady;\n }\n\n /** Whether speech is currently detected. */\n get isSpeaking(): boolean {\n return this._isSpeaking;\n }\n\n /** The MediaStream currently being processed, or null. */\n get stream(): MediaStream | null {\n return this._stream;\n }\n\n /** Release ONNX model session and all audio resources. */\n async destroy(): Promise<void> {\n if (this._isDestroyed) return;\n this._log('Destroying...');\n\n this.stop();\n\n if (this._session) {\n await this._session.release();\n this._session = null;\n }\n\n // Dispose tensors.\n this._h?.dispose();\n this._c?.dispose();\n this._h = null;\n this._c = null;\n\n this._isReady = false;\n this._isDestroyed = true;\n this._listeners.clear();\n\n this._log('Destroyed');\n }\n\n // -------------------------------------------------------------------------\n // Private: Event system\n // -------------------------------------------------------------------------\n\n private _on(type: VADEventType, callback: VADCallback): () => void {\n let set = this._listeners.get(type);\n if (!set) {\n set = new Set();\n this._listeners.set(type, set);\n }\n set.add(callback);\n return () => {\n set!.delete(callback);\n };\n }\n\n private _emit(event: VADEvent): void {\n const set = this._listeners.get(event.type);\n if (!set) return;\n for (const cb of set) {\n try {\n cb(event);\n } catch (err) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Error in ${event.type} callback:`, err);\n }\n }\n }\n\n // -------------------------------------------------------------------------\n // Private: Audio pipeline\n // 
-------------------------------------------------------------------------\n\n private _setupAudioPipeline(stream: MediaStream): void {\n // Determine the incoming sample rate.\n const tracks = stream.getAudioTracks();\n const trackSettings = tracks[0]?.getSettings();\n const inputSampleRate = trackSettings?.sampleRate ?? 48000;\n\n this._log('Input sample rate:', inputSampleRate);\n\n // Create AudioContext at the input sample rate so we don't double-resample.\n // SSR guard: AudioContext may not exist.\n if (typeof AudioContext === 'undefined' && typeof webkitAudioContext === 'undefined') {\n throw new Error(`${LOG_PREFIX} AudioContext is not available in this environment`);\n }\n\n const AudioContextClass =\n typeof AudioContext !== 'undefined'\n ? AudioContext\n : // eslint-disable-next-line @typescript-eslint/no-explicit-any\n (globalThis as any).webkitAudioContext as typeof AudioContext;\n\n this._audioContext = new AudioContextClass({ sampleRate: inputSampleRate });\n this._ownsAudioContext = true;\n\n this._sourceNode = this._audioContext.createMediaStreamSource(stream);\n\n // Try AudioWorklet first, fall back to ScriptProcessorNode.\n this._setupScriptProcessor(inputSampleRate);\n }\n\n /**\n * ScriptProcessorNode fallback (works everywhere, including Safari).\n * We use a buffer size of 4096 which gives ~85 ms of audio at 48 kHz.\n */\n private _setupScriptProcessor(inputSampleRate: number): void {\n if (!this._audioContext || !this._sourceNode) return;\n\n // Buffer size must be a power of 2: 256, 512, 1024, 2048, 4096, 8192, 16384.\n const bufferSize = 4096;\n const processor = this._audioContext.createScriptProcessor(bufferSize, 1, 1);\n\n processor.onaudioprocess = (event: AudioProcessingEvent) => {\n if (!this._isStarted) return;\n\n const inputData = event.inputBuffer.getChannelData(0);\n\n // Resample to target rate if needed.\n const resampled =\n inputSampleRate !== this._sampleRate\n ? 
resample(inputData, inputSampleRate, this._sampleRate)\n : new Float32Array(inputData);\n\n // Feed resampled audio into frame-sized chunks.\n this._feedAudio(resampled);\n };\n\n this._sourceNode.connect(processor);\n processor.connect(this._audioContext.destination);\n this._workletNode = processor;\n\n this._log('Audio pipeline set up (ScriptProcessorNode)');\n }\n\n /**\n * Accumulate resampled audio into FRAME_SIZE chunks and process each full frame.\n */\n private _feedAudio(samples: Float32Array): void {\n let offset = 0;\n\n while (offset < samples.length) {\n const remaining = FRAME_SIZE - this._frameBufferOffset;\n const available = samples.length - offset;\n const toCopy = Math.min(remaining, available);\n\n this._frameBuffer.set(\n samples.subarray(offset, offset + toCopy),\n this._frameBufferOffset,\n );\n this._frameBufferOffset += toCopy;\n offset += toCopy;\n\n if (this._frameBufferOffset === FRAME_SIZE) {\n const frame = new Float32Array(this._frameBuffer);\n this._frameBufferOffset = 0;\n\n // Serialise inference calls to avoid overlapping ONNX sessions.\n this._processingPromise = this._processingPromise.then(() =>\n this._handleFrame(frame),\n );\n }\n }\n }\n\n /**\n * Process a single FRAME_SIZE frame: run inference and update speech state.\n */\n private async _handleFrame(frame: Float32Array): Promise<void> {\n if (!this._isStarted || !this._session) return;\n\n let probability: number;\n try {\n probability = await this.processFrame(frame);\n } catch (err) {\n if (this._debug) {\n // eslint-disable-next-line no-console\n console.error(`${LOG_PREFIX} Inference error:`, err);\n }\n return;\n }\n\n // Noise floor calibration phase.\n if (this._isCalibrating) {\n this._calibrationSamples.push(probability);\n\n if (this._calibrationSamples.length >= this._calibrationFramesNeeded) {\n this._finishCalibration();\n }\n return;\n }\n\n // Speech state machine.\n const isSpeechFrame = probability >= this._calibratedThreshold;\n\n if (isSpeechFrame) 
{\n this._consecutiveSpeechFrames++;\n this._consecutiveSilenceFrames = 0;\n\n if (!this._isSpeaking && this._consecutiveSpeechFrames >= this._minSpeechFrames) {\n this._isSpeaking = true;\n this._log('Speech started, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-start',\n timestamp: Date.now(),\n probability,\n });\n }\n } else {\n this._consecutiveSilenceFrames++;\n // Do NOT reset _consecutiveSpeechFrames here — only reset when speech-end fires.\n\n if (this._isSpeaking && this._consecutiveSilenceFrames >= this._silenceFrames) {\n this._isSpeaking = false;\n this._consecutiveSpeechFrames = 0;\n this._log('Speech ended, probability:', probability.toFixed(3));\n this._emit({\n type: 'speech-end',\n timestamp: Date.now(),\n probability,\n });\n }\n }\n }\n\n private _finishCalibration(): void {\n if (this._calibrationSamples.length === 0) {\n this._isCalibrating = false;\n return;\n }\n\n // Compute average noise floor probability.\n const sum = this._calibrationSamples.reduce((a, b) => a + b, 0);\n const avgNoise = sum / this._calibrationSamples.length;\n\n // If the ambient noise floor is high, nudge the threshold above it.\n // We add a margin so we don't constantly trigger on background noise.\n const NOISE_MARGIN = 0.15;\n if (avgNoise + NOISE_MARGIN > this._threshold) {\n this._calibratedThreshold = Math.min(avgNoise + NOISE_MARGIN, 0.95);\n this._log(\n 'Noise floor is high. 
Adjusted threshold from',\n this._threshold.toFixed(3),\n 'to',\n this._calibratedThreshold.toFixed(3),\n '(avg noise:',\n avgNoise.toFixed(3) + ')',\n );\n } else {\n this._calibratedThreshold = this._threshold;\n this._log('Noise floor OK, avg:', avgNoise.toFixed(3), '— keeping threshold at', this._threshold.toFixed(3));\n }\n\n // Recompute frame counters in case threshold changed min speech behaviour.\n this._minSpeechFrames = Math.ceil(this._minSpeechDurationMs / this._frameDurationMs);\n this._silenceFrames = Math.ceil(this._silenceDurationMs / this._frameDurationMs);\n\n this._isCalibrating = false;\n this._calibrationSamples = [];\n }\n\n private _teardownAudioPipeline(): void {\n if (this._workletNode) {\n try {\n this._workletNode.disconnect();\n } catch {\n // Ignore disconnect errors.\n }\n if ('onaudioprocess' in this._workletNode) {\n (this._workletNode as ScriptProcessorNode).onaudioprocess = null;\n }\n this._workletNode = null;\n }\n\n if (this._sourceNode) {\n try {\n this._sourceNode.disconnect();\n } catch {\n // Ignore.\n }\n this._sourceNode = null;\n }\n\n if (this._audioContext && this._ownsAudioContext) {\n try {\n void this._audioContext.close();\n } catch {\n // Ignore.\n }\n this._audioContext = null;\n this._ownsAudioContext = false;\n }\n\n this._stream = null;\n }\n\n // -------------------------------------------------------------------------\n // Private: ONNX state helpers\n // -------------------------------------------------------------------------\n\n /** Reset the LSTM hidden and cell states to zeros. 
*/\n private _resetStates(): void {\n // Dispose any existing tensors to free memory.\n this._h?.dispose();\n this._c?.dispose();\n\n const zeros = new Float32Array(2 * STATE_SIZE).fill(0);\n this._h = new ort.Tensor('float32', zeros.slice(0, STATE_SIZE), [2, 1, 64]);\n this._c = new ort.Tensor('float32', zeros.slice(STATE_SIZE), [2, 1, 64]);\n }\n\n // -------------------------------------------------------------------------\n // Private: Logging\n // -------------------------------------------------------------------------\n\n private _log(...args: unknown[]): void {\n if (!this._debug) return;\n // eslint-disable-next-line no-console\n console.log(LOG_PREFIX, ...args);\n }\n}\n\n// ---------------------------------------------------------------------------\n// Convenience factory\n// ---------------------------------------------------------------------------\n\n/**\n * Create and initialise a SileroVAD instance in one call.\n *\n * ```ts\n * const vad = await createVAD({ debug: true });\n * vad.onSpeechStart(() => console.log('speaking'));\n * vad.start(stream);\n * ```\n */\nexport async function createVAD(options?: VADOptions): Promise<SileroVAD> {\n const vad = new SileroVAD(options);\n await vad.init();\n return vad;\n}\n\n// Re-export the frame size constant so consumers can align their buffers.\nexport { FRAME_SIZE, TARGET_SAMPLE_RATE };\n\n// Type-only declaration for environments that provide webkitAudioContext.\ndeclare global {\n // eslint-disable-next-line no-var\n var webkitAudioContext: typeof AudioContext | 
undefined;\n}\n"],"mappings":";AACA,YAAY,SAAS;AAEd,IAAM,cAAc;AAM3B,IAAM,aAAa;AAGnB,IAAM,oBACJ;AAGF,IAAM,aAAa,iBAAiB,WAAW;AAC/C,IAAM,kBAAkB;AAGxB,IAAM,aAAa;AAGnB,IAAM,qBAAqB;AAG3B,IAAM,0BAA0B;AAGhC,IAAM,aAAa;AAmCnB,eAAe,qBAAkD;AAC/D,MAAI,OAAO,WAAW,YAAa,QAAO;AAC1C,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,WAAW,MAAM,MAAM,MAAM,eAAe;AAClD,WAAO,WAAW,SAAS,YAAY,IAAI;AAAA,EAC7C,QAAQ;AAEN,WAAO;AAAA,EACT;AACF;AAEA,eAAe,iBAAiB,MAAkC;AAChE,MAAI,OAAO,WAAW,YAAa;AACnC,MAAI;AACF,UAAM,QAAQ,MAAM,OAAO,KAAK,UAAU;AAC1C,UAAM,MAAM,IAAI,iBAAiB,IAAI,SAAS,IAAI,CAAC;AAAA,EACrD,QAAQ;AAAA,EAER;AACF;AAUA,SAAS,SACP,OACA,WACA,YACc;AACd,MAAI,cAAc,WAAY,QAAO;AACrC,QAAM,QAAQ,YAAY;AAC1B,QAAM,eAAe,KAAK,MAAM,MAAM,SAAS,KAAK;AACpD,QAAM,SAAS,IAAI,aAAa,YAAY;AAC5C,WAAS,IAAI,GAAG,IAAI,cAAc,KAAK;AACrC,UAAM,WAAW,IAAI;AACrB,UAAM,WAAW,KAAK,MAAM,QAAQ;AACpC,UAAM,UAAU,KAAK,IAAI,WAAW,GAAG,MAAM,SAAS,CAAC;AACvD,UAAM,OAAO,WAAW;AACxB,WAAO,CAAC,IAAK,MAAM,QAAQ,KAAgB,IAAI,QAAS,MAAM,OAAO,IAAe;AAAA,EACtF;AACA,SAAO;AACT;AAMO,IAAM,YAAN,MAAgB;AAAA;AAAA,EAEJ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGT,WAAwC;AAAA,EACxC,KAAwB;AAAA,EACxB,KAAwB;AAAA;AAAA,EAGxB,gBAAqC;AAAA,EACrC,oBAAoB;AAAA,EACpB,cAAiD;AAAA,EACjD,eAA8D;AAAA,EAC9D,UAA8B;AAAA;AAAA,EAG9B,eAA6B,IAAI,aAAa,CAAC;AAAA,EAC/C,qBAAqB;AAAA;AAAA,EAGrB,WAAW;AAAA,EACX,cAAc;AAAA,EACd,aAAa;AAAA,EACb,eAAe;AAAA;AAAA,EAGf,2BAA2B;AAAA,EAC3B,4BAA4B;AAAA,EAC5B;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGA,iBAAiB;AAAA,EACjB,sBAAgC,CAAC;AAAA,EACjC,2BAA2B;AAAA,EAC3B;AAAA;AAAA,EAGA,aAAkD,oBAAI,IAAI;AAAA;AAAA,EAG1D,qBAAoC,QAAQ,QAAQ;AAAA,EAE5D,YAAY,SAAsB;AAChC,SAAK,aAAa,SAAS,aAAa;AACxC,SAAK,uBAAuB,SAAS,uBAAuB;AAC5D,SAAK,qBAAqB,SAAS,qBAAqB;AACxD,SAAK,cAAc,SAAS,cAAc;AAC1C,SAAK,SAAS,SAAS,SAAS;AAChC,SAAK,YAAY,SAAS,YAAY;AACtC,SAAK,uBAAuB,KAAK;AAGjC,SAAK,mBAAoB,aAAa,KAAK,cAAe;AAC1D,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,KAAK,wBAAwB;AAAA,MAChC,WAAW,KAAK;AAAA,MAChB,qBAAqB,KAAK;AAAA,MAC1B,mBAAmB,KAAK;AAAA,MACxB,YAAY,KAA
K;AAAA,MACjB,UAAU,KAAK;AAAA,IACjB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,OAAsB;AAC1B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,4BAA4B;AAAA,IAC3D;AACA,QAAI,KAAK,UAAU;AACjB,WAAK,KAAK,qCAAgC;AAC1C;AAAA,IACF;AAEA,SAAK,KAAK,iBAAiB;AAG3B,QAAI,cAAc,MAAM,mBAAmB;AAC3C,QAAI,aAAa;AACf,WAAK,KAAK,6BAA6B;AAAA,IACzC,OAAO;AACL,WAAK,KAAK,uBAAuB,KAAK,SAAS;AAC/C,YAAM,WAAW,MAAM,MAAM,KAAK,SAAS;AAC3C,UAAI,CAAC,SAAS,IAAI;AAChB,cAAM,IAAI;AAAA,UACR,GAAG,UAAU,2BAA2B,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,QAChF;AAAA,MACF;AACA,oBAAc,MAAM,SAAS,YAAY;AACzC,WAAK,KAAK,wBAAwB,YAAY,YAAY,OAAO;AAGjE,YAAM,iBAAiB,WAAW;AAClC,WAAK,KAAK,0BAA0B;AAAA,IACtC;AAGA,SAAK,WAAW,MAAU,qBAAiB,OAAO,aAAa;AAAA,MAC7D,oBAAoB,CAAC,MAAM;AAAA,MAC3B,wBAAwB;AAAA,IAC1B,CAAC;AAGD,SAAK,aAAa;AAElB,SAAK,WAAW;AAChB,SAAK,KAAK,wBAAwB;AAElC,SAAK,MAAM;AAAA,MACT,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,IACtB,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,aAAa,WAA0C;AAC3D,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,UAAU,WAAW,YAAY;AACnC,YAAM,IAAI;AAAA,QACR,GAAG,UAAU,aAAa,UAAU,iBAAiB,UAAU,MAAM;AAAA,MACvE;AAAA,IACF;AAEA,UAAM,cAAc,IAAQ,WAAO,WAAW,WAAW,CAAC,GAAG,UAAU,CAAC;AACxE,UAAM,WAAW,IAAQ,WAAO,SAAS,cAAc,KAAK,CAAC,OAAO,KAAK,WAAW,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;AAE5F,UAAM,QAAoC;AAAA,MACxC,OAAO;AAAA,MACP,IAAI;AAAA,MACJ,GAAG,KAAK;AAAA,MACR,GAAG,KAAK;AAAA,IACV;AAEA,UAAM,UAAU,MAAM,KAAK,SAAS,IAAI,KAAK;AAG7C,SAAK,KAAK,QAAQ,IAAI;AACtB,SAAK,KAAK,QAAQ,IAAI;AAEtB,UAAM,cAAe,QAAQ,QAAQ,EAAiB,KAAK,CAAC;AAC5D,WAAO;AAAA,EACT;AAAA;AAAA,EAGA,MAAM,QAA2B;AAC/B,QAAI,KAAK,cAAc;AACrB,YAAM,IAAI,MAAM,GAAG,UAAU,6BAA6B;AAAA,IAC5D;AACA,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,GAAG,UAAU,uCAAuC;AAAA,IACtE;AACA,QAAI,KAAK,YAAY;AACnB,WAAK,KAAK,wDAAmD;AAC7D,WAAK,KAAK;AAAA,IACZ;AAEA,SAAK,KAAK,6BAA6B;AACvC,SAAK,UAAU;AACf,SAAK,aAAa;AAGlB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,eAAe,IAAI,aAAa,UAAU;AAC/C,SAAK,qBAAqB;AAG1B,SAAK,aAAa;AAGlB,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAC5B,SAAK,2BAA2B,KAAK,IAAI,GAAG,KAAK;AAAA,MAC9C,0BAA0B,MAA
Q,KAAK,cAAc;AAAA,IACxD,CAAC;AACD,SAAK,KAAK,+BAA+B,KAAK,0BAA0B,QAAQ;AAGhF,SAAK,oBAAoB,MAAM;AAAA,EACjC;AAAA;AAAA,EAGA,OAAa;AACX,QAAI,CAAC,KAAK,WAAY;AAEtB,SAAK,KAAK,cAAc;AAGxB,SAAK,uBAAuB;AAG5B,QAAI,KAAK,aAAa;AACpB,WAAK,cAAc;AACnB,WAAK,MAAM;AAAA,QACT,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AAGA,SAAK,aAAa;AAClB,SAAK,cAAc;AACnB,SAAK,2BAA2B;AAChC,SAAK,4BAA4B;AACjC,SAAK,qBAAqB;AAC1B,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAC5B,SAAK,qBAAqB,QAAQ,QAAQ;AAE1C,SAAK,KAAK,aAAa;AAAA,EACzB;AAAA;AAAA,EAGA,cAAc,UAAmC;AAC/C,WAAO,KAAK,IAAI,gBAAgB,QAAQ;AAAA,EAC1C;AAAA;AAAA,EAGA,YAAY,UAAmC;AAC7C,WAAO,KAAK,IAAI,cAAc,QAAQ;AAAA,EACxC;AAAA;AAAA,EAGA,QAAQ,UAAmC;AACzC,WAAO,KAAK,IAAI,aAAa,QAAQ;AAAA,EACvC;AAAA;AAAA,EAGA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,SAA6B;AAC/B,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAyB;AAC7B,QAAI,KAAK,aAAc;AACvB,SAAK,KAAK,eAAe;AAEzB,SAAK,KAAK;AAEV,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,QAAQ;AAC5B,WAAK,WAAW;AAAA,IAClB;AAGA,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AACjB,SAAK,KAAK;AACV,SAAK,KAAK;AAEV,SAAK,WAAW;AAChB,SAAK,eAAe;AACpB,SAAK,WAAW,MAAM;AAEtB,SAAK,KAAK,WAAW;AAAA,EACvB;AAAA;AAAA;AAAA;AAAA,EAMQ,IAAI,MAAoB,UAAmC;AACjE,QAAI,MAAM,KAAK,WAAW,IAAI,IAAI;AAClC,QAAI,CAAC,KAAK;AACR,YAAM,oBAAI,IAAI;AACd,WAAK,WAAW,IAAI,MAAM,GAAG;AAAA,IAC/B;AACA,QAAI,IAAI,QAAQ;AAChB,WAAO,MAAM;AACX,UAAK,OAAO,QAAQ;AAAA,IACtB;AAAA,EACF;AAAA,EAEQ,MAAM,OAAuB;AACnC,UAAM,MAAM,KAAK,WAAW,IAAI,MAAM,IAAI;AAC1C,QAAI,CAAC,IAAK;AACV,eAAW,MAAM,KAAK;AACpB,UAAI;AACF,WAAG,KAAK;AAAA,MACV,SAAS,KAAK;AAEZ,gBAAQ,MAAM,GAAG,UAAU,aAAa,MAAM,IAAI,cAAc,GAAG;AAAA,MACrE;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAMQ,oBAAoB,QAA2B;AAErD,UAAM,SAAS,OAAO,eAAe;AACrC,UAAM,gBAAgB,OAAO,CAAC,GAAG,YAAY;AAC7C,UAAM,kBAAkB,eAAe,cAAc;AAErD,SAAK,KAAK,sBAAsB,eAAe;AAI/C,QAAI,OAAO,iBAAiB,eAAe,OAAO,uBAAuB,aAAa;AACpF,YAAM,IAAI,MAAM,GAAG,UAAU,oDAAoD;AAAA,IACnF;AAEA,UAAM,oBACJ,OAAO,iBAAiB,cACpB;AAAA;AAAA,MAEC,WAAmB;AAAA;AAE1B,SAAK,gBAAgB,IAAI,kBAAkB,EAAE,YAAY
,gBAAgB,CAAC;AAC1E,SAAK,oBAAoB;AAEzB,SAAK,cAAc,KAAK,cAAc,wBAAwB,MAAM;AAGpE,SAAK,sBAAsB,eAAe;AAAA,EAC5C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,sBAAsB,iBAA+B;AAC3D,QAAI,CAAC,KAAK,iBAAiB,CAAC,KAAK,YAAa;AAG9C,UAAM,aAAa;AACnB,UAAM,YAAY,KAAK,cAAc,sBAAsB,YAAY,GAAG,CAAC;AAE3E,cAAU,iBAAiB,CAAC,UAAgC;AAC1D,UAAI,CAAC,KAAK,WAAY;AAEtB,YAAM,YAAY,MAAM,YAAY,eAAe,CAAC;AAGpD,YAAM,YACJ,oBAAoB,KAAK,cACrB,SAAS,WAAW,iBAAiB,KAAK,WAAW,IACrD,IAAI,aAAa,SAAS;AAGhC,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,SAAK,YAAY,QAAQ,SAAS;AAClC,cAAU,QAAQ,KAAK,cAAc,WAAW;AAChD,SAAK,eAAe;AAEpB,SAAK,KAAK,6CAA6C;AAAA,EACzD;AAAA;AAAA;AAAA;AAAA,EAKQ,WAAW,SAA6B;AAC9C,QAAI,SAAS;AAEb,WAAO,SAAS,QAAQ,QAAQ;AAC9B,YAAM,YAAY,aAAa,KAAK;AACpC,YAAM,YAAY,QAAQ,SAAS;AACnC,YAAM,SAAS,KAAK,IAAI,WAAW,SAAS;AAE5C,WAAK,aAAa;AAAA,QAChB,QAAQ,SAAS,QAAQ,SAAS,MAAM;AAAA,QACxC,KAAK;AAAA,MACP;AACA,WAAK,sBAAsB;AAC3B,gBAAU;AAEV,UAAI,KAAK,uBAAuB,YAAY;AAC1C,cAAM,QAAQ,IAAI,aAAa,KAAK,YAAY;AAChD,aAAK,qBAAqB;AAG1B,aAAK,qBAAqB,KAAK,mBAAmB;AAAA,UAAK,MACrD,KAAK,aAAa,KAAK;AAAA,QACzB;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,aAAa,OAAoC;AAC7D,QAAI,CAAC,KAAK,cAAc,CAAC,KAAK,SAAU;AAExC,QAAI;AACJ,QAAI;AACF,oBAAc,MAAM,KAAK,aAAa,KAAK;AAAA,IAC7C,SAAS,KAAK;AACZ,UAAI,KAAK,QAAQ;AAEf,gBAAQ,MAAM,GAAG,UAAU,qBAAqB,GAAG;AAAA,MACrD;AACA;AAAA,IACF;AAGA,QAAI,KAAK,gBAAgB;AACvB,WAAK,oBAAoB,KAAK,WAAW;AAEzC,UAAI,KAAK,oBAAoB,UAAU,KAAK,0BAA0B;AACpE,aAAK,mBAAmB;AAAA,MAC1B;AACA;AAAA,IACF;AAGA,UAAM,gBAAgB,eAAe,KAAK;AAE1C,QAAI,eAAe;AACjB,WAAK;AACL,WAAK,4BAA4B;AAEjC,UAAI,CAAC,KAAK,eAAe,KAAK,4BAA4B,KAAK,kBAAkB;AAC/E,aAAK,cAAc;AACnB,aAAK,KAAK,gCAAgC,YAAY,QAAQ,CAAC,CAAC;AAChE,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF,OAAO;AACL,WAAK;AAGL,UAAI,KAAK,eAAe,KAAK,6BAA6B,KAAK,gBAAgB;AAC7E,aAAK,cAAc;AACnB,aAAK,2BAA2B;AAChC,aAAK,KAAK,8BAA8B,YAAY,QAAQ,CAAC,CAAC;AAC9D,aAAK,MAAM;AAAA,UACT,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,qBAA2B;AACjC,QAAI,KAAK,oBAAoB,WAAW,GAAG;AACzC,WAAK,iBAAiB;AACtB;AAA
A,IACF;AAGA,UAAM,MAAM,KAAK,oBAAoB,OAAO,CAAC,GAAG,MAAM,IAAI,GAAG,CAAC;AAC9D,UAAM,WAAW,MAAM,KAAK,oBAAoB;AAIhD,UAAM,eAAe;AACrB,QAAI,WAAW,eAAe,KAAK,YAAY;AAC7C,WAAK,uBAAuB,KAAK,IAAI,WAAW,cAAc,IAAI;AAClE,WAAK;AAAA,QACH;AAAA,QACA,KAAK,WAAW,QAAQ,CAAC;AAAA,QACzB;AAAA,QACA,KAAK,qBAAqB,QAAQ,CAAC;AAAA,QACnC;AAAA,QACA,SAAS,QAAQ,CAAC,IAAI;AAAA,MACxB;AAAA,IACF,OAAO;AACL,WAAK,uBAAuB,KAAK;AACjC,WAAK,KAAK,wBAAwB,SAAS,QAAQ,CAAC,GAAG,+BAA0B,KAAK,WAAW,QAAQ,CAAC,CAAC;AAAA,IAC7G;AAGA,SAAK,mBAAmB,KAAK,KAAK,KAAK,uBAAuB,KAAK,gBAAgB;AACnF,SAAK,iBAAiB,KAAK,KAAK,KAAK,qBAAqB,KAAK,gBAAgB;AAE/E,SAAK,iBAAiB;AACtB,SAAK,sBAAsB,CAAC;AAAA,EAC9B;AAAA,EAEQ,yBAA+B;AACrC,QAAI,KAAK,cAAc;AACrB,UAAI;AACF,aAAK,aAAa,WAAW;AAAA,MAC/B,QAAQ;AAAA,MAER;AACA,UAAI,oBAAoB,KAAK,cAAc;AACzC,QAAC,KAAK,aAAqC,iBAAiB;AAAA,MAC9D;AACA,WAAK,eAAe;AAAA,IACtB;AAEA,QAAI,KAAK,aAAa;AACpB,UAAI;AACF,aAAK,YAAY,WAAW;AAAA,MAC9B,QAAQ;AAAA,MAER;AACA,WAAK,cAAc;AAAA,IACrB;AAEA,QAAI,KAAK,iBAAiB,KAAK,mBAAmB;AAChD,UAAI;AACF,aAAK,KAAK,cAAc,MAAM;AAAA,MAChC,QAAQ;AAAA,MAER;AACA,WAAK,gBAAgB;AACrB,WAAK,oBAAoB;AAAA,IAC3B;AAEA,SAAK,UAAU;AAAA,EACjB;AAAA;AAAA;AAAA;AAAA;AAAA,EAOQ,eAAqB;AAE3B,SAAK,IAAI,QAAQ;AACjB,SAAK,IAAI,QAAQ;AAEjB,UAAM,QAAQ,IAAI,aAAa,IAAI,UAAU,EAAE,KAAK,CAAC;AACrD,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,GAAG,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAC1E,SAAK,KAAK,IAAQ,WAAO,WAAW,MAAM,MAAM,UAAU,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC;AAAA,EACzE;AAAA;AAAA;AAAA;AAAA,EAMQ,QAAQ,MAAuB;AACrC,QAAI,CAAC,KAAK,OAAQ;AAElB,YAAQ,IAAI,YAAY,GAAG,IAAI;AAAA,EACjC;AACF;AAeA,eAAsB,UAAU,SAA0C;AACxE,QAAM,MAAM,IAAI,UAAU,OAAO;AACjC,QAAM,IAAI,KAAK;AACf,SAAO;AACT;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@guidekit/vad",
|
|
3
|
-
"version": "0.1.0-beta.
|
|
3
|
+
"version": "0.1.0-beta.2",
|
|
4
4
|
"description": "Silero VAD model for GuideKit SDK — voice activity detection via ONNX Runtime",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -16,17 +16,9 @@
|
|
|
16
16
|
"files": [
|
|
17
17
|
"dist",
|
|
18
18
|
"models",
|
|
19
|
-
"
|
|
19
|
+
"LICENSE",
|
|
20
20
|
"README.md"
|
|
21
21
|
],
|
|
22
|
-
"scripts": {
|
|
23
|
-
"build": "tsup",
|
|
24
|
-
"dev": "tsup --watch",
|
|
25
|
-
"test": "echo 'No tests yet'",
|
|
26
|
-
"test:unit": "echo 'No tests yet'",
|
|
27
|
-
"typecheck": "tsc --noEmit",
|
|
28
|
-
"clean": "rm -rf dist"
|
|
29
|
-
},
|
|
30
22
|
"dependencies": {
|
|
31
23
|
"onnxruntime-web": "^1.21.0"
|
|
32
24
|
},
|
|
@@ -36,5 +28,34 @@
|
|
|
36
28
|
"vitest": "^3.0.0"
|
|
37
29
|
},
|
|
38
30
|
"sideEffects": false,
|
|
39
|
-
"license": "MIT"
|
|
40
|
-
|
|
31
|
+
"license": "MIT",
|
|
32
|
+
"author": "Riazul Islam",
|
|
33
|
+
"keywords": [
|
|
34
|
+
"guidekit",
|
|
35
|
+
"vad",
|
|
36
|
+
"voice-activity-detection",
|
|
37
|
+
"silero",
|
|
38
|
+
"onnx",
|
|
39
|
+
"audio"
|
|
40
|
+
],
|
|
41
|
+
"engines": {
|
|
42
|
+
"node": ">=18"
|
|
43
|
+
},
|
|
44
|
+
"repository": {
|
|
45
|
+
"type": "git",
|
|
46
|
+
"url": "https://github.com/riaz37/guidekit.git",
|
|
47
|
+
"directory": "packages/vad"
|
|
48
|
+
},
|
|
49
|
+
"homepage": "https://github.com/riaz37/guidekit#readme",
|
|
50
|
+
"bugs": {
|
|
51
|
+
"url": "https://github.com/riaz37/guidekit/issues"
|
|
52
|
+
},
|
|
53
|
+
"scripts": {
|
|
54
|
+
"build": "tsup",
|
|
55
|
+
"dev": "tsup --watch",
|
|
56
|
+
"test": "echo 'No tests yet'",
|
|
57
|
+
"test:unit": "echo 'No tests yet'",
|
|
58
|
+
"typecheck": "tsc --noEmit",
|
|
59
|
+
"clean": "rm -rf dist"
|
|
60
|
+
}
|
|
61
|
+
}
|