osense-vad 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/LICENSE.md +21 -0
  2. package/README.md +403 -0
  3. package/dist/common/asset-path.d.ts +2 -0
  4. package/dist/common/asset-path.d.ts.map +1 -0
  5. package/dist/common/asset-path.js +17 -0
  6. package/dist/common/asset-path.js.map +1 -0
  7. package/dist/common/default-model-fetcher.d.ts +2 -0
  8. package/dist/common/default-model-fetcher.d.ts.map +1 -0
  9. package/dist/common/default-model-fetcher.js +8 -0
  10. package/dist/common/default-model-fetcher.js.map +1 -0
  11. package/dist/common/frame-processor.d.ts +86 -0
  12. package/dist/common/frame-processor.d.ts.map +1 -0
  13. package/dist/common/frame-processor.js +180 -0
  14. package/dist/common/frame-processor.js.map +1 -0
  15. package/dist/common/index.d.ts +13 -0
  16. package/dist/common/index.d.ts.map +1 -0
  17. package/dist/common/index.js +53 -0
  18. package/dist/common/index.js.map +1 -0
  19. package/dist/common/logging.d.ts +8 -0
  20. package/dist/common/logging.d.ts.map +1 -0
  21. package/dist/common/logging.js +16 -0
  22. package/dist/common/logging.js.map +1 -0
  23. package/dist/common/messages.d.ts +10 -0
  24. package/dist/common/messages.d.ts.map +1 -0
  25. package/dist/common/messages.js +14 -0
  26. package/dist/common/messages.js.map +1 -0
  27. package/dist/common/models/common.d.ts +14 -0
  28. package/dist/common/models/common.d.ts.map +1 -0
  29. package/dist/common/models/common.js +3 -0
  30. package/dist/common/models/common.js.map +1 -0
  31. package/dist/common/models/index.d.ts +9 -0
  32. package/dist/common/models/index.d.ts.map +1 -0
  33. package/dist/common/models/index.js +11 -0
  34. package/dist/common/models/index.js.map +1 -0
  35. package/dist/common/models/legacy.d.ts +14 -0
  36. package/dist/common/models/legacy.d.ts.map +1 -0
  37. package/dist/common/models/legacy.js +52 -0
  38. package/dist/common/models/legacy.js.map +1 -0
  39. package/dist/common/models/v5.d.ts +13 -0
  40. package/dist/common/models/v5.d.ts.map +1 -0
  41. package/dist/common/models/v5.js +45 -0
  42. package/dist/common/models/v5.js.map +1 -0
  43. package/dist/common/models/v6.d.ts +22 -0
  44. package/dist/common/models/v6.d.ts.map +1 -0
  45. package/dist/common/models/v6.js +54 -0
  46. package/dist/common/models/v6.js.map +1 -0
  47. package/dist/common/non-real-time-vad.d.ts +23 -0
  48. package/dist/common/non-real-time-vad.d.ts.map +1 -0
  49. package/dist/common/non-real-time-vad.js +127 -0
  50. package/dist/common/non-real-time-vad.js.map +1 -0
  51. package/dist/common/resampler.d.ts +16 -0
  52. package/dist/common/resampler.d.ts.map +1 -0
  53. package/dist/common/resampler.js +63 -0
  54. package/dist/common/resampler.js.map +1 -0
  55. package/dist/common/utils.d.ts +8 -0
  56. package/dist/common/utils.d.ts.map +1 -0
  57. package/dist/common/utils.js +118 -0
  58. package/dist/common/utils.js.map +1 -0
  59. package/dist/index.d.ts +13 -0
  60. package/dist/index.d.ts.map +1 -0
  61. package/dist/index.js +74 -0
  62. package/dist/index.js.map +1 -0
  63. package/dist/real-time-vad.d.ts +63 -0
  64. package/dist/real-time-vad.d.ts.map +1 -0
  65. package/dist/real-time-vad.js +171 -0
  66. package/dist/real-time-vad.js.map +1 -0
  67. package/dist/silero_vad_legacy.onnx +0 -0
  68. package/dist/silero_vad_v5.onnx +0 -0
  69. package/package.json +66 -0
  70. package/silero_vad_legacy.onnx +0 -0
  71. package/silero_vad_v5.onnx +0 -0
  72. package/silero_vad_v6.onnx +0 -0
package/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Agent Voice Response
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,403 @@
1
+ # Agent Voice Response - AVR VAD - Silero Voice Activity Detection for Node.js
2
+
3
+ [![Discord](https://img.shields.io/discord/1347239846632226998?label=Discord&logo=discord)](https://discord.gg/DFTU69Hg74)
4
+ [![GitHub Repo stars](https://img.shields.io/github/stars/agentvoiceresponse/avr-vad?style=social)](https://github.com/agentvoiceresponse/avr-vad)
5
+ [![npm version](https://img.shields.io/npm/v/avr-vad.svg)](https://www.npmjs.com/package/avr-vad)
6
+ [![npm downloads](https://img.shields.io/npm/dm/avr-vad.svg)](https://www.npmjs.com/package/avr-vad)
7
+ [![Ko-fi](https://img.shields.io/badge/Support%20us%20on-Ko--fi-ff5e5b.svg)](https://ko-fi.com/agentvoiceresponse)
8
+
9
+ 🎤 A Node.js library for Voice Activity Detection using the Silero VAD model.
10
+
11
+ ## ✨ Features
12
+
13
+ - 🚀 **Based on Silero VAD**: Uses the pre-trained Silero ONNX model (v5 and legacy versions) for accurate results
14
+ - 🎯 **Real-time processing**: Supports real-time frame-by-frame processing
15
+ - ⚡ **Non-real-time processing**: Batch processing for audio files and streams
16
+ - 🔧 **Configurable**: Customizable thresholds and parameters for different needs
17
+ - 🎵 **Audio processing**: Includes utilities for resampling and audio manipulation
18
+ - 📊 **Multiple models**: Support for both Silero VAD v5 and legacy models
19
+ - 💾 **Bundled models**: Models are included in the package, no external downloads required
20
+ - 📝 **TypeScript**: Fully typed with TypeScript
21
+
22
+ ## 🚀 Installation
23
+
24
+ ```bash
25
+ npm install avr-vad
26
+ ```
27
+
28
+ ## 📖 Quick Start
29
+
30
+ ### Real-time Processing
31
+
32
+ ```typescript
33
+ import { RealTimeVAD } from 'avr-vad';
34
+
35
+ // Initialize the VAD with default options (Silero v5 model)
36
+ const vad = await RealTimeVAD.new({
37
+ model: 'v5', // or 'legacy'
38
+ positiveSpeechThreshold: 0.5,
39
+ negativeSpeechThreshold: 0.35,
40
+ preSpeechPadFrames: 1,
41
+ redemptionFrames: 8,
42
+ frameSamples: 1536,
43
+ minSpeechFrames: 3
44
+ });
45
+
46
+ // Process audio frames in real-time
47
+ const audioFrame = getAudioFrameFromMicrophone(); // Float32Array of 1536 samples at 16kHz
48
+ const result = await vad.processFrame(audioFrame);
49
+
50
+ console.log(`Speech probability: ${result.probability}`);
51
+ console.log(`Speech detected: ${result.msg === 'SPEECH_START' || result.msg === 'SPEECH_CONTINUE'}`);
52
+
53
+ // Clean up when done
54
+ vad.destroy();
55
+ ```
56
+
57
+ ### Non-Real-time Processing
58
+
59
+ ```typescript
60
+ import { NonRealTimeVAD } from 'avr-vad';
61
+
62
+ // Initialize for batch processing
63
+ const vad = await NonRealTimeVAD.new({
64
+ model: 'v5',
65
+ positiveSpeechThreshold: 0.5,
66
+ negativeSpeechThreshold: 0.35
67
+ });
68
+
69
+ // Process entire audio buffer
70
+ const audioData = loadAudioData(); // Float32Array at 16kHz
71
+ const results = await vad.processAudio(audioData);
72
+
73
+ // Get speech segments
74
+ const speechSegments = vad.getSpeechSegments(results);
75
+ console.log(`Found ${speechSegments.length} speech segments`);
76
+
77
+ speechSegments.forEach((segment, i) => {
78
+ console.log(`Segment ${i + 1}: ${segment.start}ms - ${segment.end}ms`);
79
+ });
80
+
81
+ // Clean up
82
+ vad.destroy();
83
+ ```
84
+
85
+ ## ⚙️ Configuration
86
+
87
+ ### Real-time VAD Options
88
+
89
+ ```typescript
90
+ interface RealTimeVADOptions {
91
+ /** Model version to use ('v5' | 'legacy') */
92
+ model?: 'v5' | 'legacy';
93
+
94
+ /** Threshold for detecting speech start */
95
+ positiveSpeechThreshold?: number;
96
+
97
+ /** Threshold for detecting speech end */
98
+ negativeSpeechThreshold?: number;
99
+
100
+ /** Frames to include before speech detection */
101
+ preSpeechPadFrames?: number;
102
+
103
+ /** Frames to wait before ending speech */
104
+ redemptionFrames?: number;
105
+
106
+ /** Number of samples per frame (usually 1536 for 16kHz) */
107
+ frameSamples?: number;
108
+
109
+ /** Minimum frames for valid speech */
110
+ minSpeechFrames?: number;
111
+ }
112
+ ```
113
+
114
+ ### Non-Real-time VAD Options
115
+
116
+ ```typescript
117
+ interface NonRealTimeVADOptions {
118
+ /** Model version to use ('v5' | 'legacy') */
119
+ model?: 'v5' | 'legacy';
120
+
121
+ /** Threshold for detecting speech start */
122
+ positiveSpeechThreshold?: number;
123
+
124
+ /** Threshold for detecting speech end */
125
+ negativeSpeechThreshold?: number;
126
+ }
127
+ ```
128
+
129
+ ### Default Values
130
+
131
+ ```typescript
132
+ // Real-time VAD defaults
133
+ const defaultRealTimeOptions = {
134
+ model: 'v5',
135
+ positiveSpeechThreshold: 0.5,
136
+ negativeSpeechThreshold: 0.35,
137
+ preSpeechPadFrames: 1,
138
+ redemptionFrames: 8,
139
+ frameSamples: 1536,
140
+ minSpeechFrames: 3
141
+ };
142
+
143
+ // Non-real-time VAD defaults
144
+ const defaultNonRealTimeOptions = {
145
+ model: 'v5',
146
+ positiveSpeechThreshold: 0.5,
147
+ negativeSpeechThreshold: 0.35
148
+ };
149
+ ```
150
+
151
+ ## 📊 Results and Messages
152
+
153
+ ### VAD Messages
154
+
155
+ The VAD returns different message types to indicate speech state changes:
156
+
157
+ ```typescript
158
+ enum Message {
159
+ ERROR = 'ERROR',
160
+ SPEECH_START = 'SPEECH_START',
161
+ SPEECH_CONTINUE = 'SPEECH_CONTINUE',
162
+ SPEECH_END = 'SPEECH_END',
163
+ SILENCE = 'SILENCE'
164
+ }
165
+ ```
166
+
167
+ ### Processing Results
168
+
169
+ ```typescript
170
+ interface VADResult {
171
+ /** Speech probability (0.0 - 1.0) */
172
+ probability: number;
173
+
174
+ /** Message indicating speech state */
175
+ msg: Message;
176
+
177
+ /** Audio data if speech segment ended */
178
+ audio?: Float32Array;
179
+ }
180
+ ```
181
+
182
+ ### Speech Segments
183
+
184
+ ```typescript
185
+ interface SpeechSegment {
186
+ /** Start time in milliseconds */
187
+ start: number;
188
+
189
+ /** End time in milliseconds */
190
+ end: number;
191
+
192
+ /** Speech probability for this segment */
193
+ probability: number;
194
+ }
195
+ ```
196
+
197
+ ## 🔧 Audio Utilities
198
+
199
+ The library includes various audio processing utilities:
200
+
201
+ ```typescript
202
+ import { utils, Resampler } from 'avr-vad';
203
+
204
+ // Resample audio to 16kHz (required for VAD)
205
+ const resampler = new Resampler({
206
+ nativeSampleRate: 44100,
207
+ targetSampleRate: 16000,
208
+ targetFrameSize: 1536
209
+ });
210
+
211
+ const resampledFrame = resampler.process(audioFrame);
212
+
213
+ // Other utilities
214
+ const frameSize = utils.frameSize; // Get frame size for current sample rate
215
+ const audioBuffer = utils.concatArrays([frame1, frame2]); // Concatenate audio arrays
216
+ ```
217
+
218
+ ## 🎯 Advanced Examples
219
+
220
+ ### Real-time Speech Detection with Callbacks
221
+
222
+ ```typescript
223
+ import { RealTimeVAD, Message } from 'avr-vad';
224
+
225
+ class SpeechDetector {
226
+ private vad: RealTimeVAD;
227
+ private onSpeechStart?: (audio: Float32Array) => void;
228
+ private onSpeechEnd?: (audio: Float32Array) => void;
229
+
230
+ constructor(callbacks: {
231
+ onSpeechStart?: (audio: Float32Array) => void;
232
+ onSpeechEnd?: (audio: Float32Array) => void;
233
+ }) {
234
+ this.onSpeechStart = callbacks.onSpeechStart;
235
+ this.onSpeechEnd = callbacks.onSpeechEnd;
236
+ }
237
+
238
+ async initialize() {
239
+ this.vad = await RealTimeVAD.new({
240
+ positiveSpeechThreshold: 0.5,
241
+ negativeSpeechThreshold: 0.35,
242
+ onSpeechStart: this.onSpeechStart,
243
+ onSpeechEnd: this.onSpeechEnd
244
+ });
245
+ }
246
+
247
+ async processFrame(audioFrame: Float32Array) {
248
+ const result = await this.vad.processFrame(audioFrame);
249
+ return result;
250
+ }
251
+
252
+ destroy() {
253
+ this.vad?.destroy();
254
+ }
255
+ }
256
+
257
+ // Usage
258
+ const detector = new SpeechDetector({
259
+ onSpeechStart: (audio) => console.log(`Speech started with ${audio.length} samples`),
260
+ onSpeechEnd: (audio) => console.log(`Speech ended with ${audio.length} samples`)
261
+ });
262
+
263
+ await detector.initialize();
264
+ ```
265
+
266
+ ### Batch Processing Audio File
267
+
268
+ ```typescript
269
+ import { NonRealTimeVAD, utils } from 'avr-vad';
270
+ import * as fs from 'fs';
271
+
272
+ async function processAudioFile(filePath: string) {
273
+ // Load audio data (you'll need your own audio loading logic)
274
+ const audioData = loadWavFile(filePath); // Float32Array at 16kHz
275
+
276
+ const vad = await NonRealTimeVAD.new({
277
+ model: 'v5',
278
+ positiveSpeechThreshold: 0.6,
279
+ negativeSpeechThreshold: 0.4
280
+ });
281
+
282
+ const results = await vad.processAudio(audioData);
283
+ const segments = vad.getSpeechSegments(results);
284
+
285
+ console.log(`Processing ${filePath}:`);
286
+ console.log(`Total audio duration: ${(audioData.length / 16000).toFixed(2)}s`);
287
+ console.log(`Speech segments found: ${segments.length}`);
288
+
289
+ segments.forEach((segment, i) => {
290
+ const duration = ((segment.end - segment.start) / 1000).toFixed(2);
291
+ console.log(` Segment ${i + 1}: ${segment.start}ms - ${segment.end}ms (${duration}s)`);
292
+ });
293
+
294
+ vad.destroy();
295
+ return segments;
296
+ }
297
+ ```
298
+
299
+ ## 📝 Development
300
+
301
+ ### Requirements
302
+
303
+ - Node.js >= 16.0.0
304
+ - TypeScript >= 5.0.0
305
+
306
+ ### Build
307
+
308
+ ```bash
309
+ npm run build
310
+ ```
311
+
312
+ ### Test
313
+
314
+ ```bash
315
+ npm test
316
+ ```
317
+
318
+ ### Scripts
319
+
320
+ ```bash
321
+ npm run lint # Run ESLint
322
+ npm run clean # Clean build directory
323
+ npm run prepare # Build the package (runs automatically before publish and on a local npm install)
324
+ ```
325
+
326
+ ## 📁 Project Structure
327
+
328
+ ```
329
+ avr-vad/
330
+ ├── src/
331
+ │ ├── index.ts # Main exports
332
+ │ ├── real-time-vad.ts # Real-time VAD implementation
333
+ │ └── common/
334
+ │ ├── index.ts # Common exports
335
+ │ ├── frame-processor.ts # Core ONNX processing
336
+ │ ├── non-real-time-vad.ts # Batch processing VAD
337
+ │ ├── utils.ts # Utility functions
338
+ │ ├── resampler.ts # Audio resampling
339
+ ├── dist/ # Compiled JavaScript
340
+ ├── test/ # Test files
341
+ ├── silero_vad_v5.onnx # Silero VAD v5 model
342
+ ├── silero_vad_legacy.onnx # Silero VAD legacy model
343
+ └── package.json
344
+ ```
345
+
346
+ ## 🔧 Troubleshooting
347
+
348
+ ### Audio Format Requirements
349
+
350
+ The Silero VAD model requires:
351
+ - **Sample rate**: 16kHz
352
+ - **Channels**: Mono (single channel)
353
+ - **Format**: Float32Array with values between -1.0 and 1.0
354
+ - **Frame size**: 1536 samples (96ms at 16kHz)
355
+
356
+ ### Model Selection
357
+
358
+ - **v5 model**: Latest version with improved accuracy
359
+ - **legacy model**: Original model for compatibility
360
+
361
+ If your audio does not meet the requirements listed under "Audio Format Requirements", use the `Resampler` utility to convert it to the required format:
362
+
363
+ ```typescript
364
+ import { Resampler } from 'avr-vad';
365
+
366
+ const resampler = new Resampler({
367
+ nativeSampleRate: 44100, // Your audio sample rate
368
+ targetSampleRate: 16000, // Required by VAD
369
+ targetFrameSize: 1536 // Required frame size
370
+ });
371
+ ```
372
+
373
+ ### Performance Tips
374
+
375
+ - Use appropriate thresholds for your use case
376
+ - Consider using the legacy model for lower resource usage
377
+ - For real-time applications, ensure your audio processing pipeline can handle 16kHz/1536 samples per frame
378
+ - Use `redemptionFrames` to avoid choppy speech detection
379
+
380
+ ## Acknowledgments
381
+
382
+ - [Silero Models](https://github.com/snakers4/silero-vad) for the excellent VAD model
383
+ - [ONNX Runtime](https://onnxruntime.ai/) for model inference
384
+ - The open source community for supporting libraries
385
+
386
+ ## Support & Community
387
+
388
+ * **Website:** [https://agentvoiceresponse.com](https://agentvoiceresponse.com) - Official website.
389
+ * **GitHub:** [https://github.com/agentvoiceresponse](https://github.com/agentvoiceresponse) - Report issues, contribute code.
390
+ * **Discord:** [https://discord.gg/DFTU69Hg74](https://discord.gg/DFTU69Hg74) - Join the community discussion.
391
+ * **Docker Hub:** [https://hub.docker.com/u/agentvoiceresponse](https://hub.docker.com/u/agentvoiceresponse) - Find Docker images.
392
+ * **NPM:** [https://www.npmjs.com/~agentvoiceresponse](https://www.npmjs.com/~agentvoiceresponse) - Browse our packages.
393
+ * **Wiki:** [https://wiki.agentvoiceresponse.com/en/home](https://wiki.agentvoiceresponse.com/en/home) - Project documentation and guides.
394
+
395
+ ## Support AVR
396
+
397
+ AVR is free and open-source. If you find it valuable, consider supporting its development:
398
+
399
+ <a href="https://ko-fi.com/agentvoiceresponse" target="_blank"><img src="https://ko-fi.com/img/githubbutton_sm.svg" alt="Support us on Ko-fi"></a>
400
+
401
+ ## License
402
+
403
+ MIT License - see the [LICENSE.md](LICENSE.md) file for details.
@@ -0,0 +1,2 @@
1
+ export declare const baseAssetPath: string;
2
+ //# sourceMappingURL=asset-path.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"asset-path.d.ts","sourceRoot":"","sources":["../../src/common/asset-path.ts"],"names":[],"mappings":"AAeA,eAAO,MAAM,aAAa,QAAW,CAAC"}
@@ -0,0 +1,17 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.baseAssetPath = void 0;
4
+ // nextjs@14 bundler may attempt to execute this during SSR and crash
5
+ const isWeb = typeof window !== "undefined" && typeof window.document !== "undefined";
6
+ const currentScript = isWeb
7
+ ? window.document.currentScript
8
+ : null;
9
+ let basePath = "/";
10
+ if (currentScript) {
11
+ basePath = currentScript.src
12
+ .replace(/#.*$/, "")
13
+ .replace(/\?.*$/, "")
14
+ .replace(/\/[^/]+$/, "/");
15
+ }
16
+ exports.baseAssetPath = basePath;
17
+ //# sourceMappingURL=asset-path.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"asset-path.js","sourceRoot":"","sources":["../../src/common/asset-path.ts"],"names":[],"mappings":";;;AAAA,qEAAqE;AACrE,MAAM,KAAK,GACV,OAAO,MAAM,KAAK,WAAW,IAAI,OAAO,MAAM,CAAC,QAAQ,KAAK,WAAW,CAAC;AACzE,MAAM,aAAa,GAAG,KAAK;IAC1B,CAAC,CAAE,MAAM,CAAC,QAAQ,CAAC,aAAmC;IACtD,CAAC,CAAC,IAAI,CAAC;AAER,IAAI,QAAQ,GAAG,GAAG,CAAC;AACnB,IAAI,aAAa,EAAE,CAAC;IACnB,QAAQ,GAAG,aAAa,CAAC,GAAG;SAC1B,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;SACnB,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;SACpB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;AAC5B,CAAC;AAEY,QAAA,aAAa,GAAG,QAAQ,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare const defaultModelFetcher: (path: string) => Promise<ArrayBuffer>;
2
+ //# sourceMappingURL=default-model-fetcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"default-model-fetcher.d.ts","sourceRoot":"","sources":["../../src/common/default-model-fetcher.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,mBAAmB,GAAI,MAAM,MAAM,yBAE/C,CAAC"}
@@ -0,0 +1,8 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.defaultModelFetcher = void 0;
4
+ const defaultModelFetcher = (path) => {
5
+ return fetch(path).then((model) => model.arrayBuffer());
6
+ };
7
+ exports.defaultModelFetcher = defaultModelFetcher;
8
+ //# sourceMappingURL=default-model-fetcher.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"default-model-fetcher.js","sourceRoot":"","sources":["../../src/common/default-model-fetcher.ts"],"names":[],"mappings":";;;AAAO,MAAM,mBAAmB,GAAG,CAAC,IAAY,EAAE,EAAE;IAClD,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;AAC1D,CAAC,CAAC;AAFW,QAAA,mBAAmB,uBAE9B"}
@@ -0,0 +1,86 @@
1
+ import { Message } from "./messages";
2
+ import type { SpeechProbabilities } from "./models";
3
+ export interface FrameProcessorOptions {
4
+ /** Threshold over which values returned by the Silero VAD model will be considered as positively indicating speech.
5
+ * The Silero VAD model is run on each frame. This number should be between 0 and 1.
6
+ */
7
+ positiveSpeechThreshold: number;
8
+ /** Threshold under which values returned by the Silero VAD model will be considered as indicating an absence of speech.
9
+ * Note that the creators of the Silero VAD have historically set this number at 0.15 less than `positiveSpeechThreshold`.
10
+ */
11
+ negativeSpeechThreshold: number;
12
+ /** After a VAD value under the `negativeSpeechThreshold` is observed, the algorithm will wait `redemptionFrames` frames
13
+ * before running `onSpeechEnd`. If the model returns a value over `positiveSpeechThreshold` during this grace period, then
14
+ * the algorithm will consider the previously-detected "speech end" as having been a false negative.
15
+ */
16
+ redemptionFrames: number;
17
+ /** Number of audio samples (under a sample rate of 16000) to comprise one "frame" to feed to the Silero VAD model.
18
+ * The `frame` serves as a unit of measurement of lengths of audio segments and many other parameters are defined in terms of
19
+ * frames. The authors of the Silero VAD model offer the following warning:
20
+ * > WARNING! Silero VAD models were trained using 512, 1024, 1536 samples for 16000 sample rate and 256, 512, 768 samples for 8000 sample rate.
21
+ * > Values other than these may affect model performance!!
22
+ * In this context, audio fed to the VAD model always has sample rate 16000. It is probably a good idea to leave this at 1536.
23
+ */
24
+ frameSamples: number;
25
+ /** Number of frames to prepend to the audio segment that will be passed to `onSpeechEnd`. */
26
+ preSpeechPadFrames: number;
27
+ /** If an audio segment is detected as a speech segment according to initial algorithm but it has fewer than `minSpeechFrames`,
28
+ * it will be discarded and `onVADMisfire` will be run instead of `onSpeechEnd`.
29
+ */
30
+ minSpeechFrames: number;
31
+ /**
32
+ * If true, when the user pauses the VAD, it may trigger `onSpeechEnd`.
33
+ */
34
+ submitUserSpeechOnPause: boolean;
35
+ }
36
+ export declare const defaultLegacyFrameProcessorOptions: FrameProcessorOptions;
37
+ export declare const defaultV5FrameProcessorOptions: FrameProcessorOptions;
38
+ /**
39
+ * Default options for Silero VAD v6.2
40
+ * Optimized for telephony and lower quality audio
41
+ */
42
+ export declare const defaultV6FrameProcessorOptions: FrameProcessorOptions;
43
+ export declare function validateOptions(options: FrameProcessorOptions): void;
44
+ export interface FrameProcessorInterface {
45
+ resume: () => void;
46
+ process: (arr: Float32Array, handleEvent: (event: FrameProcessorEvent) => any) => Promise<any>;
47
+ endSegment: (handleEvent: (event: FrameProcessorEvent) => any) => {
48
+ msg?: Message;
49
+ audio?: Float32Array;
50
+ };
51
+ }
52
+ export declare class FrameProcessor implements FrameProcessorInterface {
53
+ modelProcessFunc: (frame: Float32Array) => Promise<SpeechProbabilities>;
54
+ modelResetFunc: () => any;
55
+ options: FrameProcessorOptions;
56
+ speaking: boolean;
57
+ audioBuffer: {
58
+ frame: Float32Array;
59
+ isSpeech: boolean;
60
+ }[];
61
+ redemptionCounter: number;
62
+ speechFrameCount: number;
63
+ active: boolean;
64
+ speechRealStartFired: boolean;
65
+ constructor(modelProcessFunc: (frame: Float32Array) => Promise<SpeechProbabilities>, modelResetFunc: () => any, options: FrameProcessorOptions);
66
+ reset: () => void;
67
+ pause: (handleEvent: (event: FrameProcessorEvent) => any) => void;
68
+ resume: () => void;
69
+ endSegment: (handleEvent: (event: FrameProcessorEvent) => any) => {};
70
+ process: (frame: Float32Array, handleEvent: (event: FrameProcessorEvent) => any) => Promise<void>;
71
+ }
72
+ export type FrameProcessorEvent = {
73
+ msg: Message.VADMisfire;
74
+ } | {
75
+ msg: Message.SpeechStart;
76
+ } | {
77
+ msg: Message.SpeechRealStart;
78
+ } | {
79
+ msg: Message.SpeechEnd;
80
+ audio: Float32Array;
81
+ } | {
82
+ msg: Message.FrameProcessed;
83
+ probs: SpeechProbabilities;
84
+ frame: Float32Array;
85
+ };
86
+ //# sourceMappingURL=frame-processor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"frame-processor.d.ts","sourceRoot":"","sources":["../../src/common/frame-processor.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACrC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,UAAU,CAAC;AAIpD,MAAM,WAAW,qBAAqB;IACpC;;OAEG;IACH,uBAAuB,EAAE,MAAM,CAAC;IAEhC;;OAEG;IACH,uBAAuB,EAAE,MAAM,CAAC;IAEhC;;;OAGG;IACH,gBAAgB,EAAE,MAAM,CAAC;IAEzB;;;;;;OAMG;IACH,YAAY,EAAE,MAAM,CAAC;IAErB,6FAA6F;IAC7F,kBAAkB,EAAE,MAAM,CAAC;IAE3B;;OAEG;IACH,eAAe,EAAE,MAAM,CAAC;IAExB;;OAEG;IACH,uBAAuB,EAAE,OAAO,CAAC;CAClC;AAED,eAAO,MAAM,kCAAkC,EAAE,qBAQhD,CAAC;AAEF,eAAO,MAAM,8BAA8B,EAAE,qBAQ5C,CAAC;AAEF;;;GAGG;AACH,eAAO,MAAM,8BAA8B,EAAE,qBAQ5C,CAAC;AAEF,wBAAgB,eAAe,CAAC,OAAO,EAAE,qBAAqB,QAwB7D;AAED,MAAM,WAAW,uBAAuB;IACtC,MAAM,EAAE,MAAM,IAAI,CAAC;IACnB,OAAO,EAAE,CACP,GAAG,EAAE,YAAY,EACjB,WAAW,EAAE,CAAC,KAAK,EAAE,mBAAmB,KAAK,GAAG,KAC7C,OAAO,CAAC,GAAG,CAAC,CAAC;IAClB,UAAU,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,EAAE,mBAAmB,KAAK,GAAG,KAAK;QAChE,GAAG,CAAC,EAAE,OAAO,CAAC;QACd,KAAK,CAAC,EAAE,YAAY,CAAC;KACtB,CAAC;CACH;AAkBD,qBAAa,cAAe,YAAW,uBAAuB;IASnD,gBAAgB,EAAE,CACvB,KAAK,EAAE,YAAY,KAChB,OAAO,CAAC,mBAAmB,CAAC;IAC1B,cAAc,EAAE,MAAM,GAAG;IACzB,OAAO,EAAE,qBAAqB;IAZvC,QAAQ,UAAS;IACjB,WAAW,EAAE;QAAE,KAAK,EAAE,YAAY,CAAC;QAAC,QAAQ,EAAE,OAAO,CAAA;KAAE,EAAE,CAAC;IAC1D,iBAAiB,SAAK;IACtB,gBAAgB,SAAK;IACrB,MAAM,UAAS;IACf,oBAAoB,UAAS;gBAGpB,gBAAgB,EAAE,CACvB,KAAK,EAAE,YAAY,KAChB,OAAO,CAAC,mBAAmB,CAAC,EAC1B,cAAc,EAAE,MAAM,GAAG,EACzB,OAAO,EAAE,qBAAqB;IAMvC,KAAK,aAOH;IAEF,KAAK,GAAI,aAAa,CAAC,KAAK,EAAE,mBAAmB,KAAK,GAAG,UAOvD;IAEF,MAAM,aAEJ;IAEF,UAAU,GAAI,aAAa,CAAC,KAAK,EAAE,mBAAmB,KAAK,GAAG,QAkB5D;IAEF,OAAO,GACL,OAAO,YAAY,EACnB,aAAa,CAAC,KAAK,EAAE,mBAAmB,KAAK,GAAG,mBAiEhD;CACH;AAED,MAAM,MAAM,mBAAmB,GAC3B;IACE,GAAG,EAAE,OAAO,CAAC,UAAU,CAAC;CACzB,GACD;IACE,GAAG,EAAE,OAAO,CAAC,WAAW,CAAC;CAC1B,GACD;IACE,GAAG,EAAE,OAAO,CAAC,eAAe,CAAC;CAC9B,GACD;IACE,GAAG,EAAE,OAAO,CAAC,SAAS,CAAC;IACvB,KAAK,EAAE,YAAY,CAAC;CACrB,GACD;IACE,GAAG,EAAE,OAAO,CAAC,cAAc,CAAC;IAC5B,KAAK,EAAE,mBAAmB,CAAC;IAC3B,KAAK,EAAE,YAAY,CAAC;C
ACrB,CAAC"}