@prabhjeet.me/wakeywakey 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts ADDED
@@ -0,0 +1,675 @@
1
+ import * as i0 from '@angular/core';
2
+ import { InjectionToken, OnInit, OnDestroy, EventEmitter } from '@angular/core';
3
+ import { Subject } from 'rxjs';
4
+ import * as _prabhjeet_me_wakeywakey from '@prabhjeet.me/wakeywakey';
5
+ import { InferenceSession } from 'onnxruntime-web';
6
+
7
+ /**
8
+ * Audio utility
9
+ */
10
+ declare class AudioUtil {
11
+ /**
12
+ * Create wav blob url from audio chunk
13
+ *
14
+ * @param chunks Audio chunks
15
+ * @param sampleRate Sample rate
16
+ * @returns
17
+ */
18
+ static createWavBlob(chunks: Float32Array[], sampleRate?: number): string | null;
19
+ }
20
+
21
+ interface OrbConfig {
22
+ /**
23
+ * Height and width (px)
24
+ */
25
+ size?: number;
26
+ }
27
+
28
+ interface AudioConfig {
29
+ /**
30
+ * Audio gain
31
+ */
32
+ gain: number;
33
+ /**
34
+ * Voice activity detection threshold
35
+ *
36
+ * Default: 0.5
37
+ */
38
+ vadThreshold?: number;
39
+ /**
40
+ * Silence duration
41
+ *
42
+ * Default: 1000ms
43
+ */
44
+ silenceDuration?: number;
45
+ /**
46
+ * Use RNN to suppress noise
47
+ */
48
+ noiseSuppression?: {
49
+ /**
50
+ * Enable noise suppression
51
+ */
52
+ enable?: boolean;
53
+ /**
54
+ * RNNoise worklet js
55
+ *
56
+ * Default: [basePath]/worklets/workletProcessor.js
57
+ */
58
+ worklet?: string;
59
+ /**
60
+ * RNNoise wasm file
61
+ *
62
+ * Default: [basePath]/wasm/rnnoise.wasm
63
+ */
64
+ rnnoise?: string;
65
+ /**
66
+ * RNNoise simd wasm file
67
+ *
68
+ * Default: [basePath]/wasm/rnnoise_simd.wasm
69
+ */
70
+ rnnoise_simd?: string;
71
+ };
72
+ /**
73
+ * Sound config (enable flag and sound file paths)
74
+ */
75
+ sound?: {
76
+ /**
77
+ * Enable sound
78
+ *
79
+ * Default: true
80
+ */
81
+ enable?: boolean;
82
+ /**
83
+ * Path of sound to be played when wake word is detected
84
+ *
85
+ * Default: [basePath]/sounds/up.mp3
86
+ */
87
+ up?: string;
88
+ /**
89
+ * Path of sound to be played when system is done recording and silence is detected
90
+ *
91
+ * Default: [basePath]/sounds/down.mp3
92
+ */
93
+ down?: string;
94
+ };
95
+ }
96
+
97
+ interface OnnxConfig {
98
+ model: {
99
+ /**
100
+ * Mel Spectrogram model (.onnx)
101
+ *
102
+ * Default: [basePath]/models/melspectrogram.onnx
103
+ */
104
+ melspectrogram?: string;
105
+ /**
106
+ * Embedding model (.onnx)
107
+ *
108
+ * Default: [basePath]/models/embedding_model.onnx
109
+ */
110
+ embedding_model?: string;
111
+ /**
112
+ * Silero VAD (.onnx)
113
+ *
114
+ * Default: [basePath]/models/silero_vad_v4.onnx
115
+ */
116
+ silero_vad?: string;
117
+ /**
118
+ * Wake word model (.onnx)
119
+ */
120
+ wakeword: string;
121
+ };
122
+ /**
123
+ * Path that contains onnx wasm runtime files
124
+ *
125
+ * Default: [basePath]/wasm
126
+ */
127
+ runtimePath?: string;
128
+ /**
129
+ * Wake word inference threshold
130
+ *
131
+ * Default: 0.5
132
+ */
133
+ wakewordInferenceThreshold?: number;
134
+ }
135
+ type InferenceModels = keyof OnnxConfig['model'];
136
+
137
+ /**
138
+ * Wakey wakey configuration
139
+ */
140
+ interface Config {
141
+ /**
142
+ * Audio config
143
+ */
144
+ audio: AudioConfig;
145
+ /**
146
+ * Onnx config
147
+ */
148
+ onnx: OnnxConfig;
149
+ /**
150
+ * Orb config
151
+ */
152
+ orb?: OrbConfig;
153
+ /**
154
+ * In a sliding window, there is a possibility of detecting the wakeword more than once.
155
+ * This allows a cool down time before processing subsequent detections
156
+ *
157
+ * Default: 1000 (1 second)
158
+ */
159
+ throttleTime?: number;
160
+ /**
161
+ * DEFAULT: [WAKEWORD] -> Start Recording -> Silence (Spoken chunk & transcript) -> Done -> [WAKEWORD] .....
162
+ * VOICE_CHAT: [WAKEWORD] -> Start Recording -> Silence -> [Speaking] -> Start Recording -> Silence .....
163
+ */
164
+ mode?: 'DEFAULT' | 'VOICE_CHAT';
165
+ /**
166
+ * Base asset path. This path will be used to access required resources
167
+ *
168
+ * Default: /wakeywakey
169
+ */
170
+ basePath?: string;
171
+ }
172
+ /**
173
+ * Wakey wakey config token
174
+ */
175
+ declare const CONFIG: InjectionToken<Config>;
176
+
177
+ /**
178
+ * Microphone processor data
179
+ */
180
+ interface MicrophoneProcessorData {
181
+ /**
182
+ * Audio sample
183
+ */
184
+ sample: Float32Array;
185
+ /**
186
+ * RMS value of sample
187
+ */
188
+ rms: number;
189
+ /**
190
+ * Decibel of sample
191
+ */
192
+ db: number;
193
+ /**
194
+ * Normalized decibel (0-1)
195
+ */
196
+ dbNormalized: number;
197
+ }
198
+
199
+ /**
200
+ * Speech event payload
201
+ */
202
+ interface SpeechEvent extends MicrophoneProcessorData {
203
+ /**
204
+ * VAD score of input
205
+ */
206
+ vadScore: number;
207
+ /**
208
+ * Has voice activity
209
+ */
210
+ get hasVoiceActivity(): boolean;
211
+ }
212
+ /**
213
+ * Wake word detected event
214
+ */
215
+ interface WakeWordEvent extends SpeechEvent {
216
+ /**
217
+ * Inference score
218
+ */
219
+ inferenceScore: number;
220
+ /**
221
+ * Chunk of detected wakeword
222
+ */
223
+ chunk: Float32Array[];
224
+ }
225
+ /**
226
+ * Silence event
227
+ */
228
+ interface SilenceEvent {
229
+ /**
230
+ * Audio chunk captured for this event (presumably the speech recorded before silence was detected)
231
+ */
232
+ chunk: Float32Array;
233
+ /**
234
+ * Transcript of speech
235
+ */
236
+ transcript: string;
237
+ /**
238
+ * For DEFAULT mode: always false
239
+ * For VOICE_CHAT mode: true if constant chat is going on, false if stopped
240
+ */
241
+ interimResponse: boolean;
242
+ }
243
+
244
+ declare class WakeyWakeyComponent implements OnInit, OnDestroy {
245
+ /**
246
+ * Fires when library loaded
247
+ */
248
+ ready: EventEmitter<void>;
249
+ /**
250
+ * Fires when there is an error
251
+ */
252
+ exception: EventEmitter<Error>;
253
+ /**
254
+ * Fires when speech is detected
255
+ */
256
+ speech: EventEmitter<SpeechEvent>;
257
+ /**
258
+ * Fires when wake word is detected
259
+ */
260
+ wakeword: EventEmitter<WakeWordEvent>;
261
+ /**
262
+ * Fires when recording starts (after wake word detection)
263
+ */
264
+ recording: EventEmitter<void>;
265
+ /**
266
+ * Fires when silence is detected
267
+ */
268
+ silence: EventEmitter<SilenceEvent>;
269
+ /**
270
+ * Dependencies
271
+ */
272
+ private readonly _config;
273
+ /**
274
+ * Dependencies
275
+ */
276
+ private readonly _platform;
277
+ private readonly _event;
278
+ private readonly _audio;
279
+ private readonly _model;
280
+ /**
281
+ * Subscriptions
282
+ */
283
+ private readonly _subs;
284
+ ngOnInit(): void;
285
+ /**
286
+ * Fire fake wakeword event
287
+ */
288
+ fireWakeWord(): void;
289
+ ngOnDestroy(): void;
290
+ /**
291
+ * Execute
292
+ */
293
+ private _execute;
294
+ /**
295
+ * Listen events
296
+ */
297
+ private _listenEvents;
298
+ static ɵfac: i0.ɵɵFactoryDeclaration<WakeyWakeyComponent, never>;
299
+ static ɵcmp: i0.ɵɵComponentDeclaration<WakeyWakeyComponent, "wakeywakey", never, {}, { "ready": "ready"; "exception": "exception"; "speech": "speech"; "wakeword": "wakeword"; "recording": "recording"; "silence": "silence"; }, never, never, true, never>;
300
+ }
301
+
302
+ declare class AudioService implements OnDestroy {
303
+ /**
304
+ * Dependencies
305
+ */
306
+ private readonly __speaker;
307
+ private readonly _config;
308
+ private readonly _event;
309
+ private readonly _mic;
310
+ private readonly _vad;
311
+ private readonly _pipeline;
312
+ private readonly _speechRecognition;
313
+ private readonly _subs;
314
+ private _endCurrentRecording;
315
+ /**
316
+ * Recording state
317
+ */
318
+ private _isRecording;
319
+ /**
320
+ * Is process is initialized (detected wakeword)
321
+ */
322
+ private _isInitialized;
323
+ get isRecording(): boolean;
324
+ ngOnDestroy(): void;
325
+ /**
326
+ * Initialize audio
327
+ */
328
+ init(): Promise<void>;
329
+ /**
330
+ * Force start recording (without wakeword)
331
+ */
332
+ forceStartRecording(): void;
333
+ /**
334
+ * Force end recording
335
+ */
336
+ forceEndRecording(): void;
337
+ /**
338
+ * Toggle recording
339
+ */
340
+ toggleRecording(): void;
341
+ /**
342
+ * Identifies the wakeword and emits the event
343
+ */
344
+ private _listenForWakeword;
345
+ /**
346
+ * New logic: Captures the full command audio after a wakeword
347
+ */
348
+ private _captureCommandAfterWakeword;
349
+ /**
350
+ * Helper to flatten array of buffers into a single Float32Array
351
+ */
352
+ private _flatten;
353
+ /**
354
+ * Wakeword stream
355
+ * @returns
356
+ */
357
+ private _getWakeWordStream;
358
+ static ɵfac: i0.ɵɵFactoryDeclaration<AudioService, never>;
359
+ static ɵprov: i0.ɵɵInjectableDeclaration<AudioService>;
360
+ }
361
+
362
+ declare class MicrophoneService implements OnDestroy {
363
+ /**
364
+ * Dependencies
365
+ */
366
+ private readonly _event;
367
+ private readonly _config;
368
+ /**
369
+ * Audio data subject
370
+ */
371
+ private readonly _data;
372
+ /**
373
+ * List of available microphones
374
+ */
375
+ private _microphones;
376
+ /**
377
+ * Media stream
378
+ */
379
+ private _stream;
380
+ /**
381
+ * Audio context
382
+ */
383
+ private _audioContext?;
384
+ constructor();
385
+ /**
386
+ * List of available microphones
387
+ */
388
+ get microphones(): MediaDeviceInfo[];
389
+ /**
390
+ * Microphone data
391
+ */
392
+ get data(): Subject<MicrophoneProcessorData>;
393
+ /**
394
+ * Set input source
395
+ */
396
+ set source(deviceId: string);
397
+ ngOnDestroy(): void;
398
+ /**
399
+ * Initialize
400
+ *
401
+ * @param deviceId Input device id (from microphone list)
402
+ */
403
+ private _init;
404
+ /**
405
+ * Monitor audio
406
+ *
407
+ * @returns chunk subject
408
+ */
409
+ private _monitor;
410
+ /**
411
+ * Save microphones
412
+ */
413
+ private _microphoneList;
414
+ /**
415
+ * Prepare worklet node
416
+ */
417
+ private _workletNode;
418
+ static ɵfac: i0.ɵɵFactoryDeclaration<MicrophoneService, never>;
419
+ static ɵprov: i0.ɵɵInjectableDeclaration<MicrophoneService>;
420
+ }
421
+
422
+ declare class SpeakerService implements OnDestroy {
423
+ /**
424
+ * Dependencies
425
+ */
426
+ private readonly _config;
427
+ private readonly _platform;
428
+ private readonly _event;
429
+ private readonly _subs;
430
+ private _upSound;
431
+ private _downSound;
432
+ constructor();
433
+ ngOnDestroy(): void;
434
+ /**
435
+ * Play on sound
436
+ */
437
+ playUp(): void;
438
+ /**
439
+ * Play off sound
440
+ */
441
+ playDown(): void;
442
+ /**
443
+ * Load subscriptions
444
+ */
445
+ private _loadSubscriptions;
446
+ static ɵfac: i0.ɵɵFactoryDeclaration<SpeakerService, never>;
447
+ static ɵprov: i0.ɵɵInjectableDeclaration<SpeakerService>;
448
+ }
449
+
450
+ declare class SpeechRecognitionService implements OnDestroy {
451
+ /**
452
+ * Dependencies
453
+ */
454
+ private readonly _event;
455
+ private readonly _platform;
456
+ private _recognitionClass;
457
+ /**
458
+ * Instance
459
+ */
460
+ private _recognition;
461
+ /**
462
+ * Transcript
463
+ */
464
+ private _transcript;
465
+ /**
466
+ * Get transcript
467
+ */
468
+ get transcript(): string;
469
+ ngOnDestroy(): void;
470
+ /**
471
+ * Clear transcript
472
+ */
473
+ reset(): void;
474
+ init(): void;
475
+ static ɵfac: i0.ɵɵFactoryDeclaration<SpeechRecognitionService, never>;
476
+ static ɵprov: i0.ɵɵInjectableDeclaration<SpeechRecognitionService>;
477
+ }
478
+
479
+ declare class VadService {
480
+ /**
481
+ * Dependencies
482
+ */
483
+ private readonly _event;
484
+ private readonly _model;
485
+ /**
486
+ * VAD Shape
487
+ */
488
+ private _shape;
489
+ /**
490
+ * VAD LSTM hidden & cell state
491
+ */
492
+ private _state;
493
+ /**
494
+ * Get session
495
+ */
496
+ private get _session();
497
+ /**
498
+ * Initialize
499
+ */
500
+ init(): void;
501
+ /**
502
+ * Get VAD score
503
+ *
504
+ * @param sample Audio sample
505
+ * @returns
506
+ */
507
+ score(sample: Float32Array): Promise<number>;
508
+ /**
509
+ * Get shape of vad session
510
+ *
511
+ * Ex: [2, 1, 64]
512
+ */
513
+ private _getShape;
514
+ static ɵfac: i0.ɵɵFactoryDeclaration<VadService, never>;
515
+ static ɵprov: i0.ɵɵInjectableDeclaration<VadService>;
516
+ }
517
+
518
+ declare class ConfigService {
519
+ private readonly _config;
520
+ /**
521
+ * Audio config
522
+ */
523
+ get audio(): _prabhjeet_me_wakeywakey.WakeyWakeyAudioConfig;
524
+ /**
525
+ * Onnx config
526
+ */
527
+ get onnx(): _prabhjeet_me_wakeywakey.WakeyWakeyOnnxConfig;
528
+ /**
529
+ * Orb config
530
+ */
531
+ get orb(): OrbConfig | undefined;
532
+ /**
533
+ * Throttle time
534
+ */
535
+ get throttleTime(): number | undefined;
536
+ /**
537
+ * Mode
538
+ */
539
+ get mode(): "DEFAULT" | "VOICE_CHAT" | undefined;
540
+ /**
541
+ * Base path of assets
542
+ */
543
+ get basePath(): string;
544
+ static ɵfac: i0.ɵɵFactoryDeclaration<ConfigService, never>;
545
+ static ɵprov: i0.ɵɵInjectableDeclaration<ConfigService>;
546
+ }
547
+
548
+ declare class EventService {
549
+ /**
550
+ * Fires when library loaded
551
+ */
552
+ readonly ready: Subject<void>;
553
+ /**
554
+ * Fires when there is a message to log
555
+ */
556
+ readonly log: Subject<string>;
557
+ /**
558
+ * Fires when there is an error
559
+ */
560
+ readonly exception: Subject<Error>;
561
+ /**
562
+ * Fires when speech is detected
563
+ */
564
+ readonly speech: Subject<SpeechEvent>;
565
+ /**
566
+ * Fires when wake word is detected
567
+ */
568
+ readonly wakeword: Subject<WakeWordEvent>;
569
+ /**
570
+ * Fires when recording starts (after wake word detection)
571
+ */
572
+ readonly recording: Subject<void>;
573
+ /**
574
+ * Fires when silence is detected
575
+ */
576
+ readonly silence: Subject<SilenceEvent>;
577
+ static ɵfac: i0.ɵɵFactoryDeclaration<EventService, never>;
578
+ static ɵprov: i0.ɵɵInjectableDeclaration<EventService>;
579
+ }
580
+
581
+ declare class ModelService {
582
+ /**
583
+ * Inference session
584
+ */
585
+ private _inferenceSession;
586
+ /**
587
+ * Get melspectrogram inference session
588
+ */
589
+ get melSpectrogram(): InferenceSession;
590
+ /**
591
+ * Get embedding inference session
592
+ */
593
+ get embedding(): InferenceSession;
594
+ /**
595
+ * Get Silero VAD inference session
596
+ */
597
+ get sileroVAD(): InferenceSession;
598
+ /**
599
+ * Get wakeword inference session
600
+ */
601
+ get wakeword(): InferenceSession;
602
+ /**
603
+ * Set session instance
604
+ */
605
+ set session(sessions: Record<InferenceModels, InferenceSession | undefined>);
606
+ static ɵfac: i0.ɵɵFactoryDeclaration<ModelService, never>;
607
+ static ɵprov: i0.ɵɵInjectableDeclaration<ModelService>;
608
+ }
609
+
610
+ declare class PipelineService {
611
+ /**
612
+ * Dependencies
613
+ */
614
+ private readonly _model;
615
+ private readonly MEL_WINDOW_SIZE;
616
+ private readonly MEL_HOP_SIZE;
617
+ private readonly EMBEDDING_COUNT;
618
+ private readonly FEATURE_DIM;
619
+ private readonly MEL_BINS;
620
+ /**
621
+ * Historical buffer of embeddings representing the last ~1-2 seconds of audio context.
622
+ * Initialized with empty (zero) vectors.
623
+ */
624
+ private readonly _embeddingQueue;
625
+ /**
626
+ * Buffer of calculated Mel Spectrogram frames waiting to be processed.
627
+ */
628
+ private readonly _melFrameQueue;
629
+ /**
630
+ * Main entry point: Processes a new chunk of audio and returns a detection score.
631
+ */
632
+ run(speech: SpeechEvent): Promise<number>;
633
+ /**
634
+ * STAGE 1: Converts raw audio samples into Mel Frequency bins.
635
+ */
636
+ private _generateMelSpectrogram;
637
+ /**
638
+ * STAGE 2: Extracts features (embeddings) from a window of Mel frames.
639
+ */
640
+ private _processWindowToEmbeddings;
641
+ /**
642
+ * STAGE 3: Final classification score based on temporal embedding sequence.
643
+ */
644
+ private _getWakeWordScore;
645
+ static ɵfac: i0.ɵɵFactoryDeclaration<PipelineService, never>;
646
+ static ɵprov: i0.ɵɵInjectableDeclaration<PipelineService>;
647
+ }
648
+
649
+ declare class PlatformService {
650
+ private readonly _platform;
651
+ /**
652
+ * Is browser
653
+ */
654
+ get isBrowser(): boolean;
655
+ /**
656
+ * Is server
657
+ */
658
+ get isServer(): boolean;
659
+ static ɵfac: i0.ɵɵFactoryDeclaration<PlatformService, never>;
660
+ static ɵprov: i0.ɵɵInjectableDeclaration<PlatformService>;
661
+ }
662
+
663
+ /**
664
+ * Provide wakey wakey configuration
665
+ *
666
+ * @param config Wakey Wakey configuration
667
+ * @returns
668
+ */
669
+ declare function provideWakeyWakey(config: Config): (typeof ConfigService | typeof EventService | typeof ModelService | typeof PipelineService | typeof MicrophoneService | typeof PlatformService | typeof SpeakerService | typeof SpeechRecognitionService | typeof VadService | typeof AudioService | i0.EnvironmentProviders | {
670
+ provide: i0.InjectionToken<Config>;
671
+ useValue: Config;
672
+ })[];
673
+
674
+ export { CONFIG as WAKEYWAKEY_CONFIG, AudioUtil as WakeyWakeyAudioUtil, WakeyWakeyComponent, provideWakeyWakey };
675
+ export type { AudioConfig as WakeyWakeyAudioConfig, Config as WakeyWakeyConfig, OnnxConfig as WakeyWakeyOnnxConfig, SilenceEvent as WakeyWakeySilenceEvent, SpeechEvent as WakeyWakeySpeechEvent, WakeWordEvent as WakeyWakeyWordEvent };
package/package.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "name": "@prabhjeet.me/wakeywakey",
3
+ "version": "1.0.0",
4
+ "repository": {
5
+ "url": "https://github.com/prabhjeet-me/WakeyWakey"
6
+ },
7
+ "peerDependencies": {
8
+ "@angular/common": "^20.3.0",
9
+ "@angular/core": "^20.3.0",
10
+ "onnxruntime-web": "^1.24.1",
11
+ "three": "^0.183.0"
12
+ },
13
+ "dependencies": {
14
+ "tslib": "^2.3.0"
15
+ },
16
+ "sideEffects": false,
17
+ "module": "fesm2022/prabhjeet.me-wakeywakey.mjs",
18
+ "typings": "index.d.ts",
19
+ "exports": {
20
+ "./package.json": {
21
+ "default": "./package.json"
22
+ },
23
+ ".": {
24
+ "types": "./index.d.ts",
25
+ "default": "./fesm2022/prabhjeet.me-wakeywakey.mjs"
26
+ }
27
+ }
28
+ }