@prabhjeet.me/wakeywakey 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -0
- package/assets/models/alexa_v0.1.onnx +0 -0
- package/assets/models/embedding_model.onnx +0 -0
- package/assets/models/hey_jarvis_v0.1.onnx +0 -0
- package/assets/models/hey_marvin_v0.1.onnx +0 -0
- package/assets/models/hey_mycroft_v0.1.onnx +0 -0
- package/assets/models/melspectrogram.onnx +0 -0
- package/assets/models/silero_vad_v4.onnx +0 -0
- package/assets/sounds/down.mp3 +0 -0
- package/assets/sounds/up.mp3 +0 -0
- package/assets/wasm/ort-wasm-simd-threaded.asyncify.mjs +116 -0
- package/assets/wasm/ort-wasm-simd-threaded.asyncify.wasm +0 -0
- package/assets/wasm/ort-wasm-simd-threaded.jsep.mjs +106 -0
- package/assets/wasm/ort-wasm-simd-threaded.jsep.wasm +0 -0
- package/assets/wasm/ort-wasm-simd-threaded.jspi.mjs +110 -0
- package/assets/wasm/ort-wasm-simd-threaded.jspi.wasm +0 -0
- package/assets/wasm/ort-wasm-simd-threaded.mjs +59 -0
- package/assets/wasm/ort-wasm-simd-threaded.wasm +0 -0
- package/assets/wasm/rnnoise.wasm +0 -0
- package/assets/wasm/rnnoise_simd.wasm +0 -0
- package/assets/worklets/workletProcessor.js +13 -0
- package/fesm2022/prabhjeet.me-wakeywakey.mjs +1411 -0
- package/fesm2022/prabhjeet.me-wakeywakey.mjs.map +1 -0
- package/index.d.ts +675 -0
- package/package.json +28 -0
|
@@ -0,0 +1,1411 @@
|
|
|
1
|
+
import * as i0 from '@angular/core';
|
|
2
|
+
import { InjectionToken, inject, Injectable, PLATFORM_ID, HostListener, ViewChild, Component, EventEmitter, Output, provideAppInitializer } from '@angular/core';
|
|
3
|
+
import { Subject, withLatestFrom, concatMap, filter, map, distinctUntilChanged, switchMap, EMPTY, tap, ignoreElements, timer, merge, take, throttleTime, share, delay, takeUntil, scan } from 'rxjs';
|
|
4
|
+
import { SubSink } from 'subsink';
|
|
5
|
+
import * as THREE from 'three';
|
|
6
|
+
import { Tensor, env, InferenceSession } from 'onnxruntime-web';
|
|
7
|
+
import { loadRnnoise, RnnoiseWorkletNode } from '@sapphi-red/web-noise-suppressor';
|
|
8
|
+
import { isPlatformBrowser, isPlatformServer } from '@angular/common';
|
|
9
|
+
|
|
10
|
+
/**
 * Audio sample rate in Hz. Used for the AudioContext, the VAD sample-rate
 * tensor and as the default WAV header rate.
 */
const SAMPLE_RATE = 16000;
/**
 * Name under which the microphone buffering processor is registered inside
 * the AudioWorklet scope (and by which the AudioWorkletNode is created).
 */
const MICROPHONE_PROCESSOR_NAME = 'microphone-buffer-processor';
/**
 * Source code of the AudioWorklet processor. It is loaded through a Blob URL,
 * so everything between the backticks runs inside the worklet scope, not in
 * this module.
 *
 * The processor batches the first channel of its first input into chunks of
 * 1280 samples and posts `{ sample, rms, db, dbNormalized }` per chunk.
 *
 * Fixes compared to the previous revision:
 *  - `rms`/`db` describe the whole emitted chunk (previously only the last
 *    128-sample render quantum contributed to them);
 *  - silence yields a finite `db` (`rms ?? 0.00001` never fell back because
 *    `rms` is never nullish, so `log10(0)` produced `-Infinity`);
 *  - the chunk is flushed as soon as it is full, so a render quantum whose
 *    size does not divide 1280 can no longer overrun the buffer and stall.
 */
const MICROPHONE_PROCESSOR = `
class MicrophoneBufferProcessor extends AudioWorkletProcessor {
  // Samples per emitted chunk: 10 render quanta of 128 frames = 1280 samples.
  frameBlockSize = 128 * 10;

  // Staging buffer for the chunk that is currently being assembled.
  accumulatedSamples = new Float32Array(this.frameBlockSize);

  // Next free slot in accumulatedSamples.
  writeIndex = 0;

  // Running sum of squared samples of the chunk that is being assembled.
  sumOfSquares = 0;

  // Map a dB value onto [0, 1]: -60 dB or quieter -> 0, 0 dB or louder -> 1.
  dbNormalized(db) {
    const minDb = -60;
    const maxDb = 0;

    const clamped = Math.max(minDb, Math.min(db, maxDb));
    return (clamped - minDb) / (maxDb - minDb);
  }

  // Post the completed chunk with its level metrics and start a new chunk.
  flush() {
    const rms = Math.sqrt(this.sumOfSquares / this.frameBlockSize);
    // Floor the level so that digital silence gives -100 dB, not -Infinity.
    const db = 20 * Math.log10(Math.max(rms, 0.00001));

    this.writeIndex = 0;
    this.sumOfSquares = 0;

    // send a copy to the main thread; the staging buffer is reused
    this.port.postMessage({
      sample: this.accumulatedSamples.slice(),
      rms,
      db,
      dbNormalized: this.dbNormalized(db),
    });
  }

  // Called by the audio engine once per render quantum.
  process(inputList) {
    const firstInput = inputList[0];

    // no input connected
    if (!firstInput || firstInput.length === 0) {
      return true;
    }

    // raw waveform values of the first channel
    const firstChannelSamples = firstInput[0];

    for (let i = 0; i < firstChannelSamples.length; i++) {
      const value = firstChannelSamples[i];
      this.accumulatedSamples[this.writeIndex++] = value;
      this.sumOfSquares += value * value;

      // flush per sample so any quantum size lands exactly on the boundary
      if (this.writeIndex === this.frameBlockSize) {
        this.flush();
      }
    }

    // keep the processor alive
    return true;
  }
}

registerProcessor("${MICROPHONE_PROCESSOR_NAME}", MicrophoneBufferProcessor);
`;
|
|
88
|
+
|
|
89
|
+
/**
 * Audio helpers
 */
class AudioUtil {
    /**
     * Encode float audio chunks as a mono, 16-bit PCM WAV file and expose it
     * through an object URL.
     *
     * @param chunks List of sample arrays; samples are clamped to [-1, 1]
     * @param sampleRate Sample rate written into the WAV header
     * @returns Object URL of the WAV blob, or null when there are no samples
     */
    static createWavBlob(chunks, sampleRate = SAMPLE_RATE) {
        let sampleCount = 0;
        for (const chunk of chunks) {
            sampleCount += chunk.length;
        }
        if (!sampleCount) {
            return null;
        }
        // Concatenate all chunks into one float buffer
        const merged = new Float32Array(sampleCount);
        let cursor = 0;
        for (const chunk of chunks) {
            merged.set(chunk, cursor);
            cursor += chunk.length;
        }
        // Float [-1, 1] -> signed 16-bit; negatives scale by 0x8000, positives by 0x7fff
        const pcm = new Int16Array(sampleCount);
        let index = 0;
        for (const value of merged) {
            const clamped = Math.min(1, Math.max(-1, value));
            pcm[index++] = clamped * (clamped < 0 ? 0x8000 : 0x7fff);
        }
        // 44-byte canonical RIFF/WAVE header, little-endian
        const channelCount = 1;
        const sampleBits = 16;
        const frameBytes = channelCount * (sampleBits / 8);
        const header = new DataView(new ArrayBuffer(44));
        const putTag = (at, tag) => {
            for (let k = 0; k < tag.length; k++) {
                header.setUint8(at + k, tag.charCodeAt(k));
            }
        };
        putTag(0, 'RIFF');
        header.setUint32(4, 36 + pcm.byteLength, true);
        putTag(8, 'WAVE');
        putTag(12, 'fmt ');
        header.setUint32(16, 16, true); // size of the fmt chunk
        header.setUint16(20, 1, true); // PCM
        header.setUint16(22, channelCount, true);
        header.setUint32(24, sampleRate, true);
        header.setUint32(28, sampleRate * frameBytes, true); // byte rate
        header.setUint16(32, frameBytes, true); // block align
        header.setUint16(34, sampleBits, true);
        putTag(36, 'data');
        header.setUint32(40, pcm.byteLength, true);
        return URL.createObjectURL(new Blob([header.buffer, pcm], { type: 'audio/wav' }));
    }
}
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Wakey wakey config token
|
|
148
|
+
*/
|
|
149
|
+
// Read by ConfigService through inject(CONFIG); the provider that registers the value is not visible in this chunk.
const CONFIG = new InjectionToken('WAKEYWAKEY_CONFIG');
|
|
150
|
+
|
|
151
|
+
class ConfigService {
    /**
     * Library configuration resolved from the CONFIG injection token
     */
    _config = inject(CONFIG);
    /**
     * Audio section of the configuration
     */
    get audio() {
        const { audio } = this._config;
        return audio;
    }
    /**
     * Onnx section of the configuration
     */
    get onnx() {
        const { onnx } = this._config;
        return onnx;
    }
    /**
     * Orb section of the configuration
     */
    get orb() {
        const { orb } = this._config;
        return orb;
    }
    /**
     * Configured throttle time
     */
    get throttleTime() {
        const { throttleTime } = this._config;
        return throttleTime;
    }
    /**
     * Configured mode
     */
    get mode() {
        const { mode } = this._config;
        return mode;
    }
    /**
     * Base path of assets; any falsy configured value falls back to '/wakeywakey'
     */
    get basePath() {
        const { basePath } = this._config;
        return basePath ? basePath : '/wakeywakey';
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: ConfigService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: ConfigService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: ConfigService, decorators: [{
            type: Injectable
        }] });
|
|
197
|
+
|
|
198
|
+
class EventService {
    /**
     * Fires when the library has loaded
     */
    ready = new Subject();
    /**
     * Fires when there is a message to log
     */
    log = new Subject();
    /**
     * Fires when there is an error
     */
    exception = new Subject();
    /**
     * Fires when speech is detected
     */
    speech = new Subject();
    /**
     * Fires when the wake word is detected
     */
    wakeword = new Subject();
    /**
     * Fires when recording starts (after wake word detection)
     */
    recording = new Subject();
    /**
     * Fires when silence is detected
     */
    silence = new Subject();
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: EventService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: EventService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: EventService, decorators: [{
            type: Injectable
        }] });
|
|
235
|
+
|
|
236
|
+
// NOTE(review): presumably the fallback wake-word score threshold; its consumer is outside the visible chunk — confirm.
const DEFAULT_INFERENCE_SCORE = 0.5;
|
|
237
|
+
|
|
238
|
+
// NOTE(review): presumably the number of frames speech stays "active" after the VAD score drops; its consumer is outside the visible chunk — confirm.
const VAD_HANGOVER_FRAMES = 12;
|
|
239
|
+
|
|
240
|
+
class ModelService {
    /**
     * Inference sessions keyed by model; every slot is undefined until the
     * `session` setter installs a loaded set
     */
    _inferenceSession = {
        melspectrogram: undefined,
        embedding_model: undefined,
        silero_vad: undefined,
        wakeword: undefined,
    };
    /**
     * Melspectrogram inference session
     */
    get melSpectrogram() {
        return this._inferenceSession.melspectrogram;
    }
    /**
     * Embedding inference session
     */
    get embedding() {
        return this._inferenceSession.embedding_model;
    }
    /**
     * Silero VAD inference session
     */
    get sileroVAD() {
        return this._inferenceSession.silero_vad;
    }
    /**
     * Wakeword inference session
     */
    get wakeword() {
        return this._inferenceSession.wakeword;
    }
    /**
     * Replace the whole set of session instances
     */
    set session(sessions) {
        this._inferenceSession = sessions;
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: ModelService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: ModelService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: ModelService, decorators: [{
            type: Injectable
        }] });
|
|
288
|
+
|
|
289
|
+
class PipelineService {
    constructor() {
        /**
         * Dependencies
         */
        this._model = inject(ModelService);
        // Configuration Constants
        this.MEL_WINDOW_SIZE = 76; // Number of mel frames needed for one embedding
        this.MEL_HOP_SIZE = 8; // How many frames to drop (the slide) after each inference
        this.EMBEDDING_COUNT = 16; // Number of embeddings kept in memory (temporal context)
        this.FEATURE_DIM = 96; // Size of each embedding vector
        this.MEL_BINS = 32; // Number of frequency bins per mel frame
        /**
         * Rolling history of the last EMBEDDING_COUNT embeddings.
         * Starts out as all-zero vectors.
         */
        this._embeddingQueue = Array.from({ length: this.EMBEDDING_COUNT }, () => new Float32Array(this.FEATURE_DIM));
        /**
         * Mel spectrogram frames that are waiting to be turned into embeddings.
         */
        this._melFrameQueue = [];
    }
    /**
     * Main entry point: feeds one audio chunk through the three model stages.
     *
     * @param speech Chunk object; only `speech.sample` (the raw samples) is read
     * @returns Score of the last window classified during this call, or 0 when
     * fewer than MEL_WINDOW_SIZE mel frames are queued
     */
    async run(speech) {
        // 1. Convert raw audio into mel frames and queue them
        await this._generateMelSpectrogram(speech.sample);
        let latestScore = 0;
        // 2. Sliding window: classify while a full window of frames is available
        while (this._melFrameQueue.length >= this.MEL_WINDOW_SIZE) {
            const combinedEmbeddings = await this._processWindowToEmbeddings();
            latestScore = await this._getWakeWordScore(combinedEmbeddings);
            // Slide the window: drop the oldest MEL_HOP_SIZE frames
            this._melFrameQueue.splice(0, this.MEL_HOP_SIZE);
        }
        return latestScore;
    }
    /**
     * STAGE 1: runs the melspectrogram model and queues its output frames.
     *
     * @param samples Raw audio samples, sent to the model as a [1, N] tensor
     */
    async _generateMelSpectrogram(samples) {
        const session = this._model.melSpectrogram;
        const inputTensor = new Tensor('float32', samples, [1, samples.length]);
        const output = await session.run({ [session.inputNames[0]]: inputTensor });
        const rawMelData = output[session.outputNames[0]].data;
        // Rescale to the range the embedding model is fed with: (value / 10) + 2
        const normalizedMel = rawMelData.map((val) => val / 10.0 + 2.0);
        // Derive the frame count from the output instead of assuming 5 frames:
        // a shorter output used to enqueue empty frames (silently read as zeros).
        // NOTE(review): assumes the flat output is frame-major with MEL_BINS values per frame — confirm.
        const frameCount = Math.floor(normalizedMel.length / this.MEL_BINS);
        for (let i = 0; i < frameCount; i++) {
            const start = i * this.MEL_BINS;
            this._melFrameQueue.push(normalizedMel.subarray(start, start + this.MEL_BINS));
        }
    }
    /**
     * STAGE 2: turns the oldest full window of mel frames into one embedding
     * and appends it to the rolling embedding history.
     *
     * @returns The whole embedding history flattened into one Float32Array
     * (EMBEDDING_COUNT * FEATURE_DIM values)
     */
    async _processWindowToEmbeddings() {
        const session = this._model.embedding;
        // Flatten the first MEL_WINDOW_SIZE queued frames into one buffer
        const windowFrames = this._melFrameQueue.slice(0, this.MEL_WINDOW_SIZE);
        const flattenedInput = new Float32Array(this.MEL_WINDOW_SIZE * this.MEL_BINS);
        windowFrames.forEach((frame, i) => {
            flattenedInput.set(frame, i * this.MEL_BINS);
        });
        // Tensor dims come from the same constants that size the buffer: [1, 76, 32, 1]
        const inputTensor = new Tensor('float32', flattenedInput, [
            1,
            this.MEL_WINDOW_SIZE,
            this.MEL_BINS,
            1,
        ]);
        const output = await session.run({ [session.inputNames[0]]: inputTensor });
        const newEmbedding = output[session.outputNames[0]].data;
        // First-in, first-out update of the history (copy, do not keep the model's buffer)
        this._embeddingQueue.shift();
        this._embeddingQueue.push(new Float32Array(newEmbedding));
        const combinedBuffer = new Float32Array(this.EMBEDDING_COUNT * this.FEATURE_DIM);
        this._embeddingQueue.forEach((emb, i) => {
            combinedBuffer.set(emb, i * this.FEATURE_DIM);
        });
        return combinedBuffer;
    }
    /**
     * STAGE 3: classifies the embedding history with the wakeword model.
     *
     * @param embeddings Flattened history, sent as [1, EMBEDDING_COUNT, FEATURE_DIM]
     * @returns First value of the model's first output
     */
    async _getWakeWordScore(embeddings) {
        const session = this._model.wakeword;
        const inputTensor = new Tensor('float32', embeddings, [
            1,
            this.EMBEDDING_COUNT,
            this.FEATURE_DIM,
        ]);
        const results = await session.run({ [session.inputNames[0]]: inputTensor });
        return results[session.outputNames[0]].data[0];
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: PipelineService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: PipelineService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: PipelineService, decorators: [{
            type: Injectable
        }] });
|
|
401
|
+
|
|
402
|
+
// NOTE(review): presumably milliseconds of silence before a recording is considered finished; its consumer is outside the visible chunk — confirm.
const DEFAULT_SILENCE_DURATION = 1000;
|
|
403
|
+
|
|
404
|
+
class MicrophoneService {
    constructor() {
        /**
         * Dependencies
         */
        this._event = inject(EventService);
        this._config = inject(ConfigService);
        /**
         * Audio data subject
         */
        this._data = new Subject();
        /**
         * List of available microphones
         */
        this._microphones = [];
        // Start capturing from the default device. Not awaited on purpose:
        // _init reports failures through the exception subject.
        this._init();
    }
    /**
     * List of available microphones (audio input devices found by the last
     * successful initialization)
     */
    get microphones() {
        return this._microphones;
    }
    /**
     * Microphone data: emits every message posted by the buffering worklet
     */
    get data() {
        return this._data;
    }
    /**
     * Set input source: re-initializes the capture with the given device id
     */
    set source(deviceId) {
        this._init(deviceId);
    }
    /**
     * Release the capture resources. Also used by _init as its cleanup step,
     * so it must be safe to call repeatedly and before anything was acquired.
     */
    ngOnDestroy() {
        // Stop the tracks, otherwise the microphone stays captured after a
        // destroy or a source switch.
        this._stream?.getTracks().forEach((track) => track.stop());
        this._stream = undefined;
        const context = this._audioContext;
        this._audioContext = undefined;
        // close() rejects on an already closed context; never leave that unhandled
        if (context && context.state !== 'closed') {
            context.close().catch((error) => this._event.exception.next(error));
        }
    }
    /**
     * Initialize: request the microphone, list the input devices and start
     * forwarding worklet chunks.
     *
     * @param deviceId Input device id (from microphone list); falsy selects the default device
     * @returns true when capturing started, false when any step failed (the
     * error is emitted on the exception subject)
     */
    async _init(deviceId) {
        try {
            // cleanup
            this.ngOnDestroy();
            // Browser-side processing is switched off for every device, not
            // only for the default one.
            const audio = {
                noiseSuppression: false,
                echoCancellation: false,
            };
            if (deviceId) {
                audio.deviceId = { exact: deviceId };
            }
            // request permission
            this._stream = await navigator.mediaDevices.getUserMedia({ audio });
            this._event.log.next(`${MicrophoneService.name}: Microphone permission granted (deviceid: '${deviceId ?? 'default'}')!`);
            // save list of microphones
            this._microphones = await this._microphoneList();
            // Awaited so that worklet setup failures land in the catch below
            // instead of becoming unhandled rejections.
            await this._monitor();
            return true;
        }
        catch (error) {
            // do not keep a half-initialized capture alive
            this.ngOnDestroy();
            this._event.exception.next(error);
        }
        return false;
    }
    /**
     * Monitor audio: builds the audio graph and forwards every non-empty
     * worklet message to the data subject.
     *
     * @returns chunk subject
     */
    async _monitor() {
        const worklet = await this._workletNode();
        worklet.port.onmessage = (event) => {
            const data = event.data ?? null;
            if (!data)
                return;
            // emit chunk
            this._data.next(data);
        };
        return this._data;
    }
    /**
     * List the audio input devices
     */
    async _microphoneList() {
        const devices = await navigator.mediaDevices.enumerateDevices();
        return devices.filter((device) => device.kind === 'audioinput');
    }
    /**
     * Prepare worklet node.
     *
     * Graph: Source -> RNNoise (only with noise suppression) -> Gain -> buffering worklet
     *
     * @returns The buffering AudioWorkletNode
     */
    async _workletNode() {
        const noiseSuppression = this._config.audio.noiseSuppression;
        // Create audio context
        this._audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
        if (noiseSuppression) {
            await this._audioContext.audioWorklet.addModule(noiseSuppression.worklet ??
                `${this._config.basePath}/worklets/workletProcessor.js`);
        }
        // Load custom worklet from an in-memory module
        const blob = new Blob([MICROPHONE_PROCESSOR], { type: 'application/javascript' });
        const workletURL = URL.createObjectURL(blob);
        try {
            await this._audioContext.audioWorklet.addModule(workletURL);
        }
        finally {
            // release the blob URL even when the module fails to load
            URL.revokeObjectURL(workletURL);
        }
        // Create Nodes
        const source = this._audioContext.createMediaStreamSource(this._stream);
        // Gain Node; a missing gain keeps unity gain (assigning undefined would throw)
        const gainNode = this._audioContext.createGain();
        gainNode.gain.value = this._config.audio.gain ?? 1;
        if (noiseSuppression) {
            // Load RNNoise dependencies
            const rnnoiseWasmBinary = await loadRnnoise({
                url: noiseSuppression.rnnoise ??
                    `${this._config.basePath}/wasm/rnnoise.wasm`,
                simdUrl: noiseSuppression.rnnoise_simd ??
                    `${this._config.basePath}/wasm/rnnoise_simd.wasm`,
            });
            // RNNoise Node
            const rnnoiseNode = new RnnoiseWorkletNode(this._audioContext, {
                wasmBinary: rnnoiseWasmBinary,
                maxChannels: 1, // Standard for mono microphone input
            });
            source.connect(rnnoiseNode);
            rnnoiseNode.connect(gainNode);
        }
        else {
            source.connect(gainNode);
        }
        // Custom Worklet Node
        const workletNode = new AudioWorkletNode(this._audioContext, MICROPHONE_PROCESSOR_NAME);
        gainNode.connect(workletNode);
        return workletNode;
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: MicrophoneService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: MicrophoneService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: MicrophoneService, decorators: [{
            type: Injectable
        }], ctorParameters: () => [] });
|
|
549
|
+
|
|
550
|
+
class PlatformService {
    /**
     * Platform identifier provided by Angular
     */
    _platform = inject(PLATFORM_ID);
    /**
     * Whether the code runs on the browser platform
     */
    get isBrowser() {
        const platformId = this._platform;
        return isPlatformBrowser(platformId);
    }
    /**
     * Whether the code runs on the server platform
     */
    get isServer() {
        const platformId = this._platform;
        return isPlatformServer(platformId);
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: PlatformService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: PlatformService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: PlatformService, decorators: [{
            type: Injectable
        }] });
|
|
572
|
+
|
|
573
|
+
class SpeakerService {
    constructor() {
        /**
         * Dependencies
         */
        this._config = inject(ConfigService);
        this._platform = inject(PlatformService);
        this._event = inject(EventService);
        this._subs = new SubSink();
        // Sounds explicitly disabled: create no elements and no subscriptions
        if (this._config.audio.sound?.enable === false)
            return;
        // Audio is only available in browser context
        if (this._platform.isBrowser) {
            this._upSound = new Audio(this._config.audio.sound?.up ?? `${this._config.basePath}/sounds/up.mp3`);
            this._downSound = new Audio(this._config.audio.sound?.down ?? `${this._config.basePath}/sounds/down.mp3`);
            this._upSound.preload = this._downSound.preload = 'auto';
            this._loadSubscriptions();
        }
    }
    ngOnDestroy() {
        this._subs.unsubscribe();
    }
    /**
     * Play on sound
     */
    playUp() {
        this._play(this._upSound);
    }
    /**
     * Play off sound
     */
    playDown() {
        this._play(this._downSound);
    }
    /**
     * Play one cue unless sounds are disabled.
     *
     * Tolerates a missing element (none is created outside the browser) and
     * routes a rejected play() promise to the exception subject instead of
     * leaving it unhandled.
     *
     * @param sound Audio element to play, possibly undefined
     */
    _play(sound) {
        if (this._config.audio.sound?.enable === false)
            return;
        const playback = sound?.play();
        playback?.catch((error) => this._event.exception.next(error));
    }
    /**
     * Load subscriptions: up cue on wake word, down cue on silence
     */
    _loadSubscriptions() {
        this._subs.sink = this._event.wakeword.subscribe(() => {
            this.playUp();
        });
        // Silence events flagged with interimResponse do not get the down cue
        this._subs.sink = this._event.silence.subscribe((ev) => {
            if (!ev.interimResponse)
                this.playDown();
        });
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: SpeakerService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: SpeakerService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: SpeakerService, decorators: [{
            type: Injectable
        }], ctorParameters: () => [] });
|
|
630
|
+
|
|
631
|
+
class SpeechRecognitionService {
    constructor() {
        /**
         * Dependencies
         */
        this._event = inject(EventService);
        this._platform = inject(PlatformService);
        /**
         * Transcript
         */
        this._transcript = '';
    }
    /**
     * Get transcript (text assembled from the most recent result event)
     */
    get transcript() {
        return this._transcript;
    }
    ngOnDestroy() {
        this.reset(); // clear transcript
        // No recognizer exists when init() never ran or found no support
        this._recognition?.stop();
    }
    /**
     * Clear transcript
     */
    reset() {
        this._transcript = '';
    }
    /**
     * Create, configure and start the browser speech recognizer.
     *
     * Does nothing outside the browser. When the browser has no speech
     * recognition implementation, an error is emitted on the exception
     * subject and nothing is started.
     */
    init() {
        // `window` does not exist on the server
        if (!this._platform.isBrowser)
            return;
        this._recognitionClass = window.SpeechRecognition || window.webkitSpeechRecognition;
        // Test the resolved class: the bare global `SpeechRecognition` is not
        // declared in browsers that only ship the webkit-prefixed constructor
        // (or none at all), so reading it threw a ReferenceError.
        if (!this._recognitionClass) {
            this._event.exception.next(new Error('Speech recognition is not supported'));
            return;
        }
        this._recognition = new this._recognitionClass(); // initialize
        // Configuration
        this._recognition.lang = 'en-US'; // Set language
        this._recognition.continuous = true; // Keep listening even if the user pauses
        this._recognition.interimResults = true; // Report results while the user is still speaking
        // Handle results: the transcript is rebuilt from the results that
        // changed in this event (resultIndex onwards).
        // NOTE(review): results before resultIndex are dropped from the transcript — confirm this is intended.
        this._recognition.onresult = (event) => {
            this._transcript = '';
            for (let i = event.resultIndex; i < event.results.length; i++)
                this._transcript += event.results[i][0].transcript;
        };
        this._recognition.start(); // start
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: SpeechRecognitionService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: SpeechRecognitionService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: SpeechRecognitionService, decorators: [{
            type: Injectable
        }] });
|
|
685
|
+
|
|
686
|
+
class VadService {
    constructor() {
        /**
         * Dependencies
         */
        this._event = inject(EventService);
        this._model = inject(ModelService);
        /**
         * Dims of the VAD state tensors (filled by init)
         */
        this._shape = [];
        /**
         * VAD LSTM hidden & cell state; null until init() has run
         */
        this._state = null;
    }
    /**
     * Get session (the Silero VAD session held by ModelService)
     */
    get _session() {
        return this._model.sileroVAD;
    }
    /**
     * Initialize: read the state shape from the model and create zeroed
     * hidden/cell tensors of exactly that size.
     */
    init() {
        this._shape = this._getShape();
        // Size the buffers from the shape instead of a literal 128, so the
        // data length always matches the dims passed to the Tensor.
        const stateSize = this._shape.reduce((total, dim) => total * dim, 1);
        this._state = {
            hidden: new Tensor('float32', new Float32Array(stateSize), this._shape),
            cell: new Tensor('float32', new Float32Array(stateSize), this._shape),
        };
    }
    /**
     * Get VAD score for one audio chunk and carry the LSTM state forward.
     *
     * @param sample Audio samples, sent to the model as a [1, N] tensor
     * @returns First value of the model's `output`; 0 when inference fails
     * (the error is emitted on the exception subject)
     * @throws Error when init() has not been called yet
     */
    async score(sample) {
        if (!this._state)
            throw new Error(`${VadService.name}: Undefined LSTM state.`);
        try {
            const tensor = new Tensor('float32', sample, [1, sample.length]);
            const sampleRate = new Tensor('int64', [BigInt(SAMPLE_RATE)], []);
            // run inference
            const response = await this._session.run({
                input: tensor,
                sr: sampleRate,
                h: this._state.hidden,
                c: this._state.cell,
            });
            // Update memory for the next chunk
            this._state.hidden = response['hn'];
            this._state.cell = response['cn'];
            return response['output'].data[0];
        }
        catch (error) {
            this._event.exception.next(error);
            return 0;
        }
    }
    /**
     * Get shape of vad session: the dims of the model input named 'h', with
     * every symbolic (string) dim replaced by 1.
     *
     * Ex: [2, 1, 64]
     *
     * @throws Error when the session has no input named 'h' with a shape
     */
    _getShape() {
        const { shape } = (this._session.inputMetadata.find((e) => e.name === 'h') ??
            {});
        if (!shape) {
            throw new Error(`${VadService.name}: Unable to identify shape of the session.`);
        }
        return Array.from(shape, (dim) => (typeof dim === 'string' ? 1 : dim));
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: VadService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: VadService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: VadService, decorators: [{
            type: Injectable
        }] });
|
|
773
|
+
|
|
774
|
+
/**
|
|
775
|
+
* Default VAD threshold
|
|
776
|
+
*/
|
|
777
|
+
const DEFAULT_VAD_THRESHOLD = 0.5;
|
|
778
|
+
|
|
779
|
+
/**
 * Turns microphone data into speech, wakeword, recording and silence events.
 *
 * `init()` scores every microphone chunk with the VAD and publishes it on the
 * speech event; two internal pipelines then detect the wakeword and capture
 * the audio that follows it until silence (or a forced end).
 */
class AudioService {
    constructor() {
        /**
         * Dependencies
         */
        this.__speaker = inject(SpeakerService); // Injected only so the service gets instantiated
        this._config = inject(ConfigService);
        this._event = inject(EventService);
        this._mic = inject(MicrophoneService);
        this._vad = inject(VadService);
        this._pipeline = inject(PipelineService);
        this._speechRecognition = inject(SpeechRecognitionService);
        /**
         * Subscriptions released in ngOnDestroy
         */
        this._subs = new SubSink();
        /**
         * Set by forceEndRecording(); checked on each speech event while recording
         */
        this._endCurrentRecording = false;
        /**
         * Recording state
         */
        this._isRecording = false;
        /**
         * Whether the process is initialized (wakeword detected or recording forced)
         */
        this._isInitialized = false;
    }
    /**
     * Whether a command is currently being recorded
     */
    get isRecording() {
        return this._isRecording;
    }
    ngOnDestroy() {
        this._subs.unsubscribe();
    }
    /**
     * Initialize audio
     *
     * Waits for the microphone data source, initializes VAD and speech
     * recognition, fires the ready event, then republishes every microphone
     * chunk as a speech event enriched with `vadScore` and `hasVoiceActivity`.
     */
    async init() {
        const data = await this._mic.data;
        // Init VAD
        this._vad.init();
        this._speechRecognition.init();
        // Fire ready event
        this._event.ready.next();
        const config = this._config;
        // Track the microphone subscription so ngOnDestroy releases it too
        this._subs.sink = data.subscribe(async (chunk) => {
            const vadScore = await this._vad.score(chunk.sample);
            // Fire a speech event
            this._event.speech.next({
                ...chunk,
                vadScore,
                // Getter: `this` is the event object; the threshold is read lazily
                get hasVoiceActivity() {
                    return this.vadScore > (config.audio.vadThreshold ?? DEFAULT_VAD_THRESHOLD);
                },
            });
        });
        this._listenForWakeword();
        this._captureCommandAfterWakeword();
    }
    /**
     * Force start recording (without wakeword)
     *
     * Marks the service as initialized, clears the force-end flag and
     * publishes a synthetic wakeword event with empty audio data.
     */
    forceStartRecording() {
        this._isInitialized = true;
        this._endCurrentRecording = false;
        this._event.wakeword.next({
            inferenceScore: 0,
            chunk: [],
            vadScore: 0,
            hasVoiceActivity: false,
            sample: new Float32Array(),
            rms: 0,
            db: 0,
            dbNormalized: 0,
        });
    }
    /**
     * Force end recording
     *
     * Raises the force-end flag (picked up by the recording pipeline on the
     * next speech event) and immediately publishes a final, empty silence event.
     */
    forceEndRecording() {
        this._endCurrentRecording = true;
        this._event.silence.next({
            chunk: new Float32Array(),
            transcript: '',
            interimResponse: false, // final response
        });
    }
    /**
     * Toggle recording
     *
     * Ends the current session when initialized, otherwise starts one.
     */
    toggleRecording() {
        if (this._isInitialized) {
            this._isInitialized = false;
            this.forceEndRecording();
        }
        else
            this.forceStartRecording();
    }
    /**
     * Identifies the wakeword and emits the event
     *
     * Every speech event is run through the pipeline one at a time
     * (concatMap); scores above the configured threshold publish a wakeword
     * event carrying the audio buffered by the wakeword stream.
     */
    _listenForWakeword() {
        const vad$ = this._getWakeWordStream();
        this._subs.sink = this._event.speech
            .pipe(withLatestFrom(vad$), concatMap(async ([speech, vadState]) => {
            const score = await this._pipeline.run(speech);
            return { speech, score, chunk: vadState.buffer };
        }), filter(({ score }) => score > (this._config.onnx.wakewordInferenceThreshold ?? DEFAULT_INFERENCE_SCORE)))
            .subscribe(({ speech, score, chunk }) => {
            this._event.wakeword.next({ ...speech, inferenceScore: score, chunk });
        });
    }
    /**
     * Captures the full command audio after a wakeword
     *
     * Recording starts on a wakeword, or (once initialized) when the VAD score
     * rises above the threshold and stays there for 300 ms. It ends after the
     * configured silence duration or when forceEndRecording() was called, and
     * the captured audio is published on the silence event.
     */
    _captureCommandAfterWakeword() {
        const SILENCE_DURATION = this._config.audio.silenceDuration ?? DEFAULT_SILENCE_DURATION;
        const VAD_THRESHOLD = this._config.audio.vadThreshold ?? DEFAULT_VAD_THRESHOLD;
        // --- TRIGGER 1: Wakeword ---
        const wakewordTrigger$ = this._event.wakeword.pipe(filter(() => !this._isRecording), // Ignore wakeword if already recording
        map(() => []));
        // --- TRIGGER 2: Continuous VAD > THRESHOLD for 300 ms ---
        const continuousVadTrigger$ = this._event.speech.pipe(map((s) => s.vadScore > VAD_THRESHOLD), filter(() => !this._isRecording && this._isInitialized), // Ignore and prevent background buffering if already recording
        distinctUntilChanged(), switchMap((isVoiceActive) => {
            if (!isVoiceActive)
                return EMPTY; // Cancel if voice stops
            this._speechRecognition.reset();
            const bufferedChunks = [];
            // 1. Accumulate audio chunks silently
            const buffer$ = this._event.speech.pipe(tap((s) => bufferedChunks.push(s.sample)), ignoreElements());
            // 2. Timer that emits the accumulated chunks after 300 ms
            const timer$ = timer(300).pipe(map(() => bufferedChunks));
            // Merge both. If the timer fires, take(1) stops the buffer$ stream.
            // If isVoiceActive turns false before the timer fires, switchMap cancels both.
            return merge(buffer$, timer$).pipe(take(1));
        }));
        // --- COMBINE TRIGGERS ---
        const startRecordingTrigger$ = merge(wakewordTrigger$.pipe(tap(() => {
            if (!this._isInitialized)
                this._isInitialized = true; // initialized
        })), continuousVadTrigger$).pipe(throttleTime(1000));
        // --- MAIN RECORDING PIPELINE ---
        this._subs.sink = startRecordingTrigger$
            .pipe(tap(() => {
            this._isRecording = true;
            this._speechRecognition.reset();
            this._event.recording.next(); // recording event
        }), switchMap((bufferedChunks) => {
            // Initialize our command chunks with anything captured during the VAD wait
            const commandChunks = [...bufferedChunks];
            const speech$ = this._event.speech.pipe(tap((speech) => commandChunks.push(speech.sample)), share());
            const silence$ = speech$.pipe(map((s) => s.vadScore < VAD_THRESHOLD), distinctUntilChanged());
            // 1. Normal silence timeout logic
            const normalSilenceTimeout$ = silence$.pipe(delay(500), switchMap((isSilent) => {
                if (!isSilent) {
                    return EMPTY; // if voice cancel the timer
                }
                // silence started, start timer
                return timer(SILENCE_DURATION).pipe(takeUntil(silence$.pipe(filter((silent) => !silent))));
            }));
            // 2. Force complete logic checking the variable
            const forceComplete$ = speech$.pipe(filter(() => this._endCurrentRecording));
            // 3. Complete whenever the timer fires OR the flag is set to true
            return merge(normalSilenceTimeout$, forceComplete$).pipe(take(1), map(() => this._flatten(commandChunks)));
        }))
            .subscribe({
            next: (chunk) => {
                // Only DEFAULT mode produces a final (non-interim) response
                const interimResponse = this._config.mode !== 'DEFAULT';
                this._event.silence.next({
                    chunk,
                    transcript: this._speechRecognition.transcript,
                    interimResponse,
                }); // emit silence event
                // Default case
                // NOTE(review): the force-end flag is only cleared here in DEFAULT mode;
                // in other modes it stays set until forceStartRecording() - confirm intended.
                if (this._config.mode === 'DEFAULT') {
                    this._isInitialized = false;
                    this._endCurrentRecording = false; // reset flag after recording ends
                }
                this._isRecording = false;
            },
            error: (err) => {
                this._event.exception.next(err);
            },
        });
    }
    /**
     * Helper to flatten array of buffers into a single Float32Array
     *
     * @param chunks array of array-like sample buffers
     * @returns one Float32Array holding all chunks back to back, in order
     */
    _flatten(chunks) {
        const totalLength = chunks.reduce((acc, chunk) => acc + chunk.length, 0);
        const result = new Float32Array(totalLength);
        let offset = 0;
        for (const chunk of chunks) {
            result.set(chunk, offset);
            offset += chunk.length;
        }
        return result;
    }
    /**
     * Wakeword stream
     *
     * Folds speech events into `{ buffer, hangoverCounter, isActive }`:
     * voice activity opens (or keeps open) a window and resets the silence
     * counter; VAD_HANGOVER_FRAMES consecutive silent frames close it. Samples
     * are appended to the buffer while the window is open.
     *
     * @returns shared stream of the accumulated state, one value per speech event
     */
    _getWakeWordStream() {
        return this._event.speech.pipe(scan((state, speech) => {
            const { hasVoiceActivity, sample } = speech;
            let { isActive, hangoverCounter, buffer } = state;
            if (hasVoiceActivity) {
                // If we were idle, start a fresh buffer
                if (!isActive) {
                    buffer = [];
                    isActive = true;
                }
                // Reset silence tracker to 0 because we hear a voice
                hangoverCounter = 0;
            }
            else if (isActive) {
                // Increment silence tracker
                hangoverCounter++;
                // If we've reached the limit of allowed silent frames
                if (hangoverCounter >= VAD_HANGOVER_FRAMES) {
                    isActive = false;
                }
            }
            // Add sample if we are in an active speech window
            // (a new array each time, so previously emitted buffers are not mutated)
            if (isActive) {
                buffer = [...buffer, sample];
            }
            return { buffer, hangoverCounter, isActive };
        }, { buffer: [], hangoverCounter: -1, isActive: false }),
        // Multicast so all subscribers share a single scan over the speech events
        share());
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: AudioService, deps: [], target: i0.ɵɵFactoryTarget.Injectable }); }
    static { this.ɵprov = i0.ɵɵngDeclareInjectable({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: AudioService }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: AudioService, decorators: [{
            type: Injectable
        }] });
|
|
1013
|
+
|
|
1014
|
+
/**
 * Animated three.js wireframe orb that reacts to speech loudness and toggles
 * recording on click, key press, or the Space key.
 */
class OrbComponent {
    constructor() {
        this._config = inject(ConfigService);
        this._platform = inject(PlatformService);
        this._audio = inject(AudioService);
        this._event = inject(EventService);
        this._subs = new SubSink();
        // Copy of the undeformed vertex positions, filled in by _init()
        this.originalVertices = null;
        // Intensity the orb eases towards / intensity currently rendered
        this.targetIntensity = 0;
        this.currentIntensity = 0;
        this.clock = new THREE.Timer();
        this.elapsedTime = 0;
        /**
         * Should orb be reacting to speech
         */
        this._isActive = false;
        /**
         * Animate
         *
         * Render loop: reschedules itself, eases the intensity, displaces the
         * vertices with a time-based wave and renders the scene.
         */
        this._animate = () => {
            this.animationId = requestAnimationFrame(this._animate);
            // 1. Get the time passed since the last frame (delta).
            //    THREE.Timer only refreshes its delta inside update(); without
            //    this call getDelta() stays 0 and the wave never moves.
            this.clock.update();
            const delta = this.clock.getDelta();
            // 2. Smoothly update currentIntensity
            this.currentIntensity += (this.targetIntensity - this.currentIntensity) * 0.05;
            // 3. Increment our own elapsedTime ticker.
            //    We multiply delta by intensity so the pulse speeds up when busy,
            //    but it won't "run away" as performance.now() grows.
            const speedFactor = 1 + this.currentIntensity / 20;
            this.elapsedTime += delta * speedFactor;
            // 4. Rotation (Constant per frame, scaled by intensity)
            this.orb.rotation.y += 0.005 + this.currentIntensity / 5000;
            this.orb.rotation.z += 0.002;
            // 5. Vertex Displacement
            const positionAttribute = this.orb.geometry.getAttribute('position');
            for (let i = 0; i < positionAttribute.count; i++) {
                const ix = i * 3;
                const iy = i * 3 + 1;
                const iz = i * 3 + 2;
                const x = this.originalVertices[ix];
                const y = this.originalVertices[iy];
                const z = this.originalVertices[iz];
                // We use this.elapsedTime instead of performance.now()
                // This creates a stable frequency regardless of how long the app has been open
                const wave = Math.sin(x * 2 + this.elapsedTime) *
                    Math.cos(y * 2 + this.elapsedTime) *
                    (this.currentIntensity / 300);
                const currentRadius = Math.sqrt(x * x + y * y + z * z) + wave;
                const finalScale = Math.min(currentRadius, 2.5) / 1.5;
                positionAttribute.setXYZ(i, x * finalScale, y * finalScale, z * finalScale);
            }
            positionAttribute.needsUpdate = true;
            // Optional: Update material feedback based on intensity
            const material = this.orb.material;
            material.emissiveIntensity = 0.2 + this.currentIntensity / 100;
            material.opacity = 0.3 + this.currentIntensity / 200;
            this.renderer.render(this.scene, this.camera);
        };
    }
    /**
     * Whether the audio service is currently recording
     */
    get isRecording() {
        return this._audio.isRecording;
    }
    /**
     * Configured orb size, 400 when not configured
     */
    get orbSize() {
        return this._config.orb?.size ?? 400;
    }
    /**
     * Change color of orb
     *
     * @param color value assigned to the material's `color`
     * @param emissive value assigned to the material's `emissive`
     */
    changeColor(color, emissive) {
        const material = this.orb.material;
        material.color = color;
        material.emissive = emissive;
    }
    /**
     * Toggle recording
     */
    toggleRecording() {
        this._audio.toggleRecording();
    }
    ngOnInit() {
        if (this._platform.isServer)
            return;
        this._init();
        this._animate();
        this._loadSubscribers();
    }
    ngOnChanges(changes) {
        if (this._platform.isServer)
            return;
        if (changes['intensity']) {
            this.targetIntensity = changes['intensity'].currentValue;
        }
    }
    ngOnDestroy() {
        if (this._platform.isServer)
            return;
        cancelAnimationFrame(this.animationId);
        this.renderer.dispose();
        this.orb.geometry.dispose();
        this.orb.material.dispose();
        this._subs.unsubscribe();
    }
    /**
     * Window-level Space key handler: toggles recording.
     *
     * Key presses that originate from a text-entry element (input, textarea,
     * select, contenteditable) are left alone so typing a space elsewhere on
     * the page keeps working.
     *
     * @param event keydown event
     */
    handleSpacebarPress(event) {
        const target = event.target;
        const tag = target?.tagName;
        if (target?.isContentEditable || tag === 'INPUT' || tag === 'TEXTAREA' || tag === 'SELECT')
            return;
        event.preventDefault(); // Prevents the default space bar action (e.g., scrolling)
        this.toggleRecording();
    }
    /**
     * Initialize
     *
     * Builds the scene, camera, renderer, orb mesh and lights, and keeps a
     * copy of the orb's original vertex positions for the animation.
     */
    _init() {
        this.scene = new THREE.Scene();
        this.camera = new THREE.PerspectiveCamera(45, 1, 0.1, 1000);
        this.camera.position.z = 3;
        this.renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true });
        this.renderer.setPixelRatio(window.devicePixelRatio);
        this.rendererContainer.nativeElement.appendChild(this.renderer.domElement);
        // Initial resize to fit container
        this._resize();
        // Orb Geometry (Icosahedron for organic detail)
        const geometry = new THREE.IcosahedronGeometry(1.2, 32);
        this.originalVertices = geometry.attributes['position'].array.slice();
        const material = new THREE.MeshStandardMaterial({
            color: 'red',
            wireframe: true,
            transparent: true,
            opacity: 0.6,
            emissive: 'red',
            emissiveIntensity: 0.5,
        });
        this.orb = new THREE.Mesh(geometry, material);
        this.scene.add(this.orb);
        const light = new THREE.PointLight(0xffffff, 15, 10);
        light.position.set(2, 2, 2);
        this.scene.add(light);
        this.scene.add(new THREE.AmbientLight(0x404040));
    }
    /**
     * Resize container
     *
     * Sizes the renderer and updates the camera aspect ratio.
     */
    _resize() {
        const width = this.orbSize ?? this.rendererContainer.nativeElement.clientWidth;
        const height = this.orbSize ?? this.rendererContainer.nativeElement.clientHeight;
        this.renderer.setSize(width, height);
        this.camera.aspect = width / height;
        this.camera.updateProjectionMatrix();
    }
    /**
     * Subscriptions
     */
    _loadSubscribers() {
        // Speech event: follow loudness only while active
        this._subs.sink = this._event.speech.subscribe((e) => {
            if (this._isActive)
                this.targetIntensity = e.dbNormalized * 100;
            else
                this.targetIntensity = 0;
        });
        // Wake word event
        this._subs.sink = this._event.wakeword.subscribe(() => {
            this.changeColor(new THREE.Color(0x00d2ff), new THREE.Color(0x0066ff));
            this._isActive = true;
        });
        // Silence
        this._subs.sink = this._event.silence.subscribe((ev) => {
            if (!ev.interimResponse) {
                // on silence, set color to red
                this.changeColor(new THREE.Color('red'), new THREE.Color('red'));
                this._isActive = false;
            }
        });
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: OrbComponent, deps: [], target: i0.ɵɵFactoryTarget.Component }); }
    static { this.ɵcmp = i0.ɵɵngDeclareComponent({ minVersion: "14.0.0", version: "20.3.16", type: OrbComponent, isStandalone: true, selector: "app-orb-component", host: { listeners: { "window:keydown.Space": "handleSpacebarPress($event)" } }, viewQueries: [{ propertyName: "rendererContainer", first: true, predicate: ["rendererContainer"], descendants: true, static: true }], usesOnChanges: true, ngImport: i0, template: `<div
#rendererContainer
tabindex="0"
role="button"
class="orb-viewport"
[style.height]="orbSize"
[style.width]="orbSize"
(click)="toggleRecording()"
(keypress)="toggleRecording()"
></div>`, isInline: true, styles: [".orb-viewport{background:transparent;cursor:pointer}\n"] }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: OrbComponent, decorators: [{
            type: Component,
            args: [{ selector: 'app-orb-component', standalone: true, template: `<div
#rendererContainer
tabindex="0"
role="button"
class="orb-viewport"
[style.height]="orbSize"
[style.width]="orbSize"
(click)="toggleRecording()"
(keypress)="toggleRecording()"
></div>`, styles: [".orb-viewport{background:transparent;cursor:pointer}\n"] }]
        }], propDecorators: { rendererContainer: [{
                type: ViewChild,
                args: ['rendererContainer', { static: true }]
            }], handleSpacebarPress: [{
                type: HostListener,
                args: ['window:keydown.Space', ['$event']]
            }] } });
|
|
1219
|
+
|
|
1220
|
+
/**
 * Throttle window (ms) for wakeword events when none is configured
 */
const DEFAULT_THROTTLE_TIME = 1000;
/**
 * Public component: hosts the orb and re-emits the internal event streams
 * as Angular outputs.
 */
class WakeyWakeyComponent {
    constructor() {
        /**
         * Fires when library loaded
         */
        this.ready = new EventEmitter();
        /**
         * Fires when there is an error
         */
        this.exception = new EventEmitter();
        /**
         * Fires when speech is detected
         */
        this.speech = new EventEmitter();
        /**
         * Fires when wake word is detected
         */
        this.wakeword = new EventEmitter();
        /**
         * Fires when recording starts (after wake word detection)
         */
        this.recording = new EventEmitter();
        /**
         * Fires when silence is detected
         */
        this.silence = new EventEmitter();
        /**
         * Dependencies
         */
        this._config = inject(ConfigService);
        this._platform = inject(PlatformService);
        this._event = inject(EventService);
        this._audio = inject(AudioService);
        this._model = inject(ModelService);
        /**
         * Subscriptions
         */
        this._subs = new SubSink();
    }
    ngOnInit() {
        // Execute pipeline
        this._execute();
    }
    /**
     * Fire fake wakeword event
     *
     * Publishes a synthetic wakeword event with empty audio data.
     */
    fireWakeWord() {
        this._event.wakeword.next({
            inferenceScore: 1,
            chunk: [],
            vadScore: 1,
            hasVoiceActivity: false,
            sample: new Float32Array(),
            rms: 0,
            db: 0,
            dbNormalized: 0,
        });
    }
    ngOnDestroy() {
        this._subs.unsubscribe();
    }
    /**
     * Execute
     *
     * Wires the outputs and initializes audio (browser only). A failure while
     * initializing audio is published on the exception stream, which is
     * already forwarded to the `exception` output at that point, instead of
     * surfacing as an unhandled promise rejection from ngOnInit.
     */
    async _execute() {
        // Check if browser
        if (this._platform.isServer)
            return;
        // Listen events
        this._listenEvents();
        // Init audio
        try {
            await this._audio.init();
        }
        catch (error) {
            this._event.exception.next(error);
        }
    }
    /**
     * Listen events
     *
     * Forwards each internal event stream to the matching output.
     */
    _listenEvents() {
        // Ready event
        this._subs.sink = this._event.ready.subscribe((e) => {
            this.ready.emit(e);
        });
        // Exception event
        this._subs.sink = this._event.exception.subscribe((e) => {
            this.exception.emit(e);
        });
        // Speech event
        this._subs.sink = this._event.speech.subscribe((e) => {
            this.speech.emit(e);
        });
        // Wake word event (throttled)
        this._subs.sink = this._event.wakeword
            .pipe(throttleTime(this._config.throttleTime ?? DEFAULT_THROTTLE_TIME))
            .subscribe((e) => {
            this.wakeword.emit(e);
        });
        // Recording event
        this._subs.sink = this._event.recording.subscribe((e) => {
            this.recording.emit(e);
        });
        // Silence event
        this._subs.sink = this._event.silence.subscribe((e) => {
            this.silence.emit(e);
        });
    }
    static { this.ɵfac = i0.ɵɵngDeclareFactory({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: WakeyWakeyComponent, deps: [], target: i0.ɵɵFactoryTarget.Component }); }
    static { this.ɵcmp = i0.ɵɵngDeclareComponent({ minVersion: "14.0.0", version: "20.3.16", type: WakeyWakeyComponent, isStandalone: true, selector: "wakeywakey", outputs: { ready: "ready", exception: "exception", speech: "speech", wakeword: "wakeword", recording: "recording", silence: "silence" }, providers: [], ngImport: i0, template: '<app-orb-component (orbClick)="fireWakeWord()" />', isInline: true, dependencies: [{ kind: "component", type: OrbComponent, selector: "app-orb-component" }] }); }
}
i0.ɵɵngDeclareClassMetadata({ minVersion: "12.0.0", version: "20.3.16", ngImport: i0, type: WakeyWakeyComponent, decorators: [{
            type: Component,
            args: [{
                    selector: 'wakeywakey',
                    imports: [OrbComponent],
                    template: '<app-orb-component (orbClick)="fireWakeWord()" />',
                    providers: [],
                }]
        }], propDecorators: { ready: [{
                type: Output
            }], exception: [{
                type: Output
            }], speech: [{
                type: Output
            }], wakeword: [{
                type: Output
            }], recording: [{
                type: Output
            }], silence: [{
                type: Output
            }] } });
|
|
1352
|
+
|
|
1353
|
+
/**
 * Build the provider list for wakey wakey.
 *
 * Registers the given configuration under the CONFIG token, the library's
 * services, and an app initializer that (in the browser only) sets the ONNX
 * wasm path and creates the four inference sessions.
 *
 * @param config Wakey Wakey configuration
 * @returns array of providers
 */
function provideWakeyWakey(config) {
    // App initializer: creates the inference sessions and stores them on the model service
    const loadSessions = async () => {
        // inject() calls stay ahead of the first await
        const settings = inject(ConfigService);
        const models = inject(ModelService);
        const platform = inject(PlatformService);
        if (platform.isServer)
            return;
        // Default asset location, only evaluated when no override is configured
        const assetUrl = (relativePath) => `${settings.basePath}/${relativePath}`;
        // Set wasm path
        env.wasm.wasmPaths = settings.onnx.runtimePath ?? assetUrl('wasm/');
        const paths = [
            settings.onnx.model.melspectrogram ?? assetUrl('models/melspectrogram.onnx'),
            settings.onnx.model.embedding_model ?? assetUrl('models/embedding_model.onnx'),
            settings.onnx.model.silero_vad ?? assetUrl('models/silero_vad_v4.onnx'),
            settings.onnx.model.wakeword,
        ];
        // Create sessions (all in parallel, results in the same order as `paths`)
        const [melspectrogram, embedding_model, silero_vad, wakeword] = await Promise.all(paths.map((path) => InferenceSession.create(path, { executionProviders: ['wasm'] })));
        // set sessions
        models.session = { melspectrogram, embedding_model, silero_vad, wakeword };
    };
    return [
        { provide: CONFIG, useValue: config },
        ConfigService,
        MicrophoneService,
        SpeakerService,
        VadService,
        AudioService,
        PlatformService,
        EventService,
        PipelineService,
        SpeechRecognitionService,
        ModelService,
        provideAppInitializer(loadSessions),
    ];
}
|
|
1401
|
+
|
|
1402
|
+
/*
|
|
1403
|
+
* Public API Surface of wakeywakey
|
|
1404
|
+
*/
|
|
1405
|
+
|
|
1406
|
+
/**
|
|
1407
|
+
* Generated bundle index. Do not edit.
|
|
1408
|
+
*/
|
|
1409
|
+
|
|
1410
|
+
export { CONFIG as WAKEYWAKEY_CONFIG, AudioUtil as WakeyWakeyAudioUtil, WakeyWakeyComponent, provideWakeyWakey };
|
|
1411
|
+
//# sourceMappingURL=prabhjeet.me-wakeywakey.mjs.map
|