openwakeword-js 0.1.21 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -0
- package/dist/index.js +250 -250
- package/dist/worker.d.ts +2 -0
- package/dist/worker.js +2848 -0
- package/index.html +100 -79
- package/models/hello_deepa_old.onnx +0 -0
- package/models/test.html +468 -0
- package/openwakeword.mjs +250 -250
- package/package.json +7 -1
- package/scripts/download_models.js +2 -1
- package/src/index.ts +50 -26
- package/src/worker.ts +28 -0
- package/worker.mjs +2848 -0
package/index.html
CHANGED
|
@@ -5,13 +5,6 @@
|
|
|
5
5
|
<meta charset="utf-8" />
|
|
6
6
|
<meta content="width=device-width, initial-scale=1.0" name="viewport" />
|
|
7
7
|
<title>AI Wake Word Detector | OpenWakeWord JS</title>
|
|
8
|
-
<script type="importmap">
|
|
9
|
-
{
|
|
10
|
-
"imports": {
|
|
11
|
-
"onnxruntime-web": "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.20.1/dist/ort.mjs"
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
</script>
|
|
15
8
|
|
|
16
9
|
<!-- Fonts -->
|
|
17
10
|
<link href="https://fonts.googleapis.com" rel="preconnect" />
|
|
@@ -236,14 +229,14 @@
|
|
|
236
229
|
<label class="text-[11px] font-bold text-slate-500 uppercase tracking-widest">Confidence
|
|
237
230
|
Threshold</label>
|
|
238
231
|
<span id="thresholdVal"
|
|
239
|
-
class="text-xs font-mono text-primary bg-primary/10 px-2 py-0.5 rounded border border-primary/20">0.
|
|
232
|
+
class="text-xs font-mono text-primary bg-primary/10 px-2 py-0.5 rounded border border-primary/20">0.09</span>
|
|
240
233
|
</div>
|
|
241
234
|
<div class="slider-container relative h-6">
|
|
242
235
|
<input id="thresholdSlider" class="w-full relative z-2" max="0.99" min="0.01" step="0.01"
|
|
243
|
-
type="range" value="0.
|
|
236
|
+
type="range" value="0.09" />
|
|
244
237
|
<div id="sliderFill"
|
|
245
238
|
class="absolute left-0 top-1/2 -translate-y-1/2 h-[4px] bg-primary rounded-l-full"
|
|
246
|
-
style="width:
|
|
239
|
+
style="width: 9%;"></div>
|
|
247
240
|
</div>
|
|
248
241
|
</div>
|
|
249
242
|
|
|
@@ -294,7 +287,7 @@
|
|
|
294
287
|
class="h-44 overflow-y-auto rounded-xl bg-[#030406] p-4 font-mono text-[10px] text-slate-600 border border-white/5 custom-scrollbar leading-relaxed">
|
|
295
288
|
<div
|
|
296
289
|
class="opacity-40 animate-pulse font-bold text-blue-900 border border-blue-900/40 p-1 inline-block rounded mb-2">
|
|
297
|
-
OPENWAKEWORD-JS V0.1.
|
|
290
|
+
OPENWAKEWORD-JS V0.1.26 READY</div>
|
|
298
291
|
</div>
|
|
299
292
|
</div>
|
|
300
293
|
</details>
|
|
@@ -309,13 +302,16 @@
|
|
|
309
302
|
|
|
310
303
|
// Shared, mutable runtime state for the wake-word demo page.
const state = {
  // --- capture status ---------------------------------------------------
  // True while microphone frames should be queued for inference.
  isListening: false,
  // A score must be strictly greater than this value to count as a match.
  threshold: 0.09,

  // --- inference worker -------------------------------------------------
  // Worker instance created by init(); null until the first start-up.
  worker: null,

  // --- Web Audio graph --------------------------------------------------
  audioContext: null,
  processor: null,

  // --- timing -----------------------------------------------------------
  // Timestamp (ms since epoch) taken when this object is created.
  startTime: Date.now(),
  // Timestamp (ms since epoch) of the most recent detection; 0 = none yet.
  lastActivationTime: 0,
  // Results are ignored for this many seconds after a detection.
  cooldownSeconds: 0.6,

  // --- audio hand-off to the worker -------------------------------------
  // Set once the worker has posted 'init-complete'.
  isWorkerReady: false,
  // Captured samples that have not been sent to the worker yet.
  audioQueue: new Float32Array(0),
  // Re-entrancy guard used by processQueue().
  isProcessing: false,
};
|
|
320
316
|
|
|
321
317
|
|
|
@@ -352,25 +348,38 @@
|
|
|
352
348
|
}
|
|
353
349
|
|
|
354
350
|
/**
 * Starts the inference Web Worker (once) and waits for it to report readiness.
 *
 * The worker is sent an 'init' message with the model paths and is expected
 * to answer with 'init-complete'. Later 'results' messages are forwarded to
 * handleInferenceResults(); 'error' messages are logged.
 *
 * Failure handling:
 *  - If the worker reports an error, fails to load, or cannot be constructed
 *    before it became ready, it is terminated and state.worker is reset to
 *    null so that a later call can retry instead of reporting a dead worker
 *    as usable.
 *  - Calls made while start-up is still in flight share the same promise
 *    (kept on state.initPromise) rather than returning true prematurely.
 *
 * @returns {Promise<boolean>} true once the worker posted 'init-complete',
 *   false if start-up failed.
 */
async function init() {
  // A worker already exists: report the outcome of the start-up that made it.
  if (state.worker) return state.initPromise ?? true;

  log('Initializing Neural Worker...', 'SYSTEM');

  state.initPromise = new Promise((resolve) => {
    let worker;

    // Logs the fault; tears the worker down only if it never became ready,
    // so a runtime error after start-up does not kill a working session.
    const fail = (reason) => {
      log(`Neural Fault: ${reason}`, 'ERROR');
      if (!state.isWorkerReady && worker && state.worker === worker) {
        worker.terminate();
        state.worker = null;
      }
      resolve(false); // no-op if the promise already resolved to true
    };

    try {
      worker = new Worker('./worker.mjs', { type: 'module' });
    } catch (err) {
      fail(err.message);
      return;
    }
    state.worker = worker;

    worker.onmessage = (e) => {
      const { type, results, message } = e.data;
      if (type === 'init-complete') {
        state.isWorkerReady = true;
        log('Neural intelligence hub connected (Off-thread).', 'SYSTEM');
        resolve(true);
      } else if (type === 'results') {
        handleInferenceResults(results);
      } else if (type === 'error') {
        fail(message);
      }
    };

    // Script load failures and uncaught worker exceptions arrive here, not
    // through onmessage; without this the promise would stay pending.
    worker.onerror = (event) => {
      fail(event.message ?? 'worker failed to load or crashed');
    };

    worker.postMessage({
      type: 'init',
      data: {
        options: {
          wakewordModels: ['./models/hello_deepa.onnx', './models/namaste_deepa.onnx'],
          melspectrogramModelPath: './models/melspectrogram.onnx',
          embeddingModelPath: './models/embedding_model.onnx',
          inferenceFramework: 'onnx',
          wasmPaths: './models/',
        },
      },
    });
  });

  return state.initPromise;
}
|
|
375
384
|
|
|
376
385
|
function pushDetection(name, score) {
|
|
@@ -414,6 +423,45 @@
|
|
|
414
423
|
}
|
|
415
424
|
|
|
416
425
|
|
|
426
|
+
// GLOBAL HANDLERS FOR WEB WORKER
|
|
427
|
+
/**
 * Handles one batch of scores posted back by the inference worker.
 *
 * Does nothing while the cooldown after the previous detection is active.
 * Otherwise the first entry whose score is a number strictly greater than
 * state.threshold is treated as a detection: it is recorded via
 * pushDetection(), logged, and the pending audio queue is dropped. At most
 * one detection is reported per call.
 *
 * Malformed payloads (missing results, non-numeric or NaN scores) are
 * ignored instead of throwing inside the worker's message handler.
 *
 * @param {Object<string, number>} results - map of model name/path to score.
 */
function handleInferenceResults(results) {
  const currentTime = Date.now();

  // Ignore all triggers if we are in overall cooldown.
  if (currentTime - state.lastActivationTime < state.cooldownSeconds * 1000) {
    return;
  }

  // A 'results' message without a usable payload carries nothing to score.
  if (results == null || typeof results !== 'object') {
    return;
  }

  for (const [modelPath, score] of Object.entries(results)) {
    // Require a real number: strings would coerce in the comparison and then
    // break on toFixed(); NaN never compares greater and is skipped too.
    if (typeof score !== 'number' || !(score > state.threshold)) continue;

    // Display name: last path segment without its trailing ".onnx".
    const clean = modelPath.split('/').pop().replace(/\.onnx$/, '');

    // Detection triggered.
    state.lastActivationTime = currentTime;
    pushDetection(clean, score);
    log(`Match Found: ${clean} (${score.toFixed(2)})`, 'MATCH');

    // RESYNC: clear the local audio queue to "snap" back to the present.
    state.audioQueue = new Float32Array(0);
    break;
  }
}
|
|
450
|
+
|
|
451
|
+
/**
 * Sends every complete CHUNK_SIZE-sample frame currently in state.audioQueue
 * to the worker as a 'predict' message and keeps the leftover samples queued.
 *
 * Returns immediately when a drain is already running or the worker has not
 * reported readiness yet.
 *
 * The busy flag is released in a finally block so that an exception from
 * postMessage cannot leave the queue permanently stuck, and the remainder of
 * the queue is copied once per call instead of once per chunk.
 *
 * @returns {Promise<void>}
 */
async function processQueue() {
  if (state.isProcessing || !state.isWorkerReady) return;
  state.isProcessing = true;

  const CHUNK_SIZE = 1280;
  const pending = state.audioQueue;
  let consumed = 0;

  try {
    while (pending.length - consumed >= CHUNK_SIZE) {
      // slice() copies, so each message owns an independent buffer.
      const chunk = pending.slice(consumed, consumed + CHUNK_SIZE);
      // Count the chunk as consumed before posting, matching the previous
      // behaviour of dequeuing it first.
      consumed += CHUNK_SIZE;
      state.worker.postMessage({ type: 'predict', data: { audio: chunk } });
    }
  } finally {
    if (consumed > 0) {
      state.audioQueue = pending.slice(consumed);
    }
    state.isProcessing = false;
  }
}
|
|
464
|
+
|
|
417
465
|
async function toggle() {
|
|
418
466
|
if (state.isListening) {
|
|
419
467
|
state.isListening = false;
|
|
@@ -432,61 +480,34 @@
|
|
|
432
480
|
if (!ok) return;
|
|
433
481
|
|
|
434
482
|
try {
|
|
435
|
-
const stream = await navigator.mediaDevices.getUserMedia({
|
|
483
|
+
const stream = await navigator.mediaDevices.getUserMedia({
|
|
484
|
+
audio: {
|
|
485
|
+
echoCancellation: false,
|
|
486
|
+
noiseSuppression: false,
|
|
487
|
+
autoGainControl: false,
|
|
488
|
+
channelCount: 1,
|
|
489
|
+
sampleRate: 16000
|
|
490
|
+
}
|
|
491
|
+
});
|
|
436
492
|
state.audioContext = new AudioContext({ sampleRate: 16000 });
|
|
437
493
|
const source = state.audioContext.createMediaStreamSource(stream);
|
|
438
494
|
|
|
439
|
-
//
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
async function processQueue() {
|
|
444
|
-
if (isProcessing || audioQueue.length === 0) return;
|
|
445
|
-
isProcessing = true;
|
|
446
|
-
|
|
447
|
-
while (audioQueue.length > 0) {
|
|
448
|
-
const data = audioQueue.shift();
|
|
449
|
-
|
|
450
|
-
// Python-style Cooldown Check
|
|
451
|
-
const currentTime = Date.now();
|
|
452
|
-
if (currentTime - state.lastActivationTime < state.cooldownSeconds * 1000) {
|
|
453
|
-
continue;
|
|
454
|
-
}
|
|
455
|
-
|
|
456
|
-
try {
|
|
457
|
-
const results = await state.model.predict(data);
|
|
458
|
-
for (const [n, s] of Object.entries(results)) {
|
|
459
|
-
if (s > state.threshold) {
|
|
460
|
-
const clean = n.split('/').pop().replace('.onnx', '');
|
|
461
|
-
|
|
462
|
-
// Detection Triggered
|
|
463
|
-
state.lastActivationTime = Date.now();
|
|
464
|
-
pushDetection(clean, s);
|
|
465
|
-
log(`Match Found: ${clean} (${s.toFixed(2)})`, 'MATCH');
|
|
466
|
-
|
|
467
|
-
// Python-style Reset & Clear Queue
|
|
468
|
-
if (state.model.reset) state.model.reset();
|
|
469
|
-
audioQueue.length = 0; // Clear the queue
|
|
470
|
-
break; // Only trigger one wake word at a time
|
|
471
|
-
}
|
|
472
|
-
}
|
|
473
|
-
} catch (err) {
|
|
474
|
-
log(`Inference Error: ${err.message}`, 'ERROR');
|
|
475
|
-
}
|
|
476
|
-
}
|
|
477
|
-
|
|
478
|
-
isProcessing = false;
|
|
479
|
-
}
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
state.processor = state.audioContext.createScriptProcessor(2048, 1, 1);
|
|
495
|
+
// Increased buffer size to 4096 (approx 256ms) to give the main thread more breathing room
|
|
496
|
+
// and match closer to Python's batching behavior without sacrificing real-time feel.
|
|
497
|
+
state.processor = state.audioContext.createScriptProcessor(4096, 1, 1);
|
|
483
498
|
source.connect(state.processor);
|
|
484
499
|
state.processor.connect(state.audioContext.destination);
|
|
485
500
|
|
|
486
501
|
state.processor.onaudioprocess = (e) => {
|
|
487
502
|
if (!state.isListening) return;
|
|
488
|
-
const
|
|
489
|
-
|
|
503
|
+
const newData = new Float32Array(e.inputBuffer.getChannelData(0));
|
|
504
|
+
|
|
505
|
+
// Append new data to queue
|
|
506
|
+
const combinedInfo = new Float32Array(state.audioQueue.length + newData.length);
|
|
507
|
+
combinedInfo.set(state.audioQueue);
|
|
508
|
+
combinedInfo.set(newData, state.audioQueue.length);
|
|
509
|
+
state.audioQueue = combinedInfo;
|
|
510
|
+
|
|
490
511
|
processQueue();
|
|
491
512
|
};
|
|
492
513
|
|
|
Binary file
|