openwakeword-js 0.1.21 → 0.1.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -0
- package/dist/index.js +250 -250
- package/dist/worker.d.ts +2 -0
- package/dist/worker.js +2848 -0
- package/index.html +100 -79
- package/models/hello_deepa_old.onnx +0 -0
- package/models/test.html +468 -0
- package/openwakeword.mjs +250 -250
- package/package.json +7 -1
- package/scripts/download_models.js +2 -1
- package/src/index.ts +50 -26
- package/src/worker.ts +28 -0
- package/worker.mjs +2848 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "openwakeword-js",
|
|
3
|
-
"version": "0.1.21",
|
|
3
|
+
"version": "0.1.27",
|
|
4
4
|
"description": "Port of openWakeWord to JavaScript/TypeScript using ONNX Runtime",
|
|
5
5
|
"bin": {
|
|
6
6
|
"openwakeword-js-setup": "scripts/download_models.js"
|
|
@@ -22,8 +22,14 @@
|
|
|
22
22
|
"scripts",
|
|
23
23
|
"index.html",
|
|
24
24
|
"openwakeword.mjs",
|
|
25
|
+
"worker.mjs",
|
|
25
26
|
"models/hello_deepa.onnx",
|
|
27
|
+
"models/hello_deepa_old.onnx",
|
|
26
28
|
"models/namaste_deepa.onnx",
|
|
29
|
+
"models/melspectrogram.onnx",
|
|
30
|
+
"models/embedding_model.onnx",
|
|
31
|
+
"models/silero_vad.onnx",
|
|
32
|
+
"models/test.html",
|
|
27
33
|
"README.md",
|
|
28
34
|
"LICENSE"
|
|
29
35
|
],
|
|
@@ -95,12 +95,13 @@ async function main() {
|
|
|
95
95
|
console.log('\nDeploying optimized AI Listening Interface...');
|
|
96
96
|
copyIfExists(path.join(packageRoot, 'index.html'), path.join(process.cwd(), 'index.html'), 'UI');
|
|
97
97
|
copyIfExists(path.join(packageRoot, 'openwakeword.mjs'), path.join(process.cwd(), 'openwakeword.mjs'), 'Library');
|
|
98
|
+
copyIfExists(path.join(packageRoot, 'worker.mjs'), path.join(process.cwd(), 'worker.mjs'), 'Worker');
|
|
98
99
|
|
|
99
100
|
// Copy test.html too
|
|
100
101
|
copyIfExists(path.join(packageRoot, 'models', 'test.html'), path.join(MODELS_DIR, 'test.html'), 'Debug UI');
|
|
101
102
|
|
|
102
103
|
console.log('\n----------------------------------------------------');
|
|
103
|
-
console.log('SETUP COMPLETE (v0.1.
|
|
104
|
+
console.log('SETUP COMPLETE (v0.1.26)');
|
|
104
105
|
console.log('----------------------------------------------------');
|
|
105
106
|
console.log('Your precision AI wake word interface is ready.');
|
|
106
107
|
console.log('\nTo start the demo:');
|
package/src/index.ts
CHANGED
|
@@ -39,17 +39,18 @@ export class Model {
|
|
|
39
39
|
private embeddingSession: ort.InferenceSession | null = null;
|
|
40
40
|
private vadSession: ort.InferenceSession | null = null;
|
|
41
41
|
private customSessions: Map<string, ort.InferenceSession> = new Map();
|
|
42
|
+
private embeddingWindowSizes: Map<string, number> = new Map();
|
|
42
43
|
|
|
43
44
|
// Buffers
|
|
44
45
|
private melBuffer: Float32Array[] = [];
|
|
45
|
-
private embeddingBuffers: Map<string, Float32Array[]> = new Map();
|
|
46
|
+
private embeddingBuffers: Float32Array[] = [];
|
|
46
47
|
private predictionBuffers: Map<string, number[]> = new Map();
|
|
47
48
|
private vadBuffer: number[] = [];
|
|
48
49
|
private rawAudioRemainder: Float32Array = new Float32Array(0);
|
|
49
50
|
private melContextBuffer: Float32Array;
|
|
50
51
|
|
|
51
52
|
// Seeding history
|
|
52
|
-
private noiseSeededEmbeddings: Map<string, Float32Array[]> = new Map();
|
|
53
|
+
private noiseSeededEmbeddings: Float32Array[] = [];
|
|
53
54
|
|
|
54
55
|
// Constants
|
|
55
56
|
private readonly CHUNK_SIZE = 1280;
|
|
@@ -62,6 +63,7 @@ export class Model {
|
|
|
62
63
|
private readonly MAX_MEL_FRAMES = 970;
|
|
63
64
|
private readonly INITIAL_FRAMES_SUPPRESS = 5;
|
|
64
65
|
private readonly PREDICTION_BUFFER_MAX = 30;
|
|
66
|
+
private readonly GLOBAL_MAX_EMBEDDING_WINDOW = 50;
|
|
65
67
|
|
|
66
68
|
// VAD State (Silero VAD)
|
|
67
69
|
private vadStateH = new Float32Array(2 * 1 * 64).fill(0);
|
|
@@ -119,21 +121,43 @@ export class Model {
|
|
|
119
121
|
generatedEmbeddings.push(emb);
|
|
120
122
|
}
|
|
121
123
|
|
|
124
|
+
this.noiseSeededEmbeddings = generatedEmbeddings.slice(-this.GLOBAL_MAX_EMBEDDING_WINDOW).map(e => new Float32Array(e));
|
|
125
|
+
this.embeddingBuffers = this.noiseSeededEmbeddings.map(e => new Float32Array(e));
|
|
126
|
+
|
|
122
127
|
for (const modelPath of this.options.wakewordModels) {
|
|
123
128
|
const session = await ort.InferenceSession.create(modelPath);
|
|
124
129
|
const name = this.extractModelName(modelPath);
|
|
125
130
|
this.customSessions.set(name, session);
|
|
126
131
|
|
|
127
|
-
//
|
|
128
|
-
const
|
|
129
|
-
|
|
130
|
-
|
|
132
|
+
// DYNAMIC DIMENSION DETECTION (Auto-healing via dummy run)
|
|
133
|
+
const inputName = session.inputNames[0];
|
|
134
|
+
let windowSize = 24; // Standard fallback
|
|
135
|
+
|
|
136
|
+
try {
|
|
137
|
+
// We attempt a dummy inference with the default 24 frames.
|
|
138
|
+
// If the model expects 25 (or anything else), ONNX Runtime will explicitly throw an error telling us the exact expected size.
|
|
139
|
+
const dummyTensor = new ort.Tensor('float32', new Float32Array(24 * 96), [1, 24, 96]);
|
|
140
|
+
await session.run({ [inputName]: dummyTensor });
|
|
141
|
+
} catch (e: any) {
|
|
142
|
+
const msg = e.toString();
|
|
143
|
+
const match = msg.match(/Got: \d+ Expected: (\d+)/);
|
|
144
|
+
if (match) {
|
|
145
|
+
windowSize = parseInt(match[1], 10);
|
|
146
|
+
console.log(`Model [${name}] dimension auto-corrected from 24 to ${windowSize} via runtime inspection`);
|
|
147
|
+
} else if (msg.includes('Expected')) {
|
|
148
|
+
// Fallback generic parsing if format varies slightly
|
|
149
|
+
console.warn(`Model [${name}] dummy run failed, but couldn't parse exact dimension. Error: ${msg}`);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
console.log(`Model [${name}] initialized with dynamic window size: ${windowSize}`);
|
|
154
|
+
this.embeddingWindowSizes.set(name, windowSize);
|
|
131
155
|
this.predictionBuffers.set(name, []);
|
|
132
156
|
}
|
|
133
157
|
|
|
134
|
-
// Final sync
|
|
158
|
+
// Final sync
|
|
135
159
|
this.isLoaded = true;
|
|
136
|
-
console.log('OpenWakeWord models loaded
|
|
160
|
+
console.log('OpenWakeWord models loaded with dynamic dimensionality support');
|
|
137
161
|
} catch (error) {
|
|
138
162
|
console.error('Failed to initialize OpenWakeWord models:', error);
|
|
139
163
|
throw error;
|
|
@@ -173,7 +197,7 @@ export class Model {
|
|
|
173
197
|
const chunk = combinedAudio.subarray(offset, offset + this.CHUNK_SIZE);
|
|
174
198
|
offset += this.CHUNK_SIZE;
|
|
175
199
|
|
|
176
|
-
// PRECISE SLIDING WINDOW
|
|
200
|
+
// PRECISE SLIDING WINDOW
|
|
177
201
|
const melInput = new Float32Array(this.CHUNK_SIZE + this.MEL_CONTEXT);
|
|
178
202
|
melInput.set(this.melContextBuffer);
|
|
179
203
|
melInput.set(chunk, this.MEL_CONTEXT);
|
|
@@ -197,12 +221,13 @@ export class Model {
|
|
|
197
221
|
while (this.melBuffer.length > this.MAX_MEL_FRAMES) this.melBuffer.shift();
|
|
198
222
|
|
|
199
223
|
const embedding = await this.runEmbeddingModel();
|
|
224
|
+
this.embeddingBuffers.push(embedding);
|
|
225
|
+
while (this.embeddingBuffers.length > this.GLOBAL_MAX_EMBEDDING_WINDOW) this.embeddingBuffers.shift();
|
|
226
|
+
|
|
200
227
|
for (const [name, session] of this.customSessions.entries()) {
|
|
201
|
-
const
|
|
202
|
-
embBuf.shift();
|
|
203
|
-
embBuf.push(embedding);
|
|
228
|
+
const windowSize = this.embeddingWindowSizes.get(name) || 24;
|
|
204
229
|
|
|
205
|
-
let score = await this.runClassifier(name, session);
|
|
230
|
+
let score = await this.runClassifier(name, session, windowSize);
|
|
206
231
|
|
|
207
232
|
if (this.vadSession && this.options.vadThreshold) {
|
|
208
233
|
const window = this.vadBuffer.slice(-7, -4);
|
|
@@ -262,11 +287,15 @@ export class Model {
|
|
|
262
287
|
return embedding;
|
|
263
288
|
}
|
|
264
289
|
|
|
265
|
-
private async runClassifier(name: string, session: ort.InferenceSession): Promise<number> {
|
|
266
|
-
const
|
|
267
|
-
const
|
|
268
|
-
|
|
269
|
-
|
|
290
|
+
private async runClassifier(name: string, session: ort.InferenceSession, windowSize: number): Promise<number> {
|
|
291
|
+
const predData = new Float32Array(windowSize * 96);
|
|
292
|
+
const startIdx = this.embeddingBuffers.length - windowSize;
|
|
293
|
+
|
|
294
|
+
for (let t = 0; t < windowSize; t++) {
|
|
295
|
+
predData.set(this.embeddingBuffers[startIdx + t], t * 96);
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
const predTensor = new ort.Tensor('float32', predData, [1, windowSize, 96]);
|
|
270
299
|
const results = await session.run({ [session.inputNames[0]]: predTensor });
|
|
271
300
|
return results[session.outputNames[0]].data[0] as number;
|
|
272
301
|
}
|
|
@@ -293,7 +322,7 @@ export class Model {
|
|
|
293
322
|
|
|
294
323
|
private extractModelName(path: string): string {
|
|
295
324
|
const base = path.split('/').pop() || path;
|
|
296
|
-
return base.replace('.onnx', '').replace('.tflite', '');
|
|
325
|
+
return base.replace('.onnx', '').replace('.tflite', '').replace(/\\/g, '/');
|
|
297
326
|
}
|
|
298
327
|
|
|
299
328
|
reset() {
|
|
@@ -303,14 +332,9 @@ export class Model {
|
|
|
303
332
|
this.vadBuffer = [];
|
|
304
333
|
this.vadStateH.fill(0);
|
|
305
334
|
this.vadStateC.fill(0);
|
|
306
|
-
|
|
335
|
+
this.embeddingBuffers = this.noiseSeededEmbeddings.map(e => new Float32Array(e));
|
|
336
|
+
for (const name of this.customSessions.keys()) {
|
|
307
337
|
this.predictionBuffers.set(name, []);
|
|
308
|
-
const seeded = this.noiseSeededEmbeddings.get(name);
|
|
309
|
-
if (seeded) {
|
|
310
|
-
this.embeddingBuffers.set(name, seeded.map(e => new Float32Array(e)));
|
|
311
|
-
} else {
|
|
312
|
-
this.embeddingBuffers.set(name, Array(this.EMBEDDING_WINDOW_SIZE).fill(0).map(() => new Float32Array(96).fill(0)));
|
|
313
|
-
}
|
|
314
338
|
}
|
|
315
339
|
}
|
|
316
340
|
}
|
package/src/worker.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import * as ort from 'onnxruntime-web';
|
|
2
|
+
import { Model } from './index.js';
|
|
3
|
+
|
|
4
|
+
let model: Model | null = null;
|
|
5
|
+
|
|
6
|
+
self.onmessage = async (e) => {
|
|
7
|
+
const { type, data } = e.data;
|
|
8
|
+
|
|
9
|
+
if (type === 'init') {
|
|
10
|
+
try {
|
|
11
|
+
model = new Model(data.options);
|
|
12
|
+
await model.init();
|
|
13
|
+
self.postMessage({ type: 'init-complete' });
|
|
14
|
+
} catch (err: any) {
|
|
15
|
+
self.postMessage({ type: 'error', message: err.message });
|
|
16
|
+
}
|
|
17
|
+
} else if (type === 'predict') {
|
|
18
|
+
if (!model) return;
|
|
19
|
+
try {
|
|
20
|
+
const results = await model.predict(data.audio);
|
|
21
|
+
self.postMessage({ type: 'results', results });
|
|
22
|
+
} catch (err: any) {
|
|
23
|
+
self.postMessage({ type: 'error', message: err.message });
|
|
24
|
+
}
|
|
25
|
+
} else if (type === 'reset') {
|
|
26
|
+
if (model) model.reset();
|
|
27
|
+
}
|
|
28
|
+
};
|