openwakeword-js 0.1.21 → 0.1.27

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openwakeword-js",
3
- "version": "0.1.21",
3
+ "version": "0.1.27",
4
4
  "description": "Port of openWakeWord to JavaScript/TypeScript using ONNX Runtime",
5
5
  "bin": {
6
6
  "openwakeword-js-setup": "scripts/download_models.js"
@@ -22,8 +22,14 @@
22
22
  "scripts",
23
23
  "index.html",
24
24
  "openwakeword.mjs",
25
+ "worker.mjs",
25
26
  "models/hello_deepa.onnx",
27
+ "models/hello_deepa_old.onnx",
26
28
  "models/namaste_deepa.onnx",
29
+ "models/melspectrogram.onnx",
30
+ "models/embedding_model.onnx",
31
+ "models/silero_vad.onnx",
32
+ "models/test.html",
27
33
  "README.md",
28
34
  "LICENSE"
29
35
  ],
@@ -95,12 +95,13 @@ async function main() {
95
95
  console.log('\nDeploying optimized AI Listening Interface...');
96
96
  copyIfExists(path.join(packageRoot, 'index.html'), path.join(process.cwd(), 'index.html'), 'UI');
97
97
  copyIfExists(path.join(packageRoot, 'openwakeword.mjs'), path.join(process.cwd(), 'openwakeword.mjs'), 'Library');
98
+ copyIfExists(path.join(packageRoot, 'worker.mjs'), path.join(process.cwd(), 'worker.mjs'), 'Worker');
98
99
 
99
100
  // Copy test.html too
100
101
  copyIfExists(path.join(packageRoot, 'models', 'test.html'), path.join(MODELS_DIR, 'test.html'), 'Debug UI');
101
102
 
102
103
  console.log('\n----------------------------------------------------');
103
- console.log('SETUP COMPLETE (v0.1.20)');
104
+ console.log('SETUP COMPLETE (v0.1.26)');
104
105
  console.log('----------------------------------------------------');
105
106
  console.log('Your precision AI wake word interface is ready.');
106
107
  console.log('\nTo start the demo:');
package/src/index.ts CHANGED
@@ -39,17 +39,18 @@ export class Model {
39
39
  private embeddingSession: ort.InferenceSession | null = null;
40
40
  private vadSession: ort.InferenceSession | null = null;
41
41
  private customSessions: Map<string, ort.InferenceSession> = new Map();
42
+ private embeddingWindowSizes: Map<string, number> = new Map();
42
43
 
43
44
  // Buffers
44
45
  private melBuffer: Float32Array[] = [];
45
- private embeddingBuffers: Map<string, Float32Array[]> = new Map();
46
+ private embeddingBuffers: Float32Array[] = [];
46
47
  private predictionBuffers: Map<string, number[]> = new Map();
47
48
  private vadBuffer: number[] = [];
48
49
  private rawAudioRemainder: Float32Array = new Float32Array(0);
49
50
  private melContextBuffer: Float32Array;
50
51
 
51
52
  // Seeding history
52
- private noiseSeededEmbeddings: Map<string, Float32Array[]> = new Map();
53
+ private noiseSeededEmbeddings: Float32Array[] = [];
53
54
 
54
55
  // Constants
55
56
  private readonly CHUNK_SIZE = 1280;
@@ -62,6 +63,7 @@ export class Model {
62
63
  private readonly MAX_MEL_FRAMES = 970;
63
64
  private readonly INITIAL_FRAMES_SUPPRESS = 5;
64
65
  private readonly PREDICTION_BUFFER_MAX = 30;
66
+ private readonly GLOBAL_MAX_EMBEDDING_WINDOW = 50;
65
67
 
66
68
  // VAD State (Silero VAD)
67
69
  private vadStateH = new Float32Array(2 * 1 * 64).fill(0);
@@ -119,21 +121,43 @@ export class Model {
119
121
  generatedEmbeddings.push(emb);
120
122
  }
121
123
 
124
+ this.noiseSeededEmbeddings = generatedEmbeddings.slice(-this.GLOBAL_MAX_EMBEDDING_WINDOW).map(e => new Float32Array(e));
125
+ this.embeddingBuffers = this.noiseSeededEmbeddings.map(e => new Float32Array(e));
126
+
122
127
  for (const modelPath of this.options.wakewordModels) {
123
128
  const session = await ort.InferenceSession.create(modelPath);
124
129
  const name = this.extractModelName(modelPath);
125
130
  this.customSessions.set(name, session);
126
131
 
127
- // Store seeded history to be used during initial prediction and resets
128
- const history = generatedEmbeddings.slice(-this.EMBEDDING_WINDOW_SIZE).map(e => new Float32Array(e));
129
- this.noiseSeededEmbeddings.set(name, history);
130
- this.embeddingBuffers.set(name, history.map(e => new Float32Array(e)));
132
+ // DYNAMIC DIMENSION DETECTION (Auto-healing via dummy run)
133
+ const inputName = session.inputNames[0];
134
+ let windowSize = 24; // Standard fallback
135
+
136
+ try {
137
+ // We attempt a dummy inference with the default 24 frames.
138
+ // If the model expects 25 (or anything else), ONNX Runtime will explicitly throw an error telling us the exact expected size.
139
+ const dummyTensor = new ort.Tensor('float32', new Float32Array(24 * 96), [1, 24, 96]);
140
+ await session.run({ [inputName]: dummyTensor });
141
+ } catch (e: any) {
142
+ const msg = e.toString();
143
+ const match = msg.match(/Got: \d+ Expected: (\d+)/);
144
+ if (match) {
145
+ windowSize = parseInt(match[1], 10);
146
+ console.log(`Model [${name}] dimension auto-corrected from 24 to ${windowSize} via runtime inspection`);
147
+ } else if (msg.includes('Expected')) {
148
+ // Fallback generic parsing if format varies slightly
149
+ console.warn(`Model [${name}] dummy run failed, but couldn't parse exact dimension. Error: ${msg}`);
150
+ }
151
+ }
152
+
153
+ console.log(`Model [${name}] initialized with dynamic window size: ${windowSize}`);
154
+ this.embeddingWindowSizes.set(name, windowSize);
131
155
  this.predictionBuffers.set(name, []);
132
156
  }
133
157
 
134
- // Final sync: don't call reset() here as it would wipe what we just did
158
+ // Final sync
135
159
  this.isLoaded = true;
136
- console.log('OpenWakeWord models loaded and bit-perfectly aligned');
160
+ console.log('OpenWakeWord models loaded with dynamic dimensionality support');
137
161
  } catch (error) {
138
162
  console.error('Failed to initialize OpenWakeWord models:', error);
139
163
  throw error;
@@ -173,7 +197,7 @@ export class Model {
173
197
  const chunk = combinedAudio.subarray(offset, offset + this.CHUNK_SIZE);
174
198
  offset += this.CHUNK_SIZE;
175
199
 
176
- // PRECISE SLIDING WINDOW: 1280 new samples + 480 context samples
200
+ // PRECISE SLIDING WINDOW
177
201
  const melInput = new Float32Array(this.CHUNK_SIZE + this.MEL_CONTEXT);
178
202
  melInput.set(this.melContextBuffer);
179
203
  melInput.set(chunk, this.MEL_CONTEXT);
@@ -197,12 +221,13 @@ export class Model {
197
221
  while (this.melBuffer.length > this.MAX_MEL_FRAMES) this.melBuffer.shift();
198
222
 
199
223
  const embedding = await this.runEmbeddingModel();
224
+ this.embeddingBuffers.push(embedding);
225
+ while (this.embeddingBuffers.length > this.GLOBAL_MAX_EMBEDDING_WINDOW) this.embeddingBuffers.shift();
226
+
200
227
  for (const [name, session] of this.customSessions.entries()) {
201
- const embBuf = this.embeddingBuffers.get(name)!;
202
- embBuf.shift();
203
- embBuf.push(embedding);
228
+ const windowSize = this.embeddingWindowSizes.get(name) || 24;
204
229
 
205
- let score = await this.runClassifier(name, session);
230
+ let score = await this.runClassifier(name, session, windowSize);
206
231
 
207
232
  if (this.vadSession && this.options.vadThreshold) {
208
233
  const window = this.vadBuffer.slice(-7, -4);
@@ -262,11 +287,15 @@ export class Model {
262
287
  return embedding;
263
288
  }
264
289
 
265
- private async runClassifier(name: string, session: ort.InferenceSession): Promise<number> {
266
- const embBuf = this.embeddingBuffers.get(name)!;
267
- const predData = new Float32Array(this.EMBEDDING_WINDOW_SIZE * 96);
268
- for (let t = 0; t < this.EMBEDDING_WINDOW_SIZE; t++) predData.set(embBuf[t], t * 96);
269
- const predTensor = new ort.Tensor('float32', predData, [1, this.EMBEDDING_WINDOW_SIZE, 96]);
290
+ private async runClassifier(name: string, session: ort.InferenceSession, windowSize: number): Promise<number> {
291
+ const predData = new Float32Array(windowSize * 96);
292
+ const startIdx = this.embeddingBuffers.length - windowSize;
293
+
294
+ for (let t = 0; t < windowSize; t++) {
295
+ predData.set(this.embeddingBuffers[startIdx + t], t * 96);
296
+ }
297
+
298
+ const predTensor = new ort.Tensor('float32', predData, [1, windowSize, 96]);
270
299
  const results = await session.run({ [session.inputNames[0]]: predTensor });
271
300
  return results[session.outputNames[0]].data[0] as number;
272
301
  }
@@ -293,7 +322,7 @@ export class Model {
293
322
 
294
323
  private extractModelName(path: string): string {
295
324
  const base = path.split('/').pop() || path;
296
- return base.replace('.onnx', '').replace('.tflite', '');
325
+ return base.replace('.onnx', '').replace('.tflite', '').replace(/\\/g, '/');
297
326
  }
298
327
 
299
328
  reset() {
@@ -303,14 +332,9 @@ export class Model {
303
332
  this.vadBuffer = [];
304
333
  this.vadStateH.fill(0);
305
334
  this.vadStateC.fill(0);
306
- for (const name of this.embeddingBuffers.keys()) {
335
+ this.embeddingBuffers = this.noiseSeededEmbeddings.map(e => new Float32Array(e));
336
+ for (const name of this.customSessions.keys()) {
307
337
  this.predictionBuffers.set(name, []);
308
- const seeded = this.noiseSeededEmbeddings.get(name);
309
- if (seeded) {
310
- this.embeddingBuffers.set(name, seeded.map(e => new Float32Array(e)));
311
- } else {
312
- this.embeddingBuffers.set(name, Array(this.EMBEDDING_WINDOW_SIZE).fill(0).map(() => new Float32Array(96).fill(0)));
313
- }
314
338
  }
315
339
  }
316
340
  }
package/src/worker.ts ADDED
@@ -0,0 +1,28 @@
1
+ import * as ort from 'onnxruntime-web';
2
+ import { Model } from './index.js';
3
+
4
+ let model: Model | null = null;
5
+
6
+ self.onmessage = async (e) => {
7
+ const { type, data } = e.data;
8
+
9
+ if (type === 'init') {
10
+ try {
11
+ model = new Model(data.options);
12
+ await model.init();
13
+ self.postMessage({ type: 'init-complete' });
14
+ } catch (err: any) {
15
+ self.postMessage({ type: 'error', message: err.message });
16
+ }
17
+ } else if (type === 'predict') {
18
+ if (!model) return;
19
+ try {
20
+ const results = await model.predict(data.audio);
21
+ self.postMessage({ type: 'results', results });
22
+ } catch (err: any) {
23
+ self.postMessage({ type: 'error', message: err.message });
24
+ }
25
+ } else if (type === 'reset') {
26
+ if (model) model.reset();
27
+ }
28
+ };