@lumiastream/wakeword 1.1.8 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/voice.js +62 -26
- package/package.json +1 -1
package/lib/voice.js
CHANGED
|
@@ -117,7 +117,8 @@ try {
|
|
|
117
117
|
/* 2. Resolve Vosk model */
|
|
118
118
|
/* ------------------------------------------------------------------ */
|
|
119
119
|
const envModelPath = (process.env.LUMIA_VOICE_MODEL_PATH || "").trim();
|
|
120
|
-
let modelPath =
|
|
120
|
+
let modelPath =
|
|
121
|
+
envModelPath || join(here, "..", "models", "vosk-model-small-en-us-0.15");
|
|
121
122
|
modelPath = unpacked(modelPath);
|
|
122
123
|
|
|
123
124
|
if (!existsSync(modelPath))
|
|
@@ -135,9 +136,10 @@ let EXTRA_GRAMMAR = [];
|
|
|
135
136
|
|
|
136
137
|
const model = new Model(modelPath);
|
|
137
138
|
const buildRecognizer = () => {
|
|
138
|
-
const recognizer =
|
|
139
|
-
|
|
140
|
-
|
|
139
|
+
const recognizer =
|
|
140
|
+
MATCH_SENTENCE || DISABLE_GRAMMAR
|
|
141
|
+
? new Recognizer({ model, sampleRate: SAMPLE_RATE })
|
|
142
|
+
: new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
|
|
141
143
|
recognizer.setWords(true);
|
|
142
144
|
return recognizer;
|
|
143
145
|
};
|
|
@@ -161,7 +163,7 @@ if (audioDevice !== null) {
|
|
|
161
163
|
recArgs.device = "default";
|
|
162
164
|
console.error("Using default Windows audio device: default");
|
|
163
165
|
console.error(
|
|
164
|
-
"To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument"
|
|
166
|
+
"To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument",
|
|
165
167
|
);
|
|
166
168
|
}
|
|
167
169
|
|
|
@@ -176,16 +178,17 @@ mic.on("error", (err) => {
|
|
|
176
178
|
// You might need to adjust this value based on your specific use case.
|
|
177
179
|
let WORD_CONFIDENCE_THRESHOLD = 0.7;
|
|
178
180
|
const DEBUG_AUDIO = ["1", "true", "yes"].includes(
|
|
179
|
-
(process.env.WAKEWORD_DEBUG || "").toLowerCase()
|
|
181
|
+
(process.env.WAKEWORD_DEBUG || "").toLowerCase(),
|
|
180
182
|
);
|
|
181
183
|
const LOG_PARTIAL =
|
|
182
184
|
DEBUG_AUDIO ||
|
|
183
185
|
["1", "true", "yes"].includes(
|
|
184
|
-
(process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase()
|
|
186
|
+
(process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase(),
|
|
185
187
|
);
|
|
186
188
|
let LOG_FINAL = ["1", "true", "yes"].includes(
|
|
187
|
-
(process.env.WAKEWORD_LOG_FINAL || "").toLowerCase()
|
|
189
|
+
(process.env.WAKEWORD_LOG_FINAL || "").toLowerCase(),
|
|
188
190
|
);
|
|
191
|
+
let emittedMatchesInUtterance = new Set();
|
|
189
192
|
let lastLevelLog = 0;
|
|
190
193
|
|
|
191
194
|
function logAudioLevel(buf) {
|
|
@@ -231,40 +234,50 @@ mic.on("data", (buf) => {
|
|
|
231
234
|
console.log(
|
|
232
235
|
`Discarding low-confidence word: "${
|
|
233
236
|
wordDetail.word
|
|
234
|
-
}" (Conf: ${wordDetail.conf.toFixed(2)})
|
|
237
|
+
}" (Conf: ${wordDetail.conf.toFixed(2)})`,
|
|
235
238
|
);
|
|
236
239
|
}
|
|
237
240
|
}
|
|
238
241
|
|
|
239
242
|
const finalRecognizedText = recognizedWords.join(" ").trim();
|
|
240
243
|
const averageConfidenceAll =
|
|
241
|
-
totalConfidenceCount > 0
|
|
244
|
+
totalConfidenceCount > 0
|
|
245
|
+
? totalConfidenceAll / totalConfidenceCount
|
|
246
|
+
: 0;
|
|
242
247
|
const averageConfidence =
|
|
243
248
|
recognizedWords.length > 0
|
|
244
249
|
? totalConfidence / recognizedWords.length
|
|
245
250
|
: averageConfidenceAll;
|
|
246
251
|
|
|
247
|
-
handle(finalRecognizedText, averageConfidence, fullResult.text
|
|
252
|
+
handle(finalRecognizedText, averageConfidence, fullResult.text, {
|
|
253
|
+
isPartial: false,
|
|
254
|
+
}); // Pass both the filtered text and an average confidence
|
|
248
255
|
} else if (fullResult && fullResult.text) {
|
|
249
|
-
// Fallback for cases where setWords(true) might not fully apply
|
|
250
|
-
handle(fullResult.text.trim(), 1.0, fullResult.text
|
|
256
|
+
// Fallback for cases where setWords(true) might not fully apply
|
|
257
|
+
handle(fullResult.text.trim(), 1.0, fullResult.text, {
|
|
258
|
+
isPartial: false,
|
|
259
|
+
}); // Assume high confidence if no word-level details
|
|
251
260
|
}
|
|
252
|
-
} else
|
|
261
|
+
} else {
|
|
253
262
|
const partial = rec.partialResult();
|
|
254
|
-
if (partial?.partial) {
|
|
263
|
+
if (partial?.partial && LOG_PARTIAL) {
|
|
255
264
|
console.error(`[wakeword] partial: "${partial.partial}"`);
|
|
256
265
|
}
|
|
266
|
+
if (partial?.partial && !MATCH_SENTENCE) {
|
|
267
|
+
handle(partial.partial.trim(), 1.0, partial.partial, { isPartial: true });
|
|
268
|
+
}
|
|
257
269
|
}
|
|
258
270
|
});
|
|
259
271
|
|
|
260
|
-
function handle(processedWord, averageConfidence, originalText) {
|
|
272
|
+
function handle(processedWord, averageConfidence, originalText, options = {}) {
|
|
273
|
+
const { isPartial = false } = options;
|
|
261
274
|
if (!processedWord && !originalText) return;
|
|
262
275
|
|
|
263
276
|
const finalSentence =
|
|
264
277
|
typeof originalText === "string" && originalText.trim()
|
|
265
278
|
? originalText.trim()
|
|
266
279
|
: (processedWord ?? "").toString().trim();
|
|
267
|
-
if (LOG_FINAL && finalSentence) {
|
|
280
|
+
if (!isPartial && LOG_FINAL && finalSentence) {
|
|
268
281
|
process.stdout?.write(`final|${finalSentence}\n`);
|
|
269
282
|
}
|
|
270
283
|
|
|
@@ -279,11 +292,11 @@ function handle(processedWord, averageConfidence, originalText) {
|
|
|
279
292
|
if (!tokens?.length) return;
|
|
280
293
|
const hits = MATCH_SENTENCE
|
|
281
294
|
? allowedCommands.filter((command) =>
|
|
282
|
-
tokensContainSequence(tokens, tokenize(command))
|
|
283
|
-
|
|
295
|
+
tokensContainSequence(tokens, tokenize(command)),
|
|
296
|
+
)
|
|
284
297
|
: allowedCommands.filter((command) =>
|
|
285
|
-
tokensEqual(tokens, tokenize(command))
|
|
286
|
-
|
|
298
|
+
tokensEqual(tokens, tokenize(command)),
|
|
299
|
+
);
|
|
287
300
|
hits.forEach((hit) => matches.add(hit));
|
|
288
301
|
};
|
|
289
302
|
|
|
@@ -306,19 +319,42 @@ function handle(processedWord, averageConfidence, originalText) {
|
|
|
306
319
|
|
|
307
320
|
// If word-level confidence filtering removed all words, fall back to the
|
|
308
321
|
// original text when overall confidence is still acceptable.
|
|
309
|
-
if (
|
|
322
|
+
if (
|
|
323
|
+
!matches.size &&
|
|
324
|
+
normalizedOriginal &&
|
|
325
|
+
averageConfidence >= WORD_CONFIDENCE_THRESHOLD
|
|
326
|
+
) {
|
|
310
327
|
findMatches(originalTokens);
|
|
311
328
|
}
|
|
312
329
|
|
|
313
|
-
if (!matches.size)
|
|
330
|
+
if (!matches.size) {
|
|
331
|
+
if (!isPartial) {
|
|
332
|
+
emittedMatchesInUtterance.clear();
|
|
333
|
+
}
|
|
334
|
+
return;
|
|
335
|
+
}
|
|
314
336
|
|
|
315
|
-
matches.
|
|
337
|
+
const uniqueMatches = [...matches].filter(
|
|
338
|
+
(match) => !emittedMatchesInUtterance.has(match),
|
|
339
|
+
);
|
|
340
|
+
if (!uniqueMatches.length) {
|
|
341
|
+
if (!isPartial) {
|
|
342
|
+
emittedMatchesInUtterance.clear();
|
|
343
|
+
}
|
|
344
|
+
return;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
uniqueMatches.forEach((match) => {
|
|
316
348
|
if (finalSentence) {
|
|
317
349
|
process.stdout?.write(`sentence|${finalSentence}\n`);
|
|
318
350
|
}
|
|
319
351
|
process.stdout?.write(`voice|${match}\n`);
|
|
320
352
|
process.stdout?.write(`confidence|${averageConfidence}\n`);
|
|
353
|
+
emittedMatchesInUtterance.add(match);
|
|
321
354
|
});
|
|
355
|
+
if (!isPartial) {
|
|
356
|
+
emittedMatchesInUtterance.clear();
|
|
357
|
+
}
|
|
322
358
|
}
|
|
323
359
|
/* ------------------------------------------------------------------ */
|
|
324
360
|
/* 6. Hot-reload grammar via stdin */
|
|
@@ -354,7 +390,7 @@ rl.on("line", (line) => {
|
|
|
354
390
|
EXTRA_GRAMMAR = phrases;
|
|
355
391
|
GRAMMAR = [...COMMANDS, ...EXTRA_GRAMMAR, UNKNOWN_TOKEN];
|
|
356
392
|
console.error(
|
|
357
|
-
`[wakeword] extra grammar updated (${phrases.length}): ${phrases.join(", ")}
|
|
393
|
+
`[wakeword] extra grammar updated (${phrases.length}): ${phrases.join(", ")}`,
|
|
358
394
|
);
|
|
359
395
|
rec = buildRecognizer();
|
|
360
396
|
return;
|
|
@@ -365,7 +401,7 @@ rl.on("line", (line) => {
|
|
|
365
401
|
COMMANDS = phrases;
|
|
366
402
|
GRAMMAR = [...COMMANDS, ...EXTRA_GRAMMAR, UNKNOWN_TOKEN];
|
|
367
403
|
console.error(
|
|
368
|
-
`[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}
|
|
404
|
+
`[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`,
|
|
369
405
|
);
|
|
370
406
|
rec = buildRecognizer();
|
|
371
407
|
});
|