@lumiastream/wakeword 1.1.9 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/voice.js +33 -5
- package/package.json +1 -1
package/lib/voice.js
CHANGED
|
@@ -48,6 +48,7 @@ function unpacked(p) {
 }
 
 const UNKNOWN_TOKEN = "[unk]";
+const UNKNOWN_TOKEN_NORMALIZED = "unk";
 const normalizePhrase = (phrase = "") => phrase.trim().toLowerCase();
 const toBool = (v = "") =>
   ["1", "true", "yes", "y"].includes(`${v}`.trim().toLowerCase());
@@ -61,6 +62,23 @@ const tokenize = (phrase = "") =>
   .filter(Boolean);
 const tokensEqual = (a = [], b = []) =>
   a.length === b.length && a.every((token, idx) => token === b[idx]);
+const trimUnknownBoundaryTokens = (tokens = []) => {
+  let start = 0;
+  let end = tokens.length;
+  while (
+    start < end &&
+    (tokens[start] === UNKNOWN_TOKEN_NORMALIZED || tokens[start] === UNKNOWN_TOKEN)
+  ) {
+    start += 1;
+  }
+  while (
+    end > start &&
+    (tokens[end - 1] === UNKNOWN_TOKEN_NORMALIZED || tokens[end - 1] === UNKNOWN_TOKEN)
+  ) {
+    end -= 1;
+  }
+  return tokens.slice(start, end);
+};
 const tokensContainSequence = (tokens = [], phraseTokens = []) => {
   if (!phraseTokens.length || tokens.length < phraseTokens.length) return false;
   for (let i = 0; i <= tokens.length - phraseTokens.length; i += 1) {
@@ -283,8 +301,12 @@ function handle(processedWord, averageConfidence, originalText, options = {}) {
 
   const normalizedProcessed = normalizePhrase(processedWord);
   const normalizedOriginal = normalizePhrase(originalText);
-  const processedTokens =
-
+  const processedTokens = trimUnknownBoundaryTokens(
+    tokenize(normalizedProcessed),
+  );
+  const originalTokens = trimUnknownBoundaryTokens(
+    tokenize(normalizedOriginal),
+  );
   const matches = new Set();
   const confidentCommands = new Set();
 
@@ -292,10 +314,16 @@ function handle(processedWord, averageConfidence, originalText, options = {}) {
     if (!tokens?.length) return;
     const hits = MATCH_SENTENCE
       ? allowedCommands.filter((command) =>
-          tokensContainSequence(
+          tokensContainSequence(
+            tokens,
+            trimUnknownBoundaryTokens(tokenize(command)),
+          ),
         )
       : allowedCommands.filter((command) =>
-          tokensEqual(
+          tokensEqual(
+            tokens,
+            trimUnknownBoundaryTokens(tokenize(command)),
+          ),
       );
     hits.forEach((hit) => matches.add(hit));
   };
@@ -303,7 +331,7 @@ function handle(processedWord, averageConfidence, originalText, options = {}) {
   // Only allow sentence matches for commands that were confidently recognized.
   if (normalizedProcessed) {
     COMMANDS.forEach((command) => {
-      const commandTokens = tokenize(command);
+      const commandTokens = trimUnknownBoundaryTokens(tokenize(command));
       const isMatch = MATCH_SENTENCE
         ? tokensContainSequence(processedTokens, commandTokens)
         : tokensEqual(processedTokens, commandTokens);
|