@lumiastream/wakeword 1.1.9 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/lib/voice.js +33 -5
  2. package/package.json +1 -1
package/lib/voice.js CHANGED
@@ -48,6 +48,7 @@ function unpacked(p) {
 }
 
 const UNKNOWN_TOKEN = "[unk]";
+const UNKNOWN_TOKEN_NORMALIZED = "unk";
 const normalizePhrase = (phrase = "") => phrase.trim().toLowerCase();
 const toBool = (v = "") =>
   ["1", "true", "yes", "y"].includes(`${v}`.trim().toLowerCase());
@@ -61,6 +62,23 @@ const tokenize = (phrase = "") =>
     .filter(Boolean);
 const tokensEqual = (a = [], b = []) =>
   a.length === b.length && a.every((token, idx) => token === b[idx]);
+const trimUnknownBoundaryTokens = (tokens = []) => {
+  let start = 0;
+  let end = tokens.length;
+  while (
+    start < end &&
+    (tokens[start] === UNKNOWN_TOKEN_NORMALIZED || tokens[start] === UNKNOWN_TOKEN)
+  ) {
+    start += 1;
+  }
+  while (
+    end > start &&
+    (tokens[end - 1] === UNKNOWN_TOKEN_NORMALIZED || tokens[end - 1] === UNKNOWN_TOKEN)
+  ) {
+    end -= 1;
+  }
+  return tokens.slice(start, end);
+};
 const tokensContainSequence = (tokens = [], phraseTokens = []) => {
   if (!phraseTokens.length || tokens.length < phraseTokens.length) return false;
   for (let i = 0; i <= tokens.length - phraseTokens.length; i += 1) {
@@ -283,8 +301,12 @@ function handle(processedWord, averageConfidence, originalText, options = {}) {
 
   const normalizedProcessed = normalizePhrase(processedWord);
   const normalizedOriginal = normalizePhrase(originalText);
-  const processedTokens = tokenize(normalizedProcessed);
-  const originalTokens = tokenize(normalizedOriginal);
+  const processedTokens = trimUnknownBoundaryTokens(
+    tokenize(normalizedProcessed),
+  );
+  const originalTokens = trimUnknownBoundaryTokens(
+    tokenize(normalizedOriginal),
+  );
   const matches = new Set();
   const confidentCommands = new Set();
 
@@ -292,10 +314,16 @@ function handle(processedWord, averageConfidence, originalText, options = {}) {
     if (!tokens?.length) return;
     const hits = MATCH_SENTENCE
       ? allowedCommands.filter((command) =>
-          tokensContainSequence(tokens, tokenize(command)),
+          tokensContainSequence(
+            tokens,
+            trimUnknownBoundaryTokens(tokenize(command)),
+          ),
         )
       : allowedCommands.filter((command) =>
-          tokensEqual(tokens, tokenize(command)),
+          tokensEqual(
+            tokens,
+            trimUnknownBoundaryTokens(tokenize(command)),
+          ),
         );
     hits.forEach((hit) => matches.add(hit));
   };
@@ -303,7 +331,7 @@ function handle(processedWord, averageConfidence, originalText, options = {}) {
   // Only allow sentence matches for commands that were confidently recognized.
   if (normalizedProcessed) {
     COMMANDS.forEach((command) => {
-      const commandTokens = tokenize(command);
+      const commandTokens = trimUnknownBoundaryTokens(tokenize(command));
       const isMatch = MATCH_SENTENCE
         ? tokensContainSequence(processedTokens, commandTokens)
         : tokensEqual(processedTokens, commandTokens);
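
What changed, in effect: tokens that the recognizer emits as unknown-word placeholders ("[unk]", or a normalized "unk") are now trimmed from the start and end of both the transcript and each configured command before exact or sentence matching runs, so stray filler at the phrase boundaries no longer blocks a match. The sketch below illustrates the before/after behavior; trimUnknownBoundaryTokens is copied from the diff above, while tokenizeSimple and the sample transcript/command are simplified stand-ins for illustration only (the package's real tokenize helper is not shown in this diff).

// Constants and helper copied from the 1.2.0 diff above.
const UNKNOWN_TOKEN = "[unk]";
const UNKNOWN_TOKEN_NORMALIZED = "unk";

const trimUnknownBoundaryTokens = (tokens = []) => {
  let start = 0;
  let end = tokens.length;
  while (
    start < end &&
    (tokens[start] === UNKNOWN_TOKEN_NORMALIZED || tokens[start] === UNKNOWN_TOKEN)
  ) {
    start += 1;
  }
  while (
    end > start &&
    (tokens[end - 1] === UNKNOWN_TOKEN_NORMALIZED || tokens[end - 1] === UNKNOWN_TOKEN)
  ) {
    end -= 1;
  }
  return tokens.slice(start, end);
};

const tokensEqual = (a = [], b = []) =>
  a.length === b.length && a.every((token, idx) => token === b[idx]);

// Simplified stand-in for the package's tokenize helper (not shown in this diff):
// lowercase, split on whitespace, drop empty strings.
const tokenizeSimple = (phrase = "") =>
  phrase.trim().toLowerCase().split(/\s+/).filter(Boolean);

// Hypothetical transcript with a stray unknown-word placeholder at the front.
const transcript = "[unk] start stream";
const command = "start stream";

// 1.1.9 behavior: the leading "[unk]" token makes the exact-match comparison fail.
tokensEqual(tokenizeSimple(transcript), tokenizeSimple(command)); // false

// 1.2.0 behavior: boundary "[unk]" / "unk" tokens are trimmed first, so the command matches.
tokensEqual(
  trimUnknownBoundaryTokens(tokenizeSimple(transcript)),
  trimUnknownBoundaryTokens(tokenizeSimple(command)),
); // true

Note that only leading and trailing placeholders are removed; an unknown token in the middle of a phrase is left in place and still counts against an exact match.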
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lumiastream/wakeword",
-  "version": "1.1.9",
+  "version": "1.2.0",
   "type": "module",
   "main": "lib/index.js",
   "files": [