npm - @lumiastream/wakeword - Versions diffs - 1.1.7 → 1.1.9 - Mend

@lumiastream/wakeword 1.1.7 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/lib/voice.js +110 -31
package/package.json +1 -1

package/lib/voice.js CHANGED

@@ -51,6 +51,30 @@ const UNKNOWN_TOKEN = "[unk]";
 const normalizePhrase = (phrase = "") => phrase.trim().toLowerCase();
 const toBool = (v = "") =>
 	["1", "true", "yes", "y"].includes(`${v}`.trim().toLowerCase());
+const tokenize = (phrase = "") =>
+	(phrase ?? "")
+		.toString()
+		.toLowerCase()
+		.replace(/[^a-z0-9]+/gi, " ")
+		.trim()
+		.split(/\s+/)
+		.filter(Boolean);
+const tokensEqual = (a = [], b = []) =>
+	a.length === b.length && a.every((token, idx) => token === b[idx]);
+const tokensContainSequence = (tokens = [], phraseTokens = []) => {
+	if (!phraseTokens.length || tokens.length < phraseTokens.length) return false;
+	for (let i = 0; i <= tokens.length - phraseTokens.length; i += 1) {
+		let matches = true;
+		for (let j = 0; j < phraseTokens.length; j += 1) {
+			if (tokens[i + j] !== phraseTokens[j]) {
+				matches = false;
+				break;
+			}
+		}
+		if (matches) return true;
+	}
+	return false;
+};
 /* ------------------------------------------------------------------ */
 /* 1. Resolve SoX binary and audio device                            */
@@ -93,7 +117,8 @@ try {
 /* 2. Resolve Vosk model                                              */
 /* ------------------------------------------------------------------ */
 const envModelPath = (process.env.LUMIA_VOICE_MODEL_PATH || "").trim();
-let modelPath = envModelPath || join(here, "..", "models", "vosk-model-small-en-us-0.15");
+let modelPath =
+	envModelPath || join(here, "..", "models", "vosk-model-small-en-us-0.15");
 modelPath = unpacked(modelPath);
 if (!existsSync(modelPath))
@@ -107,12 +132,14 @@ setLogLevel(0);
 const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000);
 let GRAMMAR = [UNKNOWN_TOKEN]; // seed; always keep [unk]
 let COMMANDS = [];
+let EXTRA_GRAMMAR = [];
 const model = new Model(modelPath);
 const buildRecognizer = () => {
-	const recognizer = MATCH_SENTENCE || DISABLE_GRAMMAR
-		? new Recognizer({ model, sampleRate: SAMPLE_RATE })
-		: new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
+	const recognizer =
+		MATCH_SENTENCE || DISABLE_GRAMMAR
+			? new Recognizer({ model, sampleRate: SAMPLE_RATE })
+			: new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
 	recognizer.setWords(true);
 	return recognizer;
 };
@@ -136,7 +163,7 @@ if (audioDevice !== null) {
 	recArgs.device = "default";
 	console.error("Using default Windows audio device: default");
 	console.error(
-		"To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument"
+		"To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument",
 	);
 }
@@ -151,16 +178,17 @@ mic.on("error", (err) => {
 // You might need to adjust this value based on your specific use case.
 let WORD_CONFIDENCE_THRESHOLD = 0.7;
 const DEBUG_AUDIO = ["1", "true", "yes"].includes(
-	(process.env.WAKEWORD_DEBUG || "").toLowerCase()
+	(process.env.WAKEWORD_DEBUG || "").toLowerCase(),
 );
 const LOG_PARTIAL =
 	DEBUG_AUDIO ||
 	["1", "true", "yes"].includes(
-		(process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase()
+		(process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase(),
 	);
 let LOG_FINAL = ["1", "true", "yes"].includes(
-	(process.env.WAKEWORD_LOG_FINAL || "").toLowerCase()
+	(process.env.WAKEWORD_LOG_FINAL || "").toLowerCase(),
 );
+let emittedMatchesInUtterance = new Set();
 let lastLevelLog = 0;
 function logAudioLevel(buf) {
@@ -206,62 +234,79 @@ mic.on("data", (buf) => {
 					console.log(
 						`Discarding low-confidence word: "${
 							wordDetail.word
-						}" (Conf: ${wordDetail.conf.toFixed(2)})`
+						}" (Conf: ${wordDetail.conf.toFixed(2)})`,
 					);
 				}
 			}
 			const finalRecognizedText = recognizedWords.join(" ").trim();
 			const averageConfidenceAll =
-				totalConfidenceCount > 0 ? totalConfidenceAll / totalConfidenceCount : 0;
+				totalConfidenceCount > 0
+					? totalConfidenceAll / totalConfidenceCount
+					: 0;
 			const averageConfidence =
 				recognizedWords.length > 0
 					? totalConfidence / recognizedWords.length
 					: averageConfidenceAll;
-			handle(finalRecognizedText, averageConfidence, fullResult.text); // Pass both the filtered text and an average confidence
+			handle(finalRecognizedText, averageConfidence, fullResult.text, {
+				isPartial: false,
+			}); // Pass both the filtered text and an average confidence
 		} else if (fullResult && fullResult.text) {
-			// Fallback for cases where setWords(true) might not fully apply or for partial results
-			handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
+			// Fallback for cases where setWords(true) might not fully apply
+			handle(fullResult.text.trim(), 1.0, fullResult.text, {
+				isPartial: false,
+			}); // Assume high confidence if no word-level details
 		}
-	} else if (LOG_PARTIAL) {
+	} else {
 		const partial = rec.partialResult();
-		if (partial?.partial) {
+		if (partial?.partial && LOG_PARTIAL) {
 			console.error(`[wakeword] partial: "${partial.partial}"`);
 		}
+		if (partial?.partial && !MATCH_SENTENCE) {
+			handle(partial.partial.trim(), 1.0, partial.partial, { isPartial: true });
+		}
 	}
 });
-function handle(processedWord, averageConfidence, originalText) {
+function handle(processedWord, averageConfidence, originalText, options = {}) {
+	const { isPartial = false } = options;
 	if (!processedWord && !originalText) return;
 	const finalSentence =
 		typeof originalText === "string" && originalText.trim()
 			? originalText.trim()
 			: (processedWord ?? "").toString().trim();
-	if (LOG_FINAL && finalSentence) {
+	if (!isPartial && LOG_FINAL && finalSentence) {
 		process.stdout?.write(`final|${finalSentence}\n`);
 	}
 	const normalizedProcessed = normalizePhrase(processedWord);
 	const normalizedOriginal = normalizePhrase(originalText);
+	const processedTokens = tokenize(normalizedProcessed);
+	const originalTokens = tokenize(normalizedOriginal);
 	const matches = new Set();
 	const confidentCommands = new Set();
-	const findMatches = (text, allowedCommands = COMMANDS) => {
-		if (!text || text.includes(UNKNOWN_TOKEN)) return;
+	const findMatches = (tokens, allowedCommands = COMMANDS) => {
+		if (!tokens?.length) return;
 		const hits = MATCH_SENTENCE
-			? allowedCommands.filter((command) => text.includes(command))
-			: allowedCommands.filter((command) => text === command);
+			? allowedCommands.filter((command) =>
+					tokensContainSequence(tokens, tokenize(command)),
+				)
+			: allowedCommands.filter((command) =>
+					tokensEqual(tokens, tokenize(command)),
+				);
 		hits.forEach((hit) => matches.add(hit));
 	};
 	// Only allow sentence matches for commands that were confidently recognized.
 	if (normalizedProcessed) {
 		COMMANDS.forEach((command) => {
+			const commandTokens = tokenize(command);
 			const isMatch = MATCH_SENTENCE
-				? normalizedProcessed.includes(command)
-				: normalizedProcessed === command;
+				? tokensContainSequence(processedTokens, commandTokens)
+				: tokensEqual(processedTokens, commandTokens);
 			if (isMatch) {
 				confidentCommands.add(command);
 			}
@@ -269,24 +314,47 @@ function handle(processedWord, averageConfidence, originalText) {
 	}
 	// Try the filtered text first, then fall back to the raw sentence only for confident commands.
-	findMatches(normalizedProcessed);
-	findMatches(normalizedOriginal, [...confidentCommands]);
+	findMatches(processedTokens);
+	findMatches(originalTokens, [...confidentCommands]);
 	// If word-level confidence filtering removed all words, fall back to the
 	// original text when overall confidence is still acceptable.
-	if (!matches.size && normalizedOriginal && averageConfidence >= WORD_CONFIDENCE_THRESHOLD) {
-		findMatches(normalizedOriginal);
+	if (
+		!matches.size &&
+		normalizedOriginal &&
+		averageConfidence >= WORD_CONFIDENCE_THRESHOLD
+	) {
+		findMatches(originalTokens);
 	}
-	if (!matches.size) return;
+	if (!matches.size) {
+		if (!isPartial) {
+			emittedMatchesInUtterance.clear();
+		}
+		return;
+	}
-	matches.forEach((match) => {
+	const uniqueMatches = [...matches].filter(
+		(match) => !emittedMatchesInUtterance.has(match),
+	);
+	if (!uniqueMatches.length) {
+		if (!isPartial) {
+			emittedMatchesInUtterance.clear();
+		}
+		return;
+	}
+	uniqueMatches.forEach((match) => {
 		if (finalSentence) {
 			process.stdout?.write(`sentence|${finalSentence}\n`);
 		}
 		process.stdout?.write(`voice|${match}\n`);
 		process.stdout?.write(`confidence|${averageConfidence}\n`);
+		emittedMatchesInUtterance.add(match);
 	});
+	if (!isPartial) {
+		emittedMatchesInUtterance.clear();
+	}
 }
 /* ------------------------------------------------------------------ */
 /* 6. Hot-reload grammar via stdin                                    */
@@ -297,6 +365,7 @@ rl.on("line", (line) => {
 	const trimmed = line.trim();
 	if (
 		!trimmed.startsWith("update,") &&
+		!trimmed.startsWith("extras,") &&
 		!trimmed.startsWith("confidence,") &&
 		!trimmed.startsWith("debug,")
 	)
@@ -317,12 +386,22 @@ rl.on("line", (line) => {
 		.map((s) => normalizePhrase(s))
 		.filter(Boolean);
+	if (trimmed.startsWith("extras,")) {
+		EXTRA_GRAMMAR = phrases;
+		GRAMMAR = [...COMMANDS, ...EXTRA_GRAMMAR, UNKNOWN_TOKEN];
+		console.error(
+			`[wakeword] extra grammar updated (${phrases.length}): ${phrases.join(", ")}`,
+		);
+		rec = buildRecognizer();
+		return;
+	}
 	if (!phrases.length) return;
 	COMMANDS = phrases;
-	GRAMMAR = [...phrases, UNKNOWN_TOKEN];
+	GRAMMAR = [...COMMANDS, ...EXTRA_GRAMMAR, UNKNOWN_TOKEN];
 	console.error(
-		`[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`
+		`[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`,
 	);
 	rec = buildRecognizer();
 });

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
 	"name": "@lumiastream/wakeword",
-	"version": "1.1.7",
+	"version": "1.1.9",
 	"type": "module",
 	"main": "lib/index.js",
 	"files": [