npm - @lumiastream/wakeword - Versions diffs - 1.1.8 → 1.1.9 - Mend

@lumiastream/wakeword 1.1.8 → 1.1.9

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/lib/voice.js +62 -26
package/package.json +1 -1

package/lib/voice.js CHANGED Viewed

@@ -117,7 +117,8 @@ try {
 /* 2. Resolve Vosk model                                              */
 /* ------------------------------------------------------------------ */
 const envModelPath = (process.env.LUMIA_VOICE_MODEL_PATH || "").trim();
-let modelPath = envModelPath || join(here, "..", "models", "vosk-model-small-en-us-0.15");
+let modelPath =
+	envModelPath || join(here, "..", "models", "vosk-model-small-en-us-0.15");
 modelPath = unpacked(modelPath);
 if (!existsSync(modelPath))
@@ -135,9 +136,10 @@ let EXTRA_GRAMMAR = [];
 const model = new Model(modelPath);
 const buildRecognizer = () => {
-	const recognizer = MATCH_SENTENCE || DISABLE_GRAMMAR
-		? new Recognizer({ model, sampleRate: SAMPLE_RATE })
-		: new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
+	const recognizer =
+		MATCH_SENTENCE || DISABLE_GRAMMAR
+			? new Recognizer({ model, sampleRate: SAMPLE_RATE })
+			: new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
 	recognizer.setWords(true);
 	return recognizer;
 };
@@ -161,7 +163,7 @@ if (audioDevice !== null) {
 	recArgs.device = "default";
 	console.error("Using default Windows audio device: default");
 	console.error(
-		"To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument"
+		"To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument",
 	);
 }
@@ -176,16 +178,17 @@ mic.on("error", (err) => {
 // You might need to adjust this value based on your specific use case.
 let WORD_CONFIDENCE_THRESHOLD = 0.7;
 const DEBUG_AUDIO = ["1", "true", "yes"].includes(
-	(process.env.WAKEWORD_DEBUG || "").toLowerCase()
+	(process.env.WAKEWORD_DEBUG || "").toLowerCase(),
 );
 const LOG_PARTIAL =
 	DEBUG_AUDIO ||
 	["1", "true", "yes"].includes(
-		(process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase()
+		(process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase(),
 	);
 let LOG_FINAL = ["1", "true", "yes"].includes(
-	(process.env.WAKEWORD_LOG_FINAL || "").toLowerCase()
+	(process.env.WAKEWORD_LOG_FINAL || "").toLowerCase(),
 );
+let emittedMatchesInUtterance = new Set();
 let lastLevelLog = 0;
 function logAudioLevel(buf) {
@@ -231,40 +234,50 @@ mic.on("data", (buf) => {
 					console.log(
 						`Discarding low-confidence word: "${
 							wordDetail.word
-						}" (Conf: ${wordDetail.conf.toFixed(2)})`
+						}" (Conf: ${wordDetail.conf.toFixed(2)})`,
 					);
 				}
 			}
 			const finalRecognizedText = recognizedWords.join(" ").trim();
 			const averageConfidenceAll =
-				totalConfidenceCount > 0 ? totalConfidenceAll / totalConfidenceCount : 0;
+				totalConfidenceCount > 0
+					? totalConfidenceAll / totalConfidenceCount
+					: 0;
 			const averageConfidence =
 				recognizedWords.length > 0
 					? totalConfidence / recognizedWords.length
 					: averageConfidenceAll;
-			handle(finalRecognizedText, averageConfidence, fullResult.text); // Pass both the filtered text and an average confidence
+			handle(finalRecognizedText, averageConfidence, fullResult.text, {
+				isPartial: false,
+			}); // Pass both the filtered text and an average confidence
 		} else if (fullResult && fullResult.text) {
-			// Fallback for cases where setWords(true) might not fully apply or for partial results
-			handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
+			// Fallback for cases where setWords(true) might not fully apply
+			handle(fullResult.text.trim(), 1.0, fullResult.text, {
+				isPartial: false,
+			}); // Assume high confidence if no word-level details
 		}
-	} else if (LOG_PARTIAL) {
+	} else {
 		const partial = rec.partialResult();
-		if (partial?.partial) {
+		if (partial?.partial && LOG_PARTIAL) {
 			console.error(`[wakeword] partial: "${partial.partial}"`);
 		}
+		if (partial?.partial && !MATCH_SENTENCE) {
+			handle(partial.partial.trim(), 1.0, partial.partial, { isPartial: true });
+		}
 	}
 });
-function handle(processedWord, averageConfidence, originalText) {
+function handle(processedWord, averageConfidence, originalText, options = {}) {
+	const { isPartial = false } = options;
 	if (!processedWord && !originalText) return;
 	const finalSentence =
 		typeof originalText === "string" && originalText.trim()
 			? originalText.trim()
 			: (processedWord ?? "").toString().trim();
-	if (LOG_FINAL && finalSentence) {
+	if (!isPartial && LOG_FINAL && finalSentence) {
 		process.stdout?.write(`final|${finalSentence}\n`);
 	}
@@ -279,11 +292,11 @@ function handle(processedWord, averageConfidence, originalText) {
 		if (!tokens?.length) return;
 		const hits = MATCH_SENTENCE
 			? allowedCommands.filter((command) =>
-					tokensContainSequence(tokens, tokenize(command))
-			  )
+					tokensContainSequence(tokens, tokenize(command)),
+				)
 			: allowedCommands.filter((command) =>
-					tokensEqual(tokens, tokenize(command))
-			  );
+					tokensEqual(tokens, tokenize(command)),
+				);
 		hits.forEach((hit) => matches.add(hit));
 	};
@@ -306,19 +319,42 @@ function handle(processedWord, averageConfidence, originalText) {
 	// If word-level confidence filtering removed all words, fall back to the
 	// original text when overall confidence is still acceptable.
-	if (!matches.size && normalizedOriginal && averageConfidence >= WORD_CONFIDENCE_THRESHOLD) {
+	if (
+		!matches.size &&
+		normalizedOriginal &&
+		averageConfidence >= WORD_CONFIDENCE_THRESHOLD
+	) {
 		findMatches(originalTokens);
 	}
-	if (!matches.size) return;
+	if (!matches.size) {
+		if (!isPartial) {
+			emittedMatchesInUtterance.clear();
+		}
+		return;
+	}
-	matches.forEach((match) => {
+	const uniqueMatches = [...matches].filter(
+		(match) => !emittedMatchesInUtterance.has(match),
+	);
+	if (!uniqueMatches.length) {
+		if (!isPartial) {
+			emittedMatchesInUtterance.clear();
+		}
+		return;
+	}
+	uniqueMatches.forEach((match) => {
 		if (finalSentence) {
 			process.stdout?.write(`sentence|${finalSentence}\n`);
 		}
 		process.stdout?.write(`voice|${match}\n`);
 		process.stdout?.write(`confidence|${averageConfidence}\n`);
+		emittedMatchesInUtterance.add(match);
 	});
+	if (!isPartial) {
+		emittedMatchesInUtterance.clear();
+	}
 }
 /* ------------------------------------------------------------------ */
 /* 6. Hot-reload grammar via stdin                                    */
@@ -354,7 +390,7 @@ rl.on("line", (line) => {
 		EXTRA_GRAMMAR = phrases;
 		GRAMMAR = [...COMMANDS, ...EXTRA_GRAMMAR, UNKNOWN_TOKEN];
 		console.error(
-			`[wakeword] extra grammar updated (${phrases.length}): ${phrases.join(", ")}`
+			`[wakeword] extra grammar updated (${phrases.length}): ${phrases.join(", ")}`,
 		);
 		rec = buildRecognizer();
 		return;
@@ -365,7 +401,7 @@ rl.on("line", (line) => {
 	COMMANDS = phrases;
 	GRAMMAR = [...COMMANDS, ...EXTRA_GRAMMAR, UNKNOWN_TOKEN];
 	console.error(
-		`[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`
+		`[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`,
 	);
 	rec = buildRecognizer();
 });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@lumiastream/wakeword",
-	"version": "1.1.8",
+	"version": "1.1.9",
 	"type": "module",
 	"main": "lib/index.js",
 	"files": [