npm - @lumiastream/wakeword - Versions diffs - 1.0.1-alpha.9 → 1.0.1 - Mend

@@ -1,111 +1,144 @@
+// voice.js  (ESM)
 import { Model, Recognizer, setLogLevel } from "vosk-koffi";
 import record from "./record.js";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
-import { existsSync } from "node:fs";
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const binPath = join(
-	"binaries",
-	process.platform === "win32"
-		? "sox.exe"
-		: process.platform === "darwin"
-		? "soxmac"
-		: "soxlinux"
-);
-let COMMANDS = [
-	"blue",
-	"[unk]", // always keep an [unk] fallback!
-];
-const SAMPLE_RATE = 16_000;
-setLogLevel(0);
+import { existsSync, chmodSync } from "node:fs";
+import readline from "node:readline";
-// 1. load model once
-let modelPath = join(__dirname, "..", "models", "vosk-model-small-en-us-0.15");
+/* ------------------------------------------------------------------ */
+/* 0. Helpers                                                         */
+/* ------------------------------------------------------------------ */
+const here = dirname(fileURLToPath(import.meta.url));
-/* If the file is running from inside  app.asar  we need the unpacked copy */
-if (modelPath.includes("app.asar")) {
-	modelPath = modelPath.replace("app.asar", "app.asar.unpacked");
+function unpacked(p) {
+	return p.includes("app.asar")
+		? p.replace("app.asar", "app.asar.unpacked")
+		: p;
 }
-if (!existsSync(modelPath)) {
-	throw new Error(`Vosk model not found at ${modelPath}`);
+/* ------------------------------------------------------------------ */
+/* 1. Resolve SoX binary                                              */
+/* ------------------------------------------------------------------ */
+const exeName = { win32: "sox.exe", darwin: "soxmac", linux: "soxlinux" }[
+	process.platform
+];
+/* Priority: argv[2]  → fallback to sibling binaries/<exe> */
+let soxPath = process.argv[2] || join(here, "..", "binaries", exeName);
+soxPath = unpacked(soxPath);
+if (!existsSync(soxPath)) throw new Error(`SoX not found: ${soxPath}`);
+try {
+	chmodSync(soxPath, 0o755);
+} catch {
+	/* ignore on read‐only FS */
 }
+/* ------------------------------------------------------------------ */
+/* 2. Resolve Vosk model                                              */
+/* ------------------------------------------------------------------ */
+let modelPath = join(here, "..", "models", "vosk-model-small-en-us-0.15"); // resolved relative to this file: ../models/<model dir>
+modelPath = unpacked(modelPath); // swap app.asar for app.asar.unpacked when the path contains it
+if (!existsSync(modelPath))
+	throw new Error(`Vosk model not found: ${modelPath}`); // abort start-up: the Model below is built from this path
+/* ------------------------------------------------------------------ */
+/* 3. Initialise recogniser                                           */
+/* ------------------------------------------------------------------ */
+setLogLevel(0); // NOTE(review): the meaning of level 0 is defined by vosk-koffi — confirm
+const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000); // env override, default 16000; a non-numeric value becomes NaN
+let GRAMMAR = ["[unk]"]; // seed; always keep [unk]. Reassigned by the stdin "update," handler
 const model = new Model(modelPath);
+let rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR }); // `let`: replaced when the grammar is updated via stdin
+rec.setWords(true); // the mic "data" handler reads per-word `word`/`conf` entries from rec.result()
-// 2. build a grammar recognizer
-let rec = new Recognizer({
-	model,
-	sampleRate: SAMPLE_RATE,
-	grammar: COMMANDS,
-});
+/* ------------------------------------------------------------------ */
+/* 4. Start the microphone                                            */
+/* ------------------------------------------------------------------ */
+const recArgs = { sampleRate: SAMPLE_RATE, threshold: 0, binPath: soxPath }; // options handed to record.record()
+if (process.platform === "win32") {
+	recArgs.device = "0"; // per the recorder hunk in this diff, a set device is passed to SoX as `-t waveaudio <device>`
+}
-// 3. open the mic (16-kHz, 16-bit, mono)
-const mic = record
-	.record({
-		sampleRate: SAMPLE_RATE,
-		threshold: 0,
-		binPath,
-	})
-	.stream();
+const mic = record.record(recArgs).stream(); // stream whose "data" chunks are fed to the recogniser
+// Define a confidence threshold for individual words.
+// You might need to adjust this value based on your specific use case.
+let WORD_CONFIDENCE_THRESHOLD = 0.7; // `let`: reassigned by the stdin "confidence,<value>" handler
 mic.on("data", (buf) => {
-	// accept 0.1-sec chunks for low latency
 	if (rec.acceptWaveform(buf)) {
-		const result = rec.result();
-		handle(result?.text?.trim());
-	} else {
-		// optional: JSON.parse(rec.partialResult()).partial for live captions
+		const fullResult = rec.result();
+		// Check if the result has word details
+		if (fullResult && fullResult.result && Array.isArray(fullResult.result)) {
+			let recognizedWords = [];
+			let totalConfidence = 0;
+			for (const wordDetail of fullResult.result) {
+				// Each word has its own confidence ('conf')
+				if (wordDetail.conf >= WORD_CONFIDENCE_THRESHOLD) {
+					recognizedWords.push(wordDetail.word);
+					totalConfidence += wordDetail.conf;
+				} else {
+					console.log(
+						`Discarding low-confidence word: "${
+							wordDetail.word
+						}" (Conf: ${wordDetail.conf.toFixed(2)})`
+					);
+				}
+			}
+			const finalRecognizedText = recognizedWords.join(" ").trim();
+			const averageConfidence =
+				recognizedWords.length > 0
+					? totalConfidence / recognizedWords.length
+					: 0;
+			handle(finalRecognizedText, averageConfidence, fullResult.text); // Pass both the filtered text and an average confidence
+		} else if (fullResult && fullResult.text) {
+			// Fallback for cases where setWords(true) might not fully apply or for partial results
+			handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
+		}
 	}
 });
-// 4. map recognised phrase ➜ action
-function handle(phrase) {
-	// Igonre unk
-	if (phrase.includes("[unk]")) return;
-	if (phrase && COMMANDS.includes(phrase)) {
-		// Send to stdout
-		process.stdout.write(`voice|${phrase}\n`);
-	}
-}
+function handle(processedWord, averageConfidence, originalText) {
+	if (!processedWord) return;
-const updateGrammar = (grammar) => {
-	COMMANDS = [...grammar, "[unk]"];
-	rec = new Recognizer({
-		model,
-		sampleRate: SAMPLE_RATE,
-		grammar: COMMANDS,
-	});
-};
-// Listen for CLI input to update grammar at runtime
-import readline from "node:readline";
+	if (processedWord.includes("[unk]")) return;
-// Set up readline interface for stdin
-const rl = readline.createInterface({
-	input: process.stdin,
-	output: process.stdout,
-	terminal: false,
-});
+	if (GRAMMAR.includes(processedWord)) {
+		process.stdout?.write(`voice|${processedWord}\n`);
+		process.stdout?.write(`confidence|${averageConfidence}\n`);
+	}
+}
+/* ------------------------------------------------------------------ */
+/* 6. Hot-reload grammar via stdin                                    */
+/* ------------------------------------------------------------------ */
+const rl = readline.createInterface({ input: process.stdin, terminal: false });
-// Listen for lines from stdin
 rl.on("line", (line) => {
 	const trimmed = line.trim();
-	// Example: update,open settings,mute audio,start recording
-	if (trimmed.startsWith("update")) {
-		const parts = trimmed.split(",");
-		if (parts.length > 1) {
-			// Remove the "update" command and use the rest as grammar
-			const newGrammar = parts
-				.slice(1)
-				.map((s) => s.trim())
-				.filter(Boolean);
-			if (newGrammar.length > 0) {
-				updateGrammar(newGrammar);
-			}
-		}
+	if (!trimmed.startsWith("update,") || !trimmed.startsWith("confidence,"))
+		return;
+	if (trimmed.startsWith("confidence,")) {
+		WORD_CONFIDENCE_THRESHOLD = Number(trimmed.split(",")[1]);
+		return;
 	}
+	const phrases = trimmed
+		.split(",")
+		.slice(1)
+		.map((s) => s.trim())
+		.filter(Boolean);
+	if (!phrases.length) return;
+	GRAMMAR = [...phrases, "[unk]"];
+	rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
 });

@@ -6,7 +6,6 @@ export default (options) => {
 	}
 	let args = [
-		"--default-device",
 		"--no-show-progress", // show no progress
 		"--rate",
 		options.sampleRate, // sample rate
@@ -44,7 +43,10 @@ export default (options) => {
 	const spawnOptions = {};
 	if (options.device) {
+		args.unshift("-t", "waveaudio", options.device);
 		spawnOptions.env = { ...process.env, AUDIODEV: options.device };
+	} else {
+		args.unshift("--default-device");
 	}
 	return { cmd, args, spawnOptions };

@@ -1,13 +1,9 @@
 {
 	"name": "@lumiastream/wakeword",
-	"version": "1.0.1-alpha.9",
+	"version": "1.0.1",
 	"type": "module",
 	"main": "lib/voice.js",
-	"bin": {
-		"wakeword": "bin/wakeword"
-	},
 	"files": [
-		"bin/",
 		"lib/",
 		"models/",
 		"binaries/"

@@ -1,44 +0,0 @@
-#!/usr/bin/env node
-import { fileURLToPath } from "url";
-import { spawn } from "node:child_process";
-import path from "node:path";
-// Pick correct SoX binary for the current OS
-const exe = {
-	win32: "sox.exe",
-	darwin: "soxmac",
-	linux: "soxlinux",
-}[process.platform];
-const soxPath = path.join(
-	path.dirname(fileURLToPath(import.meta.url)), // …/bin
-	"..",
-	"binaries",
-	exe
-);
-const child = spawn(
-	process.execPath,
-	[
-		path.join(
-			path.dirname(fileURLToPath(import.meta.url)),
-			"..",
-			"lib",
-			"voice.js"
-		),
-		soxPath,
-		...process.argv.slice(2),
-	],
-	{ stdio: ["pipe", "inherit", "inherit"] }
-);
-// If you want to forward user input from this process to the child:
-// if (process.stdin.isTTY) {
-// 	process.stdin.setRawMode(false);
-// }
-// process.stdin.pipe(child.stdin);
-// listen for hotkey events from the child process
-child.on("message", (message) => {
-	console.log("hotkey", message);
-});

@lumiastream/wakeword 1.0.1-alpha.9 → 1.0.1