@lumiastream/wakeword 1.0.1-alpha.9 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -6,7 +6,6 @@ export default (options) => {
6
6
  }
7
7
 
8
8
  let args = [
9
- "--default-device",
10
9
  "--no-show-progress", // show no progress
11
10
  "--rate",
12
11
  options.sampleRate, // sample rate
@@ -44,7 +43,10 @@ export default (options) => {
44
43
  const spawnOptions = {};
45
44
 
46
45
  if (options.device) {
46
+ args.unshift("-t", "waveaudio", options.device);
47
47
  spawnOptions.env = { ...process.env, AUDIODEV: options.device };
48
+ } else {
49
+ args.unshift("--default-device");
48
50
  }
49
51
 
50
52
  return { cmd, args, spawnOptions };
package/lib/voice.js CHANGED
@@ -2,110 +2,142 @@ import { Model, Recognizer, setLogLevel } from "vosk-koffi";
2
2
  import record from "./record.js";
3
3
  import { dirname, join } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
- import { existsSync } from "node:fs";
6
-
7
- const __dirname = dirname(fileURLToPath(import.meta.url));
8
-
9
- const binPath = join(
10
- "binaries",
11
- process.platform === "win32"
12
- ? "sox.exe"
13
- : process.platform === "darwin"
14
- ? "soxmac"
15
- : "soxlinux"
16
- );
17
-
18
- let COMMANDS = [
19
- "blue",
20
- "[unk]", // always keep an [unk] fallback!
21
- ];
22
-
23
- const SAMPLE_RATE = 16_000;
24
- setLogLevel(0);
5
+ import { existsSync, chmodSync } from "node:fs";
6
+ import readline from "node:readline";
25
7
 
26
- // 1. load model once
27
- let modelPath = join(__dirname, "..", "models", "vosk-model-small-en-us-0.15");
8
+ /* ------------------------------------------------------------------ */
9
+ /* 0. Helpers */
10
+ /* ------------------------------------------------------------------ */
11
+ const here = dirname(fileURLToPath(import.meta.url));
28
12
 
29
- /* If the file is running from inside app.asar we need the unpacked copy */
30
- if (modelPath.includes("app.asar")) {
31
- modelPath = modelPath.replace("app.asar", "app.asar.unpacked");
13
+ function unpacked(p) {
14
+ return p.includes("app.asar")
15
+ ? p.replace("app.asar", "app.asar.unpacked")
16
+ : p;
32
17
  }
33
18
 
34
- if (!existsSync(modelPath)) {
35
- throw new Error(`Vosk model not found at ${modelPath}`);
19
+ /* ------------------------------------------------------------------ */
20
+ /* 1. Resolve SoX binary */
21
+ /* ------------------------------------------------------------------ */
22
+ const exeName = { win32: "sox.exe", darwin: "soxmac", linux: "soxlinux" }[
23
+ process.platform
24
+ ];
25
+
26
+ /* Priority: argv[2] → fallback to sibling binaries/<exe> */
27
+ let soxPath = process.argv[2] || join(here, "..", "binaries", exeName);
28
+ soxPath = unpacked(soxPath);
29
+
30
+ if (!existsSync(soxPath)) throw new Error(`SoX not found: ${soxPath}`);
31
+ try {
32
+ chmodSync(soxPath, 0o755);
33
+ } catch {
34
+ /* ignore on read‐only FS */
36
35
  }
37
36
 
37
+ /* ------------------------------------------------------------------ */
38
+ /* 2. Resolve Vosk model */
39
+ /* ------------------------------------------------------------------ */
40
+ let modelPath = join(here, "..", "models", "vosk-model-small-en-us-0.15");
41
+ modelPath = unpacked(modelPath);
42
+
43
+ if (!existsSync(modelPath))
44
+ throw new Error(`Vosk model not found: ${modelPath}`);
45
+
46
+ /* ------------------------------------------------------------------ */
47
+ /* 3. Initialise recogniser */
48
+ /* ------------------------------------------------------------------ */
49
+ setLogLevel(0);
50
+
51
+ const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000);
52
+ let GRAMMAR = ["[unk]"]; // seed; always keep [unk]
53
+
38
54
  const model = new Model(modelPath);
55
+ let rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
56
+ rec.setWords(true);
39
57
 
40
- // 2. build a grammar recognizer
41
- let rec = new Recognizer({
42
- model,
43
- sampleRate: SAMPLE_RATE,
44
- grammar: COMMANDS,
45
- });
58
+ /* ------------------------------------------------------------------ */
59
+ /* 4. Start the microphone */
60
+ /* ------------------------------------------------------------------ */
61
+ const recArgs = { sampleRate: SAMPLE_RATE, threshold: 0, binPath: soxPath };
62
+ if (process.platform === "win32") {
63
+ recArgs.device = "0";
64
+ }
46
65
 
47
- // 3. open the mic (16-kHz, 16-bit, mono)
48
- const mic = record
49
- .record({
50
- sampleRate: SAMPLE_RATE,
51
- threshold: 0,
52
- binPath,
53
- })
54
- .stream();
66
+ const mic = record.record(recArgs).stream();
67
+ // Define a confidence threshold for individual words.
68
+ // You might need to adjust this value based on your specific use case.
69
+ let WORD_CONFIDENCE_THRESHOLD = 0.7;
55
70
 
56
71
  mic.on("data", (buf) => {
57
- // accept 0.1-sec chunks for low latency
58
72
  if (rec.acceptWaveform(buf)) {
59
- const result = rec.result();
60
- handle(result?.text?.trim());
61
- } else {
62
- // optional: JSON.parse(rec.partialResult()).partial for live captions
73
+ const fullResult = rec.result();
74
+
75
+ // Check if the result has word details
76
+ if (fullResult && fullResult.result && Array.isArray(fullResult.result)) {
77
+ let recognizedWords = [];
78
+ let totalConfidence = 0;
79
+
80
+ for (const wordDetail of fullResult.result) {
81
+ // Each word has its own confidence ('conf')
82
+ if (wordDetail.conf >= WORD_CONFIDENCE_THRESHOLD) {
83
+ recognizedWords.push(wordDetail.word);
84
+ totalConfidence += wordDetail.conf;
85
+ } else {
86
+ console.log(
87
+ `Discarding low-confidence word: "${
88
+ wordDetail.word
89
+ }" (Conf: ${wordDetail.conf.toFixed(2)})`
90
+ );
91
+ }
92
+ }
93
+
94
+ const finalRecognizedText = recognizedWords.join(" ").trim();
95
+ const averageConfidence =
96
+ recognizedWords.length > 0
97
+ ? totalConfidence / recognizedWords.length
98
+ : 0;
99
+
100
+ handle(finalRecognizedText, averageConfidence, fullResult.text); // Pass both the filtered text and an average confidence
101
+ } else if (fullResult && fullResult.text) {
102
+ // Fallback for cases where setWords(true) might not fully apply or for partial results
103
+ handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
104
+ }
63
105
  }
64
106
  });
65
107
 
66
- // 4. map recognised phrase ➜ action
67
- function handle(phrase) {
68
- // Igonre unk
69
- if (phrase.includes("[unk]")) return;
70
- if (phrase && COMMANDS.includes(phrase)) {
71
- // Send to stdout
72
- process.stdout.write(`voice|${phrase}\n`);
73
- }
74
- }
108
+ function handle(processedWord, averageConfidence, originalText) {
109
+ if (!processedWord) return;
75
110
 
76
- const updateGrammar = (grammar) => {
77
- COMMANDS = [...grammar, "[unk]"];
78
- rec = new Recognizer({
79
- model,
80
- sampleRate: SAMPLE_RATE,
81
- grammar: COMMANDS,
82
- });
83
- };
84
- // Listen for CLI input to update grammar at runtime
85
- import readline from "node:readline";
111
+ if (processedWord.includes("[unk]")) return;
86
112
 
87
- // Set up readline interface for stdin
88
- const rl = readline.createInterface({
89
- input: process.stdin,
90
- output: process.stdout,
91
- terminal: false,
92
- });
113
+ if (GRAMMAR.includes(processedWord)) {
114
+ process.stdout?.write(`voice|${processedWord}\n`);
115
+ process.stdout?.write(`confidence|${averageConfidence}\n`);
116
+ }
117
+ }
118
+ /* ------------------------------------------------------------------ */
119
+ /* 6. Hot-reload grammar via stdin */
120
+ /* ------------------------------------------------------------------ */
121
+ const rl = readline.createInterface({ input: process.stdin, terminal: false });
93
122
 
94
- // Listen for lines from stdin
95
123
  rl.on("line", (line) => {
96
124
  const trimmed = line.trim();
97
- // Example: update,open settings,mute audio,start recording
98
- if (trimmed.startsWith("update")) {
99
- const parts = trimmed.split(",");
100
- if (parts.length > 1) {
101
- // Remove the "update" command and use the rest as grammar
102
- const newGrammar = parts
103
- .slice(1)
104
- .map((s) => s.trim())
105
- .filter(Boolean);
106
- if (newGrammar.length > 0) {
107
- updateGrammar(newGrammar);
108
- }
109
- }
125
+ if (!trimmed.startsWith("update,") && !trimmed.startsWith("confidence,"))
126
+ return;
127
+
128
+ if (trimmed.startsWith("confidence,")) {
129
+ WORD_CONFIDENCE_THRESHOLD = Number(trimmed.split(",")[1]);
130
+ return;
110
131
  }
132
+
133
+ const phrases = trimmed
134
+ .split(",")
135
+ .slice(1)
136
+ .map((s) => s.trim())
137
+ .filter(Boolean);
138
+
139
+ if (!phrases.length) return;
140
+
141
+ GRAMMAR = [...phrases, "[unk]"];
142
+ rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
111
143
  });
package/package.json CHANGED
@@ -1,13 +1,9 @@
1
1
  {
2
2
  "name": "@lumiastream/wakeword",
3
- "version": "1.0.1-alpha.9",
3
+ "version": "1.0.2",
4
4
  "type": "module",
5
5
  "main": "lib/voice.js",
6
- "bin": {
7
- "wakeword": "bin/wakeword"
8
- },
9
6
  "files": [
10
- "bin/",
11
7
  "lib/",
12
8
  "models/",
13
9
  "binaries/"
package/bin/wakeword DELETED
@@ -1,44 +0,0 @@
1
- #!/usr/bin/env node
2
- import { fileURLToPath } from "url";
3
- import { spawn } from "node:child_process";
4
- import path from "node:path";
5
-
6
- // Pick correct SoX binary for the current OS
7
- const exe = {
8
- win32: "sox.exe",
9
- darwin: "soxmac",
10
- linux: "soxlinux",
11
- }[process.platform];
12
-
13
- const soxPath = path.join(
14
- path.dirname(fileURLToPath(import.meta.url)), // …/bin
15
- "..",
16
- "binaries",
17
- exe
18
- );
19
-
20
- const child = spawn(
21
- process.execPath,
22
- [
23
- path.join(
24
- path.dirname(fileURLToPath(import.meta.url)),
25
- "..",
26
- "lib",
27
- "voice.js"
28
- ),
29
- soxPath,
30
- ...process.argv.slice(2),
31
- ],
32
- { stdio: ["pipe", "inherit", "inherit"] }
33
- );
34
-
35
- // If you want to forward user input from this process to the child:
36
- // if (process.stdin.isTTY) {
37
- // process.stdin.setRawMode(false);
38
- // }
39
- // process.stdin.pipe(child.stdin);
40
-
41
- // listen for hotkey events from the child process
42
- child.on("message", (message) => {
43
- console.log("hotkey", message);
44
- });