@lumiastream/wakeword 1.0.1-alpha.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -6,7 +6,6 @@ export default (options) => {
6
6
  }
7
7
 
8
8
  let args = [
9
- "--default-device",
10
9
  "--no-show-progress", // show no progress
11
10
  "--rate",
12
11
  options.sampleRate, // sample rate
@@ -44,7 +43,10 @@ export default (options) => {
44
43
  const spawnOptions = {};
45
44
 
46
45
  if (options.device) {
46
+ args.unshift("-t", "waveaudio", options.device);
47
47
  spawnOptions.env = { ...process.env, AUDIODEV: options.device };
48
+ } else {
49
+ args.unshift("--default-device");
48
50
  }
49
51
 
50
52
  return { cmd, args, spawnOptions };
package/lib/voice.js CHANGED
@@ -1,111 +1,144 @@
1
+ // voice.js (ESM)
1
2
  import { Model, Recognizer, setLogLevel } from "vosk-koffi";
2
3
  import record from "./record.js";
3
4
  import { dirname, join } from "node:path";
4
5
  import { fileURLToPath } from "node:url";
5
- import { existsSync } from "node:fs";
6
-
7
- const __dirname = dirname(fileURLToPath(import.meta.url));
8
-
9
- const binPath = join(
10
- "binaries",
11
- process.platform === "win32"
12
- ? "sox.exe"
13
- : process.platform === "darwin"
14
- ? "soxmac"
15
- : "soxlinux"
16
- );
17
-
18
- let COMMANDS = [
19
- "blue",
20
- "[unk]", // always keep an [unk] fallback!
21
- ];
22
-
23
- const SAMPLE_RATE = 16_000;
24
- setLogLevel(0);
6
+ import { existsSync, chmodSync } from "node:fs";
7
+ import readline from "node:readline";
25
8
 
26
- // 1. load model once
27
- let modelPath = join(__dirname, "..", "models", "vosk-model-small-en-us-0.15");
9
+ /* ------------------------------------------------------------------ */
10
+ /* 0. Helpers */
11
+ /* ------------------------------------------------------------------ */
12
+ const here = dirname(fileURLToPath(import.meta.url));
28
13
 
29
- /* If the file is running from inside app.asar we need the unpacked copy */
30
- if (modelPath.includes("app.asar")) {
31
- modelPath = modelPath.replace("app.asar", "app.asar.unpacked");
14
+ function unpacked(p) {
15
+ return p.includes("app.asar")
16
+ ? p.replace("app.asar", "app.asar.unpacked")
17
+ : p;
32
18
  }
33
19
 
34
- if (!existsSync(modelPath)) {
35
- throw new Error(`Vosk model not found at ${modelPath}`);
20
+ /* ------------------------------------------------------------------ */
21
+ /* 1. Resolve SoX binary */
22
+ /* ------------------------------------------------------------------ */
23
+ const exeName = { win32: "sox.exe", darwin: "soxmac", linux: "soxlinux" }[
24
+ process.platform
25
+ ];
26
+
27
+ /* Priority: argv[2] → fallback to sibling binaries/<exe> */
28
+ let soxPath = process.argv[2] || join(here, "..", "binaries", exeName);
29
+ soxPath = unpacked(soxPath);
30
+
31
+ if (!existsSync(soxPath)) throw new Error(`SoX not found: ${soxPath}`);
32
+ try {
33
+ chmodSync(soxPath, 0o755);
34
+ } catch {
35
+ /* ignore on read‐only FS */
36
36
  }
37
37
 
38
+ /* ------------------------------------------------------------------ */
39
+ /* 2. Resolve Vosk model */
40
+ /* ------------------------------------------------------------------ */
41
+ let modelPath = join(here, "..", "models", "vosk-model-small-en-us-0.15");
42
+ modelPath = unpacked(modelPath);
43
+
44
+ if (!existsSync(modelPath))
45
+ throw new Error(`Vosk model not found: ${modelPath}`);
46
+
47
+ /* ------------------------------------------------------------------ */
48
+ /* 3. Initialise recogniser */
49
+ /* ------------------------------------------------------------------ */
50
+ setLogLevel(0);
51
+
52
+ const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000);
53
+ let GRAMMAR = ["[unk]"]; // seed; always keep [unk]
54
+
38
55
  const model = new Model(modelPath);
56
+ let rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
57
+ rec.setWords(true);
39
58
 
40
- // 2. build a grammar recognizer
41
- let rec = new Recognizer({
42
- model,
43
- sampleRate: SAMPLE_RATE,
44
- grammar: COMMANDS,
45
- });
59
+ /* ------------------------------------------------------------------ */
60
+ /* 4. Start the microphone */
61
+ /* ------------------------------------------------------------------ */
62
+ const recArgs = { sampleRate: SAMPLE_RATE, threshold: 0, binPath: soxPath };
63
+ if (process.platform === "win32") {
64
+ recArgs.device = "0";
65
+ }
46
66
 
47
- // 3. open the mic (16-kHz, 16-bit, mono)
48
- const mic = record
49
- .record({
50
- sampleRate: SAMPLE_RATE,
51
- threshold: 0,
52
- binPath,
53
- })
54
- .stream();
67
+ const mic = record.record(recArgs).stream();
68
+ // Define a confidence threshold for individual words.
69
+ // You might need to adjust this value based on your specific use case.
70
+ let WORD_CONFIDENCE_THRESHOLD = 0.7;
55
71
 
56
72
  mic.on("data", (buf) => {
57
- // accept 0.1-sec chunks for low latency
58
73
  if (rec.acceptWaveform(buf)) {
59
- const result = rec.result();
60
- handle(result?.text?.trim());
61
- } else {
62
- // optional: JSON.parse(rec.partialResult()).partial for live captions
74
+ const fullResult = rec.result();
75
+
76
+ // Check if the result has word details
77
+ if (fullResult && fullResult.result && Array.isArray(fullResult.result)) {
78
+ let recognizedWords = [];
79
+ let totalConfidence = 0;
80
+
81
+ for (const wordDetail of fullResult.result) {
82
+ // Each word has its own confidence ('conf')
83
+ if (wordDetail.conf >= WORD_CONFIDENCE_THRESHOLD) {
84
+ recognizedWords.push(wordDetail.word);
85
+ totalConfidence += wordDetail.conf;
86
+ } else {
87
+ console.log(
88
+ `Discarding low-confidence word: "${
89
+ wordDetail.word
90
+ }" (Conf: ${wordDetail.conf.toFixed(2)})`
91
+ );
92
+ }
93
+ }
94
+
95
+ const finalRecognizedText = recognizedWords.join(" ").trim();
96
+ const averageConfidence =
97
+ recognizedWords.length > 0
98
+ ? totalConfidence / recognizedWords.length
99
+ : 0;
100
+
101
+ handle(finalRecognizedText, averageConfidence, fullResult.text); // Pass both the filtered text and an average confidence
102
+ } else if (fullResult && fullResult.text) {
103
+ // Fallback for cases where setWords(true) might not fully apply or for partial results
104
+ handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
105
+ }
63
106
  }
64
107
  });
65
108
 
66
- // 4. map recognised phrase ➜ action
67
- function handle(phrase) {
68
- // Igonre unk
69
- if (phrase.includes("[unk]")) return;
70
- if (phrase && COMMANDS.includes(phrase)) {
71
- // Send to stdout
72
- process.stdout.write(`voice|${phrase}\n`);
73
- }
74
- }
109
+ function handle(processedWord, averageConfidence, originalText) {
110
+ if (!processedWord) return;
75
111
 
76
- const updateGrammar = (grammar) => {
77
- COMMANDS = [...grammar, "[unk]"];
78
- rec = new Recognizer({
79
- model,
80
- sampleRate: SAMPLE_RATE,
81
- grammar: COMMANDS,
82
- });
83
- };
84
- // Listen for CLI input to update grammar at runtime
85
- import readline from "node:readline";
112
+ if (processedWord.includes("[unk]")) return;
86
113
 
87
- // Set up readline interface for stdin
88
- const rl = readline.createInterface({
89
- input: process.stdin,
90
- output: process.stdout,
91
- terminal: false,
92
- });
114
+ if (GRAMMAR.includes(processedWord)) {
115
+ process.stdout?.write(`voice|${processedWord}\n`);
116
+ process.stdout?.write(`confidence|${averageConfidence}\n`);
117
+ }
118
+ }
119
+ /* ------------------------------------------------------------------ */
120
+ /* 6. Hot-reload grammar via stdin */
121
+ /* ------------------------------------------------------------------ */
122
+ const rl = readline.createInterface({ input: process.stdin, terminal: false });
93
123
 
94
- // Listen for lines from stdin
95
124
  rl.on("line", (line) => {
96
125
  const trimmed = line.trim();
97
- // Example: update,open settings,mute audio,start recording
98
- if (trimmed.startsWith("update")) {
99
- const parts = trimmed.split(",");
100
- if (parts.length > 1) {
101
- // Remove the "update" command and use the rest as grammar
102
- const newGrammar = parts
103
- .slice(1)
104
- .map((s) => s.trim())
105
- .filter(Boolean);
106
- if (newGrammar.length > 0) {
107
- updateGrammar(newGrammar);
108
- }
109
- }
126
+ if (!trimmed.startsWith("update,") || !trimmed.startsWith("confidence,"))
127
+ return;
128
+
129
+ if (trimmed.startsWith("confidence,")) {
130
+ WORD_CONFIDENCE_THRESHOLD = Number(trimmed.split(",")[1]);
131
+ return;
110
132
  }
133
+
134
+ const phrases = trimmed
135
+ .split(",")
136
+ .slice(1)
137
+ .map((s) => s.trim())
138
+ .filter(Boolean);
139
+
140
+ if (!phrases.length) return;
141
+
142
+ GRAMMAR = [...phrases, "[unk]"];
143
+ rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
111
144
  });
package/package.json CHANGED
@@ -1,13 +1,9 @@
1
1
  {
2
2
  "name": "@lumiastream/wakeword",
3
- "version": "1.0.1-alpha.9",
3
+ "version": "1.0.1",
4
4
  "type": "module",
5
5
  "main": "lib/voice.js",
6
- "bin": {
7
- "wakeword": "bin/wakeword"
8
- },
9
6
  "files": [
10
- "bin/",
11
7
  "lib/",
12
8
  "models/",
13
9
  "binaries/"
package/bin/wakeword DELETED
@@ -1,44 +0,0 @@
1
- #!/usr/bin/env node
2
- import { fileURLToPath } from "url";
3
- import { spawn } from "node:child_process";
4
- import path from "node:path";
5
-
6
- // Pick correct SoX binary for the current OS
7
- const exe = {
8
- win32: "sox.exe",
9
- darwin: "soxmac",
10
- linux: "soxlinux",
11
- }[process.platform];
12
-
13
- const soxPath = path.join(
14
- path.dirname(fileURLToPath(import.meta.url)), // …/bin
15
- "..",
16
- "binaries",
17
- exe
18
- );
19
-
20
- const child = spawn(
21
- process.execPath,
22
- [
23
- path.join(
24
- path.dirname(fileURLToPath(import.meta.url)),
25
- "..",
26
- "lib",
27
- "voice.js"
28
- ),
29
- soxPath,
30
- ...process.argv.slice(2),
31
- ],
32
- { stdio: ["pipe", "inherit", "inherit"] }
33
- );
34
-
35
- // If you want to forward user input from this process to the child:
36
- // if (process.stdin.isTTY) {
37
- // process.stdin.setRawMode(false);
38
- // }
39
- // process.stdin.pipe(child.stdin);
40
-
41
- // listen for hotkey events from the child process
42
- child.on("message", (message) => {
43
- console.log("hotkey", message);
44
- });