@lumiastream/wakeword 1.0.1-alpha.9 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binaries/libflac-8.dll +0 -0
- package/binaries/libgcc_s_sjlj-1.dll +0 -0
- package/binaries/libgomp-1.dll +0 -0
- package/binaries/libid3tag-0.dll +0 -0
- package/binaries/libogg-0.dll +0 -0
- package/binaries/libpng16-16.dll +0 -0
- package/binaries/libsox-3.dll +0 -0
- package/binaries/libssp-0.dll +0 -0
- package/binaries/libvorbis-0.dll +0 -0
- package/binaries/libvorbisenc-2.dll +0 -0
- package/binaries/libvorbisfile-3.dll +0 -0
- package/binaries/libwavpack-1.dll +0 -0
- package/binaries/libwinpthread-1.dll +0 -0
- package/binaries/zlib1.dll +0 -0
- package/lib/recorders/sox.js +3 -1
- package/lib/voice.js +118 -85
- package/package.json +1 -5
- package/bin/wakeword +0 -44
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/lib/recorders/sox.js
CHANGED
|
@@ -6,7 +6,6 @@ export default (options) => {
|
|
|
6
6
|
}
|
|
7
7
|
|
|
8
8
|
let args = [
|
|
9
|
-
"--default-device",
|
|
10
9
|
"--no-show-progress", // show no progress
|
|
11
10
|
"--rate",
|
|
12
11
|
options.sampleRate, // sample rate
|
|
@@ -44,7 +43,10 @@ export default (options) => {
|
|
|
44
43
|
const spawnOptions = {};
|
|
45
44
|
|
|
46
45
|
if (options.device) {
|
|
46
|
+
args.unshift("-t", "waveaudio", options.device);
|
|
47
47
|
spawnOptions.env = { ...process.env, AUDIODEV: options.device };
|
|
48
|
+
} else {
|
|
49
|
+
args.unshift("--default-device");
|
|
48
50
|
}
|
|
49
51
|
|
|
50
52
|
return { cmd, args, spawnOptions };
|
package/lib/voice.js
CHANGED
|
@@ -1,111 +1,144 @@
|
|
|
1
|
+
// voice.js (ESM)
|
|
1
2
|
import { Model, Recognizer, setLogLevel } from "vosk-koffi";
|
|
2
3
|
import record from "./record.js";
|
|
3
4
|
import { dirname, join } from "node:path";
|
|
4
5
|
import { fileURLToPath } from "node:url";
|
|
5
|
-
import { existsSync } from "node:fs";
|
|
6
|
-
|
|
7
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
8
|
-
|
|
9
|
-
const binPath = join(
|
|
10
|
-
"binaries",
|
|
11
|
-
process.platform === "win32"
|
|
12
|
-
? "sox.exe"
|
|
13
|
-
: process.platform === "darwin"
|
|
14
|
-
? "soxmac"
|
|
15
|
-
: "soxlinux"
|
|
16
|
-
);
|
|
17
|
-
|
|
18
|
-
let COMMANDS = [
|
|
19
|
-
"blue",
|
|
20
|
-
"[unk]", // always keep an [unk] fallback!
|
|
21
|
-
];
|
|
22
|
-
|
|
23
|
-
const SAMPLE_RATE = 16_000;
|
|
24
|
-
setLogLevel(0);
|
|
6
|
+
import { existsSync, chmodSync } from "node:fs";
|
|
7
|
+
import readline from "node:readline";
|
|
25
8
|
|
|
26
|
-
|
|
27
|
-
|
|
9
|
+
/* ------------------------------------------------------------------ */
|
|
10
|
+
/* 0. Helpers */
|
|
11
|
+
/* ------------------------------------------------------------------ */
|
|
12
|
+
const here = dirname(fileURLToPath(import.meta.url));
|
|
28
13
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
14
|
+
function unpacked(p) {
|
|
15
|
+
return p.includes("app.asar")
|
|
16
|
+
? p.replace("app.asar", "app.asar.unpacked")
|
|
17
|
+
: p;
|
|
32
18
|
}
|
|
33
19
|
|
|
34
|
-
|
|
35
|
-
|
|
20
|
+
/* ------------------------------------------------------------------ */
|
|
21
|
+
/* 1. Resolve SoX binary */
|
|
22
|
+
/* ------------------------------------------------------------------ */
|
|
23
|
+
const exeName = { win32: "sox.exe", darwin: "soxmac", linux: "soxlinux" }[
|
|
24
|
+
process.platform
|
|
25
|
+
];
|
|
26
|
+
|
|
27
|
+
/* Priority: argv[2] → fallback to sibling binaries/<exe> */
|
|
28
|
+
let soxPath = process.argv[2] || join(here, "..", "binaries", exeName);
|
|
29
|
+
soxPath = unpacked(soxPath);
|
|
30
|
+
|
|
31
|
+
if (!existsSync(soxPath)) throw new Error(`SoX not found: ${soxPath}`);
|
|
32
|
+
try {
|
|
33
|
+
chmodSync(soxPath, 0o755);
|
|
34
|
+
} catch {
|
|
35
|
+
/* ignore on read‐only FS */
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/* ------------------------------------------------------------------ */
|
|
39
|
+
/* 2. Resolve Vosk model */
|
|
40
|
+
/* ------------------------------------------------------------------ */
|
|
41
|
+
let modelPath = join(here, "..", "models", "vosk-model-small-en-us-0.15");
|
|
42
|
+
modelPath = unpacked(modelPath);
|
|
43
|
+
|
|
44
|
+
if (!existsSync(modelPath))
|
|
45
|
+
throw new Error(`Vosk model not found: ${modelPath}`);
|
|
46
|
+
|
|
47
|
+
/* ------------------------------------------------------------------ */
|
|
48
|
+
/* 3. Initialise recogniser */
|
|
49
|
+
/* ------------------------------------------------------------------ */
|
|
50
|
+
setLogLevel(0);
|
|
51
|
+
|
|
52
|
+
const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000);
|
|
53
|
+
let GRAMMAR = ["[unk]"]; // seed; always keep [unk]
|
|
54
|
+
|
|
38
55
|
const model = new Model(modelPath);
|
|
56
|
+
let rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
|
|
57
|
+
rec.setWords(true);
|
|
39
58
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
59
|
+
/* ------------------------------------------------------------------ */
|
|
60
|
+
/* 4. Start the microphone */
|
|
61
|
+
/* ------------------------------------------------------------------ */
|
|
62
|
+
const recArgs = { sampleRate: SAMPLE_RATE, threshold: 0, binPath: soxPath };
|
|
63
|
+
if (process.platform === "win32") {
|
|
64
|
+
recArgs.device = "0";
|
|
65
|
+
}
|
|
46
66
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
threshold: 0,
|
|
52
|
-
binPath,
|
|
53
|
-
})
|
|
54
|
-
.stream();
|
|
67
|
+
const mic = record.record(recArgs).stream();
|
|
68
|
+
// Define a confidence threshold for individual words.
|
|
69
|
+
// You might need to adjust this value based on your specific use case.
|
|
70
|
+
let WORD_CONFIDENCE_THRESHOLD = 0.7;
|
|
55
71
|
|
|
56
72
|
mic.on("data", (buf) => {
|
|
57
|
-
// accept 0.1-sec chunks for low latency
|
|
58
73
|
if (rec.acceptWaveform(buf)) {
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
74
|
+
const fullResult = rec.result();
|
|
75
|
+
|
|
76
|
+
// Check if the result has word details
|
|
77
|
+
if (fullResult && fullResult.result && Array.isArray(fullResult.result)) {
|
|
78
|
+
let recognizedWords = [];
|
|
79
|
+
let totalConfidence = 0;
|
|
80
|
+
|
|
81
|
+
for (const wordDetail of fullResult.result) {
|
|
82
|
+
// Each word has its own confidence ('conf')
|
|
83
|
+
if (wordDetail.conf >= WORD_CONFIDENCE_THRESHOLD) {
|
|
84
|
+
recognizedWords.push(wordDetail.word);
|
|
85
|
+
totalConfidence += wordDetail.conf;
|
|
86
|
+
} else {
|
|
87
|
+
console.log(
|
|
88
|
+
`Discarding low-confidence word: "${
|
|
89
|
+
wordDetail.word
|
|
90
|
+
}" (Conf: ${wordDetail.conf.toFixed(2)})`
|
|
91
|
+
);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const finalRecognizedText = recognizedWords.join(" ").trim();
|
|
96
|
+
const averageConfidence =
|
|
97
|
+
recognizedWords.length > 0
|
|
98
|
+
? totalConfidence / recognizedWords.length
|
|
99
|
+
: 0;
|
|
100
|
+
|
|
101
|
+
handle(finalRecognizedText, averageConfidence, fullResult.text); // Pass both the filtered text and an average confidence
|
|
102
|
+
} else if (fullResult && fullResult.text) {
|
|
103
|
+
// Fallback for cases where setWords(true) might not fully apply or for partial results
|
|
104
|
+
handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
|
|
105
|
+
}
|
|
63
106
|
}
|
|
64
107
|
});
|
|
65
108
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
// Igonre unk
|
|
69
|
-
if (phrase.includes("[unk]")) return;
|
|
70
|
-
if (phrase && COMMANDS.includes(phrase)) {
|
|
71
|
-
// Send to stdout
|
|
72
|
-
process.stdout.write(`voice|${phrase}\n`);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
109
|
+
function handle(processedWord, averageConfidence, originalText) {
|
|
110
|
+
if (!processedWord) return;
|
|
75
111
|
|
|
76
|
-
|
|
77
|
-
COMMANDS = [...grammar, "[unk]"];
|
|
78
|
-
rec = new Recognizer({
|
|
79
|
-
model,
|
|
80
|
-
sampleRate: SAMPLE_RATE,
|
|
81
|
-
grammar: COMMANDS,
|
|
82
|
-
});
|
|
83
|
-
};
|
|
84
|
-
// Listen for CLI input to update grammar at runtime
|
|
85
|
-
import readline from "node:readline";
|
|
112
|
+
if (processedWord.includes("[unk]")) return;
|
|
86
113
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
114
|
+
if (GRAMMAR.includes(processedWord)) {
|
|
115
|
+
process.stdout?.write(`voice|${processedWord}\n`);
|
|
116
|
+
process.stdout?.write(`confidence|${averageConfidence}\n`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
/* ------------------------------------------------------------------ */
|
|
120
|
+
/* 6. Hot-reload grammar via stdin */
|
|
121
|
+
/* ------------------------------------------------------------------ */
|
|
122
|
+
const rl = readline.createInterface({ input: process.stdin, terminal: false });
|
|
93
123
|
|
|
94
|
-
// Listen for lines from stdin
|
|
95
124
|
rl.on("line", (line) => {
|
|
96
125
|
const trimmed = line.trim();
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
.slice(1)
|
|
104
|
-
.map((s) => s.trim())
|
|
105
|
-
.filter(Boolean);
|
|
106
|
-
if (newGrammar.length > 0) {
|
|
107
|
-
updateGrammar(newGrammar);
|
|
108
|
-
}
|
|
109
|
-
}
|
|
126
|
+
if (!trimmed.startsWith("update,") || !trimmed.startsWith("confidence,"))
|
|
127
|
+
return;
|
|
128
|
+
|
|
129
|
+
if (trimmed.startsWith("confidence,")) {
|
|
130
|
+
// Only accept a finite number: a NaN threshold would make every
// `conf >= WORD_CONFIDENCE_THRESHOLD` comparison false and silently
// discard all recognised words.
const requestedThreshold = Number(trimmed.split(",")[1]);
if (Number.isFinite(requestedThreshold)) {
WORD_CONFIDENCE_THRESHOLD = requestedThreshold;
}
|
|
131
|
+
return;
|
|
110
132
|
}
|
|
133
|
+
|
|
134
|
+
const phrases = trimmed
|
|
135
|
+
.split(",")
|
|
136
|
+
.slice(1)
|
|
137
|
+
.map((s) => s.trim())
|
|
138
|
+
.filter(Boolean);
|
|
139
|
+
|
|
140
|
+
if (!phrases.length) return;
|
|
141
|
+
|
|
142
|
+
GRAMMAR = [...phrases, "[unk]"];
|
|
143
|
+
// Rebuild the recogniser with the new grammar.
// NOTE(review): the previous recogniser is not released here — confirm whether
// vosk-koffi exposes a free() that should be called before reassigning.
rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
// Re-enable word-level details, as done for the initial recogniser; without
// this the per-word confidence filter in the mic "data" handler is bypassed
// and the fallback branch reports a confidence of 1.0.
rec.setWords(true);
|
|
111
144
|
});
|
package/package.json
CHANGED
package/bin/wakeword
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import { fileURLToPath } from "url";
|
|
3
|
-
import { spawn } from "node:child_process";
|
|
4
|
-
import path from "node:path";
|
|
5
|
-
|
|
6
|
-
// Pick correct SoX binary for the current OS
|
|
7
|
-
const exe = {
|
|
8
|
-
win32: "sox.exe",
|
|
9
|
-
darwin: "soxmac",
|
|
10
|
-
linux: "soxlinux",
|
|
11
|
-
}[process.platform];
|
|
12
|
-
|
|
13
|
-
const soxPath = path.join(
|
|
14
|
-
path.dirname(fileURLToPath(import.meta.url)), // …/bin
|
|
15
|
-
"..",
|
|
16
|
-
"binaries",
|
|
17
|
-
exe
|
|
18
|
-
);
|
|
19
|
-
|
|
20
|
-
const child = spawn(
|
|
21
|
-
process.execPath,
|
|
22
|
-
[
|
|
23
|
-
path.join(
|
|
24
|
-
path.dirname(fileURLToPath(import.meta.url)),
|
|
25
|
-
"..",
|
|
26
|
-
"lib",
|
|
27
|
-
"voice.js"
|
|
28
|
-
),
|
|
29
|
-
soxPath,
|
|
30
|
-
...process.argv.slice(2),
|
|
31
|
-
],
|
|
32
|
-
{ stdio: ["pipe", "inherit", "inherit"] }
|
|
33
|
-
);
|
|
34
|
-
|
|
35
|
-
// If you want to forward user input from this process to the child:
|
|
36
|
-
// if (process.stdin.isTTY) {
|
|
37
|
-
// process.stdin.setRawMode(false);
|
|
38
|
-
// }
|
|
39
|
-
// process.stdin.pipe(child.stdin);
|
|
40
|
-
|
|
41
|
-
// listen for hotkey events from the child process
|
|
42
|
-
child.on("message", (message) => {
|
|
43
|
-
console.log("hotkey", message);
|
|
44
|
-
});
|