@lumiastream/wakeword 1.1.6 → 1.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Readme.md +2 -1
- package/lib/index.js +6 -1
- package/lib/voice.js +19 -3
- package/package.json +1 -1
package/Readme.md
CHANGED
|
@@ -96,6 +96,7 @@ Starts the wake word detection process.
|
|
|
96
96
|
- `sampleRate` (number): Sample rate, default 16000
|
|
97
97
|
- `grammar` (string[]): Array of wake words to detect
|
|
98
98
|
- `confidence` (number): Confidence threshold (0-1), default 0.7
|
|
99
|
+
- `modelPath` (string): Full path to a Vosk model directory (optional; defaults to the bundled `vosk-model-small-en-us-0.15` model)
|
|
99
100
|
|
|
100
101
|
**Returns:** ChildProcess instance
|
|
101
102
|
|
|
@@ -153,4 +154,4 @@ Source: https://github.com/zackees/static-sox/tree/main/bin
|
|
|
153
154
|
|
|
154
155
|
## License
|
|
155
156
|
|
|
156
|
-
See LICENSE file for details.
|
|
157
|
+
See LICENSE file for details.
|
package/lib/index.js
CHANGED
|
@@ -19,6 +19,7 @@ export { listAudioDevices };
|
|
|
19
19
|
* @param {number} [options.sampleRate] - Sample rate (default: 16000)
|
|
20
20
|
* @param {Array<string>} [options.grammar] - Wake words to detect
|
|
21
21
|
* @param {number} [options.confidence] - Confidence threshold (0-1)
|
|
22
|
+
* @param {string} [options.modelPath] - Full path to a Vosk model directory
|
|
22
23
|
* @returns {ChildProcess} The spawned voice detection process
|
|
23
24
|
*/
|
|
24
25
|
export function startWakeWord(options = {}) {
|
|
@@ -28,6 +29,7 @@ export function startWakeWord(options = {}) {
|
|
|
28
29
|
sampleRate = 16000,
|
|
29
30
|
grammar = [],
|
|
30
31
|
confidence = 0.7,
|
|
32
|
+
modelPath = null,
|
|
31
33
|
} = options;
|
|
32
34
|
|
|
33
35
|
const voiceScript = join(here, "voice.js");
|
|
@@ -50,6 +52,9 @@ export function startWakeWord(options = {}) {
|
|
|
50
52
|
if (device && !args[2]) {
|
|
51
53
|
env.AUDIO_DEVICE = device;
|
|
52
54
|
}
|
|
55
|
+
if (modelPath) {
|
|
56
|
+
env.LUMIA_VOICE_MODEL_PATH = modelPath;
|
|
57
|
+
}
|
|
53
58
|
|
|
54
59
|
const proc = spawn("node", args, {
|
|
55
60
|
env,
|
|
@@ -72,4 +77,4 @@ export function startWakeWord(options = {}) {
|
|
|
72
77
|
export default {
|
|
73
78
|
listAudioDevices,
|
|
74
79
|
startWakeWord,
|
|
75
|
-
};
|
|
80
|
+
};
|
package/lib/voice.js
CHANGED
|
@@ -62,6 +62,7 @@ const defaultExeName = {
|
|
|
62
62
|
}[process.platform];
|
|
63
63
|
const exeName = defaultExeName;
|
|
64
64
|
const MATCH_SENTENCE = toBool(process.env.LUMIA_VOICE_MATCH_SENTENCE);
|
|
65
|
+
const DISABLE_GRAMMAR = toBool(process.env.LUMIA_VOICE_DISABLE_GRAMMAR);
|
|
65
66
|
|
|
66
67
|
/* Priority for sox path: argv[2] → fallback to sibling binaries/<exe> */
|
|
67
68
|
/* Priority for device: argv[3] → env var → default */
|
|
@@ -91,7 +92,8 @@ try {
|
|
|
91
92
|
/* ------------------------------------------------------------------ */
|
|
92
93
|
/* 2. Resolve Vosk model */
|
|
93
94
|
/* ------------------------------------------------------------------ */
|
|
94
|
-
|
|
95
|
+
const envModelPath = (process.env.LUMIA_VOICE_MODEL_PATH || "").trim();
|
|
96
|
+
let modelPath = envModelPath || join(here, "..", "models", "vosk-model-small-en-us-0.15");
|
|
95
97
|
modelPath = unpacked(modelPath);
|
|
96
98
|
|
|
97
99
|
if (!existsSync(modelPath))
|
|
@@ -108,7 +110,7 @@ let COMMANDS = [];
|
|
|
108
110
|
|
|
109
111
|
const model = new Model(modelPath);
|
|
110
112
|
const buildRecognizer = () => {
|
|
111
|
-
const recognizer = MATCH_SENTENCE
|
|
113
|
+
const recognizer = MATCH_SENTENCE || DISABLE_GRAMMAR
|
|
112
114
|
? new Recognizer({ model, sampleRate: SAMPLE_RATE })
|
|
113
115
|
: new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
|
|
114
116
|
recognizer.setWords(true);
|
|
@@ -188,8 +190,14 @@ mic.on("data", (buf) => {
|
|
|
188
190
|
if (fullResult && fullResult.result && Array.isArray(fullResult.result)) {
|
|
189
191
|
let recognizedWords = [];
|
|
190
192
|
let totalConfidence = 0;
|
|
193
|
+
let totalConfidenceAll = 0;
|
|
194
|
+
let totalConfidenceCount = 0;
|
|
191
195
|
|
|
192
196
|
for (const wordDetail of fullResult.result) {
|
|
197
|
+
if (typeof wordDetail.conf === "number") {
|
|
198
|
+
totalConfidenceAll += wordDetail.conf;
|
|
199
|
+
totalConfidenceCount += 1;
|
|
200
|
+
}
|
|
193
201
|
// Each word has its own confidence ('conf')
|
|
194
202
|
if (wordDetail.conf >= WORD_CONFIDENCE_THRESHOLD) {
|
|
195
203
|
recognizedWords.push(wordDetail.word);
|
|
@@ -204,10 +212,12 @@ mic.on("data", (buf) => {
|
|
|
204
212
|
}
|
|
205
213
|
|
|
206
214
|
const finalRecognizedText = recognizedWords.join(" ").trim();
|
|
215
|
+
const averageConfidenceAll =
|
|
216
|
+
totalConfidenceCount > 0 ? totalConfidenceAll / totalConfidenceCount : 0;
|
|
207
217
|
const averageConfidence =
|
|
208
218
|
recognizedWords.length > 0
|
|
209
219
|
? totalConfidence / recognizedWords.length
|
|
210
|
-
:
|
|
220
|
+
: averageConfidenceAll;
|
|
211
221
|
|
|
212
222
|
handle(finalRecognizedText, averageConfidence, fullResult.text); // Pass both the filtered text and an average confidence
|
|
213
223
|
} else if (fullResult && fullResult.text) {
|
|
@@ -262,6 +272,12 @@ function handle(processedWord, averageConfidence, originalText) {
|
|
|
262
272
|
findMatches(normalizedProcessed);
|
|
263
273
|
findMatches(normalizedOriginal, [...confidentCommands]);
|
|
264
274
|
|
|
275
|
+
// If word-level confidence filtering removed all words, fall back to the
|
|
276
|
+
// original text when overall confidence is still acceptable.
|
|
277
|
+
if (!matches.size && normalizedOriginal && averageConfidence >= WORD_CONFIDENCE_THRESHOLD) {
|
|
278
|
+
findMatches(normalizedOriginal);
|
|
279
|
+
}
|
|
280
|
+
|
|
265
281
|
if (!matches.size) return;
|
|
266
282
|
|
|
267
283
|
matches.forEach((match) => {
|