@lumiastream/wakeword 1.1.1 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binaries/sox.exe +0 -0
- package/binaries/soxlinux +0 -0
- package/binaries/soxmac +0 -0
- package/lib/list-devices.js +60 -7
- package/lib/recorders/sox.js +71 -51
- package/lib/voice.js +113 -21
- package/package.json +1 -1
package/binaries/sox.exe
CHANGED
|
File without changes
|
package/binaries/soxlinux
CHANGED
|
File without changes
|
package/binaries/soxmac
CHANGED
|
File without changes
|
package/lib/list-devices.js
CHANGED
|
@@ -31,9 +31,60 @@ export function listAudioDevices() {
|
|
|
31
31
|
return;
|
|
32
32
|
}
|
|
33
33
|
|
|
34
|
-
// Windows: Use PowerShell to
|
|
35
|
-
const psCommand = `
|
|
36
|
-
|
|
34
|
+
// Windows: Use PowerShell to enumerate capture endpoints so names match Sound settings
|
|
35
|
+
const psCommand = `
|
|
36
|
+
$devices = @()
|
|
37
|
+
$defaultName = $null
|
|
38
|
+
|
|
39
|
+
# Try to read the current default recording device
|
|
40
|
+
try {
|
|
41
|
+
$defaultKey = 'HKCU:\\SOFTWARE\\Microsoft\\Multimedia\\Sound Mapper'
|
|
42
|
+
if (Test-Path $defaultKey) {
|
|
43
|
+
$defaultName = (Get-ItemProperty -Path $defaultKey -ErrorAction SilentlyContinue).Record
|
|
44
|
+
}
|
|
45
|
+
} catch {}
|
|
46
|
+
|
|
47
|
+
# First pass: registry-backed capture endpoints (matches Sound control panel)
|
|
48
|
+
try {
|
|
49
|
+
$captureRoot = 'HKLM:\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\MMDevices\\Audio\\Capture'
|
|
50
|
+
if (Test-Path $captureRoot) {
|
|
51
|
+
foreach ($dev in Get-ChildItem $captureRoot) {
|
|
52
|
+
$props = Get-ItemProperty $dev.PSPath
|
|
53
|
+
$name = $props.FriendlyName
|
|
54
|
+
$state = $props.DeviceState
|
|
55
|
+
|
|
56
|
+
# Only include active/available devices with names
|
|
57
|
+
if ([string]::IsNullOrWhiteSpace($name)) { continue }
|
|
58
|
+
if (($state -band 1) -ne 1) { continue }
|
|
59
|
+
|
|
60
|
+
$displayName = $name
|
|
61
|
+
if ($defaultName -and $name -eq $defaultName) {
|
|
62
|
+
$displayName = "$name (Default)"
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
$devices += [PSCustomObject]@{
|
|
66
|
+
name = $displayName
|
|
67
|
+
id = $name # SoX expects the friendly name
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
} catch {}
|
|
72
|
+
|
|
73
|
+
# Fallback: PnP endpoints filtered to likely inputs
|
|
74
|
+
if (-not $devices.Count) {
|
|
75
|
+
try {
|
|
76
|
+
$pnp = Get-CimInstance -ClassName Win32_PnPEntity -Filter "PNPClass='AudioEndpoint'" -ErrorAction Stop
|
|
77
|
+
foreach ($d in $pnp) {
|
|
78
|
+
if (-not $d.Name) { continue }
|
|
79
|
+
if ($d.Name -notmatch '(?i)microphone|mic|input|line in|array|webcam|stereo mix') { continue }
|
|
80
|
+
$devices += [PSCustomObject]@{ name = $d.Name; id = $d.Name }
|
|
81
|
+
}
|
|
82
|
+
} catch {}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
$devices | ConvertTo-Json -Depth 3
|
|
86
|
+
`;
|
|
87
|
+
const proc = spawn("powershell", ["-NoProfile", "-NonInteractive", "-Command", psCommand], {
|
|
37
88
|
encoding: "utf8",
|
|
38
89
|
windowsHide: true,
|
|
39
90
|
});
|
|
@@ -59,10 +110,12 @@ export function listAudioDevices() {
|
|
|
59
110
|
const deviceArray = Array.isArray(psDevices) ? psDevices : [psDevices];
|
|
60
111
|
|
|
61
112
|
deviceArray.forEach((device, index) => {
|
|
62
|
-
|
|
113
|
+
const name = device?.name || device?.Name;
|
|
114
|
+
const id = device?.id || device?.Id || device?.ID;
|
|
115
|
+
if (name) {
|
|
63
116
|
devices.push({
|
|
64
|
-
id:
|
|
65
|
-
name
|
|
117
|
+
id: (id ?? name).toString(),
|
|
118
|
+
name,
|
|
66
119
|
});
|
|
67
120
|
}
|
|
68
121
|
});
|
|
@@ -117,4 +170,4 @@ if (process.argv[1] && import.meta.url.endsWith(process.argv[1].replace(/\\/g, '
|
|
|
117
170
|
console.error("Error listing devices:", err);
|
|
118
171
|
process.exit(1);
|
|
119
172
|
});
|
|
120
|
-
}
|
|
173
|
+
}
|
package/lib/recorders/sox.js
CHANGED
|
@@ -1,53 +1,73 @@
|
|
|
1
1
|
export default (options) => {
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
2
|
+
let cmd = "sox";
|
|
3
|
+
|
|
4
|
+
if (options.binPath) {
|
|
5
|
+
cmd = options.binPath;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
// Build common output/encoding args
|
|
9
|
+
let args = [
|
|
10
|
+
"--no-show-progress",
|
|
11
|
+
"--rate",
|
|
12
|
+
options.sampleRate,
|
|
13
|
+
"--channels",
|
|
14
|
+
options.channels,
|
|
15
|
+
"--encoding",
|
|
16
|
+
"signed-integer",
|
|
17
|
+
"--bits",
|
|
18
|
+
"16",
|
|
19
|
+
"--type",
|
|
20
|
+
options.audioType,
|
|
21
|
+
"-", // write to stdout
|
|
22
|
+
];
|
|
23
|
+
|
|
24
|
+
if (options.bufferSize) {
|
|
25
|
+
args.push("--buffer", options.bufferSize);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
if (options.endOnSilence) {
|
|
29
|
+
args = args.concat([
|
|
30
|
+
"silence",
|
|
31
|
+
"1",
|
|
32
|
+
"0.1",
|
|
33
|
+
options.thresholdStart || options.threshold + "%",
|
|
34
|
+
"1",
|
|
35
|
+
options.silence,
|
|
36
|
+
options.thresholdEnd || options.threshold + "%",
|
|
37
|
+
]);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
if (options.arguments) {
|
|
41
|
+
args = args.concat(options.arguments);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
const spawnOptions = {};
|
|
45
|
+
|
|
46
|
+
// Prepend input spec based on platform
|
|
47
|
+
const platform = process.platform;
|
|
48
|
+
if (platform === "win32") {
|
|
49
|
+
const rawDev = options.device;
|
|
50
|
+
const dev =
|
|
51
|
+
rawDev && `${rawDev}`.trim().length ? `${rawDev}`.trim() : "default";
|
|
52
|
+
args.unshift("-t", "waveaudio", dev);
|
|
53
|
+
// AUDIODEV sometimes respected on Windows; keep for compatibility
|
|
54
|
+
spawnOptions.env = { ...process.env, AUDIODEV: dev };
|
|
55
|
+
} else if (platform === "darwin") {
|
|
56
|
+
// CoreAudio input
|
|
57
|
+
if (options.device) {
|
|
58
|
+
args.unshift("-t", "coreaudio", options.device);
|
|
59
|
+
} else {
|
|
60
|
+
// Explicitly select CoreAudio default device for reliability
|
|
61
|
+
args.unshift("-t", "coreaudio", "default");
|
|
62
|
+
}
|
|
63
|
+
} else {
|
|
64
|
+
// Linux: ALSA default or specified
|
|
65
|
+
if (options.device) {
|
|
66
|
+
args.unshift("-t", "alsa", options.device);
|
|
67
|
+
} else {
|
|
68
|
+
args.unshift("-d");
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
return { cmd, args, spawnOptions };
|
|
53
73
|
};
|
package/lib/voice.js
CHANGED
|
@@ -16,12 +16,26 @@ function unpacked(p) {
|
|
|
16
16
|
: p;
|
|
17
17
|
}
|
|
18
18
|
|
|
19
|
+
const UNKNOWN_TOKEN = "[unk]";
|
|
20
|
+
const normalizePhrase = (phrase = "") => phrase.trim().toLowerCase();
|
|
21
|
+
const toBool = (v = "") =>
|
|
22
|
+
["1", "true", "yes", "y"].includes(`${v}`.trim().toLowerCase());
|
|
23
|
+
|
|
19
24
|
/* ------------------------------------------------------------------ */
|
|
20
25
|
/* 1. Resolve SoX binary and audio device */
|
|
21
26
|
/* ------------------------------------------------------------------ */
|
|
22
|
-
const
|
|
23
|
-
|
|
24
|
-
|
|
27
|
+
const defaultExeName = {
|
|
28
|
+
win32: "sox.exe",
|
|
29
|
+
darwin: "soxmac",
|
|
30
|
+
linux: "soxlinux",
|
|
31
|
+
}[process.platform];
|
|
32
|
+
const envExeOverride =
|
|
33
|
+
process.platform === "win32" && process.env.LUMIA_WIN_MIC_ALIAS_NAME
|
|
34
|
+
? process.env.LUMIA_WIN_MIC_ALIAS_NAME.trim()
|
|
35
|
+
: null;
|
|
36
|
+
const exeName =
|
|
37
|
+
envExeOverride && envExeOverride.length ? envExeOverride : defaultExeName;
|
|
38
|
+
const MATCH_SENTENCE = toBool(process.env.LUMIA_VOICE_MATCH_SENTENCE);
|
|
25
39
|
|
|
26
40
|
/* Priority for sox path: argv[2] → fallback to sibling binaries/<exe> */
|
|
27
41
|
/* Priority for device: argv[3] → env var → default */
|
|
@@ -32,7 +46,14 @@ if (!soxPath || soxPath === "") {
|
|
|
32
46
|
soxPath = unpacked(soxPath);
|
|
33
47
|
|
|
34
48
|
// Parse device from argv[3] or environment variable
|
|
35
|
-
let
|
|
49
|
+
let audioDeviceRaw = process.argv[3] ?? process.env.AUDIO_DEVICE ?? null;
|
|
50
|
+
let audioDevice =
|
|
51
|
+
typeof audioDeviceRaw === "string" ? audioDeviceRaw.trim() : audioDeviceRaw;
|
|
52
|
+
if (!audioDevice) audioDevice = null;
|
|
53
|
+
// Normalize Windows numeric "0" to SoX's "default" alias
|
|
54
|
+
if (process.platform === "win32" && audioDevice === "0") {
|
|
55
|
+
audioDevice = "default";
|
|
56
|
+
}
|
|
36
57
|
|
|
37
58
|
if (!existsSync(soxPath)) throw new Error(`SoX not found: ${soxPath}`);
|
|
38
59
|
try {
|
|
@@ -56,35 +77,81 @@ if (!existsSync(modelPath))
|
|
|
56
77
|
setLogLevel(0);
|
|
57
78
|
|
|
58
79
|
const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000);
|
|
59
|
-
let GRAMMAR = [
|
|
80
|
+
let GRAMMAR = [UNKNOWN_TOKEN]; // seed; always keep [unk]
|
|
81
|
+
let COMMANDS = [];
|
|
60
82
|
|
|
61
83
|
const model = new Model(modelPath);
|
|
62
|
-
|
|
63
|
-
|
|
84
|
+
const buildRecognizer = () => {
|
|
85
|
+
const recognizer = MATCH_SENTENCE
|
|
86
|
+
? new Recognizer({ model, sampleRate: SAMPLE_RATE })
|
|
87
|
+
: new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
|
|
88
|
+
recognizer.setWords(true);
|
|
89
|
+
return recognizer;
|
|
90
|
+
};
|
|
91
|
+
let rec = buildRecognizer();
|
|
64
92
|
|
|
65
93
|
/* ------------------------------------------------------------------ */
|
|
66
94
|
/* 4. Start the microphone */
|
|
67
95
|
/* ------------------------------------------------------------------ */
|
|
68
96
|
const recArgs = { sampleRate: SAMPLE_RATE, threshold: 0, binPath: soxPath };
|
|
97
|
+
// Feed raw PCM to Vosk for consistent parsing across platforms
|
|
98
|
+
recArgs.audioType = "raw";
|
|
99
|
+
recArgs.channels = 1;
|
|
69
100
|
|
|
70
101
|
// Set device based on platform and configuration
|
|
71
102
|
if (audioDevice !== null) {
|
|
72
103
|
// User specified a device explicitly
|
|
73
104
|
recArgs.device = audioDevice;
|
|
74
|
-
console.error(`Using audio device: ${audioDevice}`);
|
|
105
|
+
console.error(`Using audio device: ${audioDevice || "default"}`);
|
|
75
106
|
} else if (process.platform === "win32") {
|
|
76
|
-
// Windows: default
|
|
77
|
-
recArgs.device = "
|
|
78
|
-
console.error("Using default Windows audio device:
|
|
79
|
-
console.error(
|
|
107
|
+
// Windows: use default alias for reliability
|
|
108
|
+
recArgs.device = "default";
|
|
109
|
+
console.error("Using default Windows audio device: default");
|
|
110
|
+
console.error(
|
|
111
|
+
"To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument"
|
|
112
|
+
);
|
|
80
113
|
}
|
|
81
114
|
|
|
82
115
|
const mic = record.record(recArgs).stream();
|
|
116
|
+
// Handle recorder (SoX) errors to avoid unhandled 'error' events
|
|
117
|
+
mic.on("error", (err) => {
|
|
118
|
+
const msg = typeof err === "string" ? err : err?.message || String(err);
|
|
119
|
+
console.error(`[wakeword] audio stream error: ${msg}`);
|
|
120
|
+
process.exit(2);
|
|
121
|
+
});
|
|
83
122
|
// Define a confidence threshold for individual words.
|
|
84
123
|
// You might need to adjust this value based on your specific use case.
|
|
85
124
|
let WORD_CONFIDENCE_THRESHOLD = 0.7;
|
|
125
|
+
const DEBUG_AUDIO = ["1", "true", "yes"].includes(
|
|
126
|
+
(process.env.WAKEWORD_DEBUG || "").toLowerCase()
|
|
127
|
+
);
|
|
128
|
+
const LOG_PARTIAL =
|
|
129
|
+
DEBUG_AUDIO ||
|
|
130
|
+
["1", "true", "yes"].includes(
|
|
131
|
+
(process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase()
|
|
132
|
+
);
|
|
133
|
+
let lastLevelLog = 0;
|
|
134
|
+
|
|
135
|
+
function logAudioLevel(buf) {
|
|
136
|
+
const now = Date.now();
|
|
137
|
+
if (now - lastLevelLog < 1000) return;
|
|
138
|
+
lastLevelLog = now;
|
|
139
|
+
|
|
140
|
+
// Expect 16-bit little-endian PCM
|
|
141
|
+
const samples = buf.length / 2;
|
|
142
|
+
if (!samples) return;
|
|
143
|
+
let sumSquares = 0;
|
|
144
|
+
for (let i = 0; i < buf.length; i += 2) {
|
|
145
|
+
const sample = buf.readInt16LE(i);
|
|
146
|
+
sumSquares += sample * sample;
|
|
147
|
+
}
|
|
148
|
+
const rms = Math.sqrt(sumSquares / samples);
|
|
149
|
+
console.error(`[wakeword] audio rms=${rms.toFixed(1)} (0-32768)`);
|
|
150
|
+
}
|
|
86
151
|
|
|
87
152
|
mic.on("data", (buf) => {
|
|
153
|
+
if (DEBUG_AUDIO) logAudioLevel(buf);
|
|
154
|
+
|
|
88
155
|
if (rec.acceptWaveform(buf)) {
|
|
89
156
|
const fullResult = rec.result();
|
|
90
157
|
|
|
@@ -118,18 +185,39 @@ mic.on("data", (buf) => {
|
|
|
118
185
|
// Fallback for cases where setWords(true) might not fully apply or for partial results
|
|
119
186
|
handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
|
|
120
187
|
}
|
|
188
|
+
} else if (LOG_PARTIAL) {
|
|
189
|
+
const partial = rec.partialResult();
|
|
190
|
+
if (partial?.partial) {
|
|
191
|
+
console.error(`[wakeword] partial: "${partial.partial}"`);
|
|
192
|
+
}
|
|
121
193
|
}
|
|
122
194
|
});
|
|
123
195
|
|
|
124
196
|
function handle(processedWord, averageConfidence, originalText) {
|
|
125
|
-
if (!processedWord) return;
|
|
197
|
+
if (!processedWord && !originalText) return;
|
|
198
|
+
|
|
199
|
+
const normalizedProcessed = normalizePhrase(processedWord);
|
|
200
|
+
const normalizedOriginal = normalizePhrase(originalText);
|
|
201
|
+
const matches = new Set();
|
|
202
|
+
|
|
203
|
+
const findMatches = (text) => {
|
|
204
|
+
if (!text || text.includes(UNKNOWN_TOKEN)) return;
|
|
205
|
+
const hits = MATCH_SENTENCE
|
|
206
|
+
? COMMANDS.filter((command) => text.includes(command))
|
|
207
|
+
: COMMANDS.filter((command) => text === command);
|
|
208
|
+
hits.forEach((hit) => matches.add(hit));
|
|
209
|
+
};
|
|
126
210
|
|
|
127
|
-
|
|
211
|
+
// Try the filtered text first, then fall back to the raw sentence for sentence matching
|
|
212
|
+
findMatches(normalizedProcessed);
|
|
213
|
+
findMatches(normalizedOriginal);
|
|
128
214
|
|
|
129
|
-
if (
|
|
130
|
-
|
|
215
|
+
if (!matches.size) return;
|
|
216
|
+
|
|
217
|
+
matches.forEach((match) => {
|
|
218
|
+
process.stdout?.write(`voice|${match}\n`);
|
|
131
219
|
process.stdout?.write(`confidence|${averageConfidence}\n`);
|
|
132
|
-
}
|
|
220
|
+
});
|
|
133
221
|
}
|
|
134
222
|
/* ------------------------------------------------------------------ */
|
|
135
223
|
/* 6. Hot-reload grammar via stdin */
|
|
@@ -149,11 +237,15 @@ rl.on("line", (line) => {
|
|
|
149
237
|
const phrases = trimmed
|
|
150
238
|
.split(",")
|
|
151
239
|
.slice(1)
|
|
152
|
-
.map((s) => s
|
|
153
|
-
|
|
240
|
+
.map((s) => normalizePhrase(s))
|
|
241
|
+
.filter(Boolean);
|
|
154
242
|
|
|
155
243
|
if (!phrases.length) return;
|
|
156
244
|
|
|
157
|
-
|
|
158
|
-
|
|
245
|
+
COMMANDS = phrases;
|
|
246
|
+
GRAMMAR = [...phrases, UNKNOWN_TOKEN];
|
|
247
|
+
console.error(
|
|
248
|
+
`[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`
|
|
249
|
+
);
|
|
250
|
+
rec = buildRecognizer();
|
|
159
251
|
});
|