@lumiastream/wakeword 1.1.1 → 1.1.4

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/binaries/sox.exe CHANGED
File without changes
package/binaries/soxlinux CHANGED
File without changes
package/binaries/soxmac CHANGED
File without changes
@@ -31,9 +31,60 @@ export function listAudioDevices() {
31
31
  return;
32
32
  }
33
33
 
34
- // Windows: Use PowerShell to get audio devices
35
- const psCommand = `Get-WmiObject Win32_SoundDevice | Select-Object -Property Name, DeviceID | ConvertTo-Json`;
36
- const proc = spawn("powershell", ["-Command", psCommand], {
34
+ // Windows: Use PowerShell to enumerate capture endpoints so names match Sound settings
35
+ const psCommand = `
36
+ $devices = @()
37
+ $defaultName = $null
38
+
39
+ # Try to read the current default recording device
40
+ try {
41
+ $defaultKey = 'HKCU:\\SOFTWARE\\Microsoft\\Multimedia\\Sound Mapper'
42
+ if (Test-Path $defaultKey) {
43
+ $defaultName = (Get-ItemProperty -Path $defaultKey -ErrorAction SilentlyContinue).Record
44
+ }
45
+ } catch {}
46
+
47
+ # First pass: registry-backed capture endpoints (matches Sound control panel)
48
+ try {
49
+ $captureRoot = 'HKLM:\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\MMDevices\\Audio\\Capture'
50
+ if (Test-Path $captureRoot) {
51
+ foreach ($dev in Get-ChildItem $captureRoot) {
52
+ $props = Get-ItemProperty $dev.PSPath
53
+ $name = $props.FriendlyName
54
+ $state = $props.DeviceState
55
+
56
+ # Only include active/available devices with names
57
+ if ([string]::IsNullOrWhiteSpace($name)) { continue }
58
+ if (($state -band 1) -ne 1) { continue }
59
+
60
+ $displayName = $name
61
+ if ($defaultName -and $name -eq $defaultName) {
62
+ $displayName = "$name (Default)"
63
+ }
64
+
65
+ $devices += [PSCustomObject]@{
66
+ name = $displayName
67
+ id = $name # SoX expects the friendly name
68
+ }
69
+ }
70
+ }
71
+ } catch {}
72
+
73
+ # Fallback: PnP endpoints filtered to likely inputs
74
+ if (-not $devices.Count) {
75
+ try {
76
+ $pnp = Get-CimInstance -ClassName Win32_PnPEntity -Filter "PNPClass='AudioEndpoint'" -ErrorAction Stop
77
+ foreach ($d in $pnp) {
78
+ if (-not $d.Name) { continue }
79
+ if ($d.Name -notmatch '(?i)microphone|mic|input|line in|array|webcam|stereo mix') { continue }
80
+ $devices += [PSCustomObject]@{ name = $d.Name; id = $d.Name }
81
+ }
82
+ } catch {}
83
+ }
84
+
85
+ $devices | ConvertTo-Json -Depth 3
86
+ `;
87
+ const proc = spawn("powershell", ["-NoProfile", "-NonInteractive", "-Command", psCommand], {
37
88
  encoding: "utf8",
38
89
  windowsHide: true,
39
90
  });
@@ -59,10 +110,12 @@ export function listAudioDevices() {
59
110
  const deviceArray = Array.isArray(psDevices) ? psDevices : [psDevices];
60
111
 
61
112
  deviceArray.forEach((device, index) => {
62
- if (device && device.Name) {
113
+ const name = device?.name || device?.Name;
114
+ const id = device?.id || device?.Id || device?.ID;
115
+ if (name) {
63
116
  devices.push({
64
- id: index.toString(),
65
- name: device.Name,
117
+ id: (id ?? name).toString(),
118
+ name,
66
119
  });
67
120
  }
68
121
  });
@@ -117,4 +170,4 @@ if (process.argv[1] && import.meta.url.endsWith(process.argv[1].replace(/\\/g, '
117
170
  console.error("Error listing devices:", err);
118
171
  process.exit(1);
119
172
  });
120
- }
173
+ }
@@ -1,53 +1,73 @@
1
1
  export default (options) => {
2
- let cmd = "sox";
3
-
4
- if (options.binPath) {
5
- cmd = options.binPath;
6
- }
7
-
8
- let args = [
9
- "--no-show-progress", // show no progress
10
- "--rate",
11
- options.sampleRate, // sample rate
12
- "--channels",
13
- options.channels, // channels
14
- "--encoding",
15
- "signed-integer", // sample encoding
16
- "--bits",
17
- "16", // precision (bits)
18
- "--type",
19
- options.audioType, // audio type
20
- "-", // pipe
21
- ];
22
-
23
- if (options.bufferSize) {
24
- args.push("--buffer", options.bufferSize);
25
- }
26
-
27
- if (options.endOnSilence) {
28
- args = args.concat([
29
- "silence",
30
- "1",
31
- "0.1",
32
- options.thresholdStart || options.threshold + "%",
33
- "1",
34
- options.silence,
35
- options.thresholdEnd || options.threshold + "%",
36
- ]);
37
- }
38
-
39
- if (options.arguments) {
40
- args = args.concat(options.arguments);
41
- }
42
-
43
- const spawnOptions = {};
44
-
45
- if (options.device) {
46
- args.unshift("-t", "waveaudio", options.device);
47
- spawnOptions.env = { ...process.env, AUDIODEV: options.device };
48
- } else {
49
- args.unshift("--default-device");
50
- }
51
-
52
- return { cmd, args, spawnOptions };
2
+ let cmd = "sox";
3
+
4
+ if (options.binPath) {
5
+ cmd = options.binPath;
6
+ }
7
+
8
+ // Build common output/encoding args
9
+ let args = [
10
+ "--no-show-progress",
11
+ "--rate",
12
+ options.sampleRate,
13
+ "--channels",
14
+ options.channels,
15
+ "--encoding",
16
+ "signed-integer",
17
+ "--bits",
18
+ "16",
19
+ "--type",
20
+ options.audioType,
21
+ "-", // write to stdout
22
+ ];
23
+
24
+ if (options.bufferSize) {
25
+ args.push("--buffer", options.bufferSize);
26
+ }
27
+
28
+ if (options.endOnSilence) {
29
+ args = args.concat([
30
+ "silence",
31
+ "1",
32
+ "0.1",
33
+ options.thresholdStart || options.threshold + "%",
34
+ "1",
35
+ options.silence,
36
+ options.thresholdEnd || options.threshold + "%",
37
+ ]);
38
+ }
39
+
40
+ if (options.arguments) {
41
+ args = args.concat(options.arguments);
42
+ }
43
+
44
+ const spawnOptions = {};
45
+
46
+ // Prepend input spec based on platform
47
+ const platform = process.platform;
48
+ if (platform === "win32") {
49
+ const rawDev = options.device;
50
+ const dev =
51
+ rawDev && `${rawDev}`.trim().length ? `${rawDev}`.trim() : "default";
52
+ args.unshift("-t", "waveaudio", dev);
53
+ // AUDIODEV sometimes respected on Windows; keep for compatibility
54
+ spawnOptions.env = { ...process.env, AUDIODEV: dev };
55
+ } else if (platform === "darwin") {
56
+ // CoreAudio input
57
+ if (options.device) {
58
+ args.unshift("-t", "coreaudio", options.device);
59
+ } else {
60
+ // Explicitly select CoreAudio default device for reliability
61
+ args.unshift("-t", "coreaudio", "default");
62
+ }
63
+ } else {
64
+ // Linux: ALSA default or specified
65
+ if (options.device) {
66
+ args.unshift("-t", "alsa", options.device);
67
+ } else {
68
+ args.unshift("-d");
69
+ }
70
+ }
71
+
72
+ return { cmd, args, spawnOptions };
53
73
  };
package/lib/voice.js CHANGED
@@ -16,12 +16,26 @@ function unpacked(p) {
16
16
  : p;
17
17
  }
18
18
 
19
+ const UNKNOWN_TOKEN = "[unk]";
20
+ const normalizePhrase = (phrase = "") => phrase.trim().toLowerCase();
21
+ const toBool = (v = "") =>
22
+ ["1", "true", "yes", "y"].includes(`${v}`.trim().toLowerCase());
23
+
19
24
  /* ------------------------------------------------------------------ */
20
25
  /* 1. Resolve SoX binary and audio device */
21
26
  /* ------------------------------------------------------------------ */
22
- const exeName = { win32: "sox.exe", darwin: "soxmac", linux: "soxlinux" }[
23
- process.platform
24
- ];
27
+ const defaultExeName = {
28
+ win32: "sox.exe",
29
+ darwin: "soxmac",
30
+ linux: "soxlinux",
31
+ }[process.platform];
32
+ const envExeOverride =
33
+ process.platform === "win32" && process.env.LUMIA_WIN_MIC_ALIAS_NAME
34
+ ? process.env.LUMIA_WIN_MIC_ALIAS_NAME.trim()
35
+ : null;
36
+ const exeName =
37
+ envExeOverride && envExeOverride.length ? envExeOverride : defaultExeName;
38
+ const MATCH_SENTENCE = toBool(process.env.LUMIA_VOICE_MATCH_SENTENCE);
25
39
 
26
40
  /* Priority for sox path: argv[2] → fallback to sibling binaries/<exe> */
27
41
  /* Priority for device: argv[3] → env var → default */
@@ -32,7 +46,14 @@ if (!soxPath || soxPath === "") {
32
46
  soxPath = unpacked(soxPath);
33
47
 
34
48
  // Parse device from argv[3] or environment variable
35
- let audioDevice = process.argv[3] || process.env.AUDIO_DEVICE || null;
49
+ let audioDeviceRaw = process.argv[3] ?? process.env.AUDIO_DEVICE ?? null;
50
+ let audioDevice =
51
+ typeof audioDeviceRaw === "string" ? audioDeviceRaw.trim() : audioDeviceRaw;
52
+ if (!audioDevice) audioDevice = null;
53
+ // Normalize Windows numeric "0" to SoX's "default" alias
54
+ if (process.platform === "win32" && audioDevice === "0") {
55
+ audioDevice = "default";
56
+ }
36
57
 
37
58
  if (!existsSync(soxPath)) throw new Error(`SoX not found: ${soxPath}`);
38
59
  try {
@@ -56,35 +77,81 @@ if (!existsSync(modelPath))
56
77
  setLogLevel(0);
57
78
 
58
79
  const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000);
59
- let GRAMMAR = ["[unk]"]; // seed; always keep [unk]
80
+ let GRAMMAR = [UNKNOWN_TOKEN]; // seed; always keep [unk]
81
+ let COMMANDS = [];
60
82
 
61
83
  const model = new Model(modelPath);
62
- let rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
63
- rec.setWords(true);
84
+ const buildRecognizer = () => {
85
+ const recognizer = MATCH_SENTENCE
86
+ ? new Recognizer({ model, sampleRate: SAMPLE_RATE })
87
+ : new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
88
+ recognizer.setWords(true);
89
+ return recognizer;
90
+ };
91
+ let rec = buildRecognizer();
64
92
 
65
93
  /* ------------------------------------------------------------------ */
66
94
  /* 4. Start the microphone */
67
95
  /* ------------------------------------------------------------------ */
68
96
  const recArgs = { sampleRate: SAMPLE_RATE, threshold: 0, binPath: soxPath };
97
+ // Feed raw PCM to Vosk for consistent parsing across platforms
98
+ recArgs.audioType = "raw";
99
+ recArgs.channels = 1;
69
100
 
70
101
  // Set device based on platform and configuration
71
102
  if (audioDevice !== null) {
72
103
  // User specified a device explicitly
73
104
  recArgs.device = audioDevice;
74
- console.error(`Using audio device: ${audioDevice}`);
105
+ console.error(`Using audio device: ${audioDevice || "default"}`);
75
106
  } else if (process.platform === "win32") {
76
- // Windows: default to device 0 if not specified
77
- recArgs.device = "0";
78
- console.error("Using default Windows audio device: 0");
79
- console.error("To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument");
107
+ // Windows: use default alias for reliability
108
+ recArgs.device = "default";
109
+ console.error("Using default Windows audio device: default");
110
+ console.error(
111
+ "To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument"
112
+ );
80
113
  }
81
114
 
82
115
  const mic = record.record(recArgs).stream();
116
+ // Handle recorder (SoX) errors to avoid unhandled 'error' events
117
+ mic.on("error", (err) => {
118
+ const msg = typeof err === "string" ? err : err?.message || String(err);
119
+ console.error(`[wakeword] audio stream error: ${msg}`);
120
+ process.exit(2);
121
+ });
83
122
  // Define a confidence threshold for individual words.
84
123
  // You might need to adjust this value based on your specific use case.
85
124
  let WORD_CONFIDENCE_THRESHOLD = 0.7;
125
+ const DEBUG_AUDIO = ["1", "true", "yes"].includes(
126
+ (process.env.WAKEWORD_DEBUG || "").toLowerCase()
127
+ );
128
+ const LOG_PARTIAL =
129
+ DEBUG_AUDIO ||
130
+ ["1", "true", "yes"].includes(
131
+ (process.env.WAKEWORD_LOG_PARTIAL || "").toLowerCase()
132
+ );
133
+ let lastLevelLog = 0;
134
+
135
+ function logAudioLevel(buf) {
136
+ const now = Date.now();
137
+ if (now - lastLevelLog < 1000) return;
138
+ lastLevelLog = now;
139
+
140
+ // Expect 16-bit little-endian PCM
141
+ const samples = buf.length / 2;
142
+ if (!samples) return;
143
+ let sumSquares = 0;
144
+ for (let i = 0; i < buf.length; i += 2) {
145
+ const sample = buf.readInt16LE(i);
146
+ sumSquares += sample * sample;
147
+ }
148
+ const rms = Math.sqrt(sumSquares / samples);
149
+ console.error(`[wakeword] audio rms=${rms.toFixed(1)} (0-32768)`);
150
+ }
86
151
 
87
152
  mic.on("data", (buf) => {
153
+ if (DEBUG_AUDIO) logAudioLevel(buf);
154
+
88
155
  if (rec.acceptWaveform(buf)) {
89
156
  const fullResult = rec.result();
90
157
 
@@ -118,18 +185,39 @@ mic.on("data", (buf) => {
118
185
  // Fallback for cases where setWords(true) might not fully apply or for partial results
119
186
  handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
120
187
  }
188
+ } else if (LOG_PARTIAL) {
189
+ const partial = rec.partialResult();
190
+ if (partial?.partial) {
191
+ console.error(`[wakeword] partial: "${partial.partial}"`);
192
+ }
121
193
  }
122
194
  });
123
195
 
124
196
  function handle(processedWord, averageConfidence, originalText) {
125
- if (!processedWord) return;
197
+ if (!processedWord && !originalText) return;
198
+
199
+ const normalizedProcessed = normalizePhrase(processedWord);
200
+ const normalizedOriginal = normalizePhrase(originalText);
201
+ const matches = new Set();
202
+
203
+ const findMatches = (text) => {
204
+ if (!text || text.includes(UNKNOWN_TOKEN)) return;
205
+ const hits = MATCH_SENTENCE
206
+ ? COMMANDS.filter((command) => text.includes(command))
207
+ : COMMANDS.filter((command) => text === command);
208
+ hits.forEach((hit) => matches.add(hit));
209
+ };
126
210
 
127
- if (processedWord.includes("[unk]")) return;
211
+ // Try the filtered text first, then fall back to the raw sentence for sentence matching
212
+ findMatches(normalizedProcessed);
213
+ findMatches(normalizedOriginal);
128
214
 
129
- if (GRAMMAR.includes(processedWord)) {
130
- process.stdout?.write(`voice|${processedWord}\n`);
215
+ if (!matches.size) return;
216
+
217
+ matches.forEach((match) => {
218
+ process.stdout?.write(`voice|${match}\n`);
131
219
  process.stdout?.write(`confidence|${averageConfidence}\n`);
132
- }
220
+ });
133
221
  }
134
222
  /* ------------------------------------------------------------------ */
135
223
  /* 6. Hot-reload grammar via stdin */
@@ -149,11 +237,15 @@ rl.on("line", (line) => {
149
237
  const phrases = trimmed
150
238
  .split(",")
151
239
  .slice(1)
152
- .map((s) => s.trim())
153
- .filter(Boolean);
240
+ .map((s) => normalizePhrase(s))
241
+ .filter(Boolean);
154
242
 
155
243
  if (!phrases.length) return;
156
244
 
157
- GRAMMAR = [...phrases, "[unk]"];
158
- rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
245
+ COMMANDS = phrases;
246
+ GRAMMAR = [...phrases, UNKNOWN_TOKEN];
247
+ console.error(
248
+ `[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`
249
+ );
250
+ rec = buildRecognizer();
159
251
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lumiastream/wakeword",
3
- "version": "1.1.1",
3
+ "version": "1.1.4",
4
4
  "type": "module",
5
5
  "main": "lib/index.js",
6
6
  "files": [