@lumiastream/wakeword 1.1.2 → 1.1.5-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,9 +31,60 @@ export function listAudioDevices() {
31
31
  return;
32
32
  }
33
33
 
34
- // Windows: Use PowerShell to get audio devices
35
- const psCommand = `Get-WmiObject Win32_SoundDevice | Select-Object -Property Name, DeviceID | ConvertTo-Json`;
36
- const proc = spawn("powershell", ["-Command", psCommand], {
34
+ // Windows: Use PowerShell to enumerate capture endpoints so names match Sound settings
35
+ const psCommand = `
36
+ $devices = @()
37
+ $defaultName = $null
38
+
39
+ # Try to read the current default recording device
40
+ try {
41
+ $defaultKey = 'HKCU:\\SOFTWARE\\Microsoft\\Multimedia\\Sound Mapper'
42
+ if (Test-Path $defaultKey) {
43
+ $defaultName = (Get-ItemProperty -Path $defaultKey -ErrorAction SilentlyContinue).Record
44
+ }
45
+ } catch {}
46
+
47
+ # First pass: registry-backed capture endpoints (matches Sound control panel)
48
+ try {
49
+ $captureRoot = 'HKLM:\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\MMDevices\\Audio\\Capture'
50
+ if (Test-Path $captureRoot) {
51
+ foreach ($dev in Get-ChildItem $captureRoot) {
52
+ $props = Get-ItemProperty $dev.PSPath
53
+ $name = $props.FriendlyName
54
+ $state = $props.DeviceState
55
+
56
+ # Only include active/available devices with names
57
+ if ([string]::IsNullOrWhiteSpace($name)) { continue }
58
+ if (($state -band 1) -ne 1) { continue }
59
+
60
+ $displayName = $name
61
+ if ($defaultName -and $name -eq $defaultName) {
62
+ $displayName = "$name (Default)"
63
+ }
64
+
65
+ $devices += [PSCustomObject]@{
66
+ name = $displayName
67
+ id = $name # SoX expects the friendly name
68
+ }
69
+ }
70
+ }
71
+ } catch {}
72
+
73
+ # Fallback: PnP endpoints filtered to likely inputs
74
+ if (-not $devices.Count) {
75
+ try {
76
+ $pnp = Get-CimInstance -ClassName Win32_PnPEntity -Filter "PNPClass='AudioEndpoint'" -ErrorAction Stop
77
+ foreach ($d in $pnp) {
78
+ if (-not $d.Name) { continue }
79
+ if ($d.Name -notmatch '(?i)microphone|mic|input|line in|array|webcam|stereo mix') { continue }
80
+ $devices += [PSCustomObject]@{ name = $d.Name; id = $d.Name }
81
+ }
82
+ } catch {}
83
+ }
84
+
85
+ $devices | ConvertTo-Json -Depth 3
86
+ `;
87
+ const proc = spawn("powershell", ["-NoProfile", "-NonInteractive", "-Command", psCommand], {
37
88
  encoding: "utf8",
38
89
  windowsHide: true,
39
90
  });
@@ -59,10 +110,12 @@ export function listAudioDevices() {
59
110
  const deviceArray = Array.isArray(psDevices) ? psDevices : [psDevices];
60
111
 
61
112
  deviceArray.forEach((device, index) => {
62
- if (device && device.Name) {
113
+ const name = device?.name || device?.Name;
114
+ const id = device?.id || device?.Id || device?.ID;
115
+ if (name) {
63
116
  devices.push({
64
- id: index.toString(),
65
- name: device.Name,
117
+ id: (id ?? name).toString(),
118
+ name,
66
119
  });
67
120
  }
68
121
  });
@@ -117,4 +170,4 @@ if (process.argv[1] && import.meta.url.endsWith(process.argv[1].replace(/\\/g, '
117
170
  console.error("Error listing devices:", err);
118
171
  process.exit(1);
119
172
  });
120
- }
173
+ }
@@ -46,7 +46,9 @@ export default (options) => {
46
46
  // Prepend input spec based on platform
47
47
  const platform = process.platform;
48
48
  if (platform === "win32") {
49
- const dev = options.device ?? "default";
49
+ const rawDev = options.device;
50
+ const dev =
51
+ rawDev && `${rawDev}`.trim().length ? `${rawDev}`.trim() : "default";
50
52
  args.unshift("-t", "waveaudio", dev);
51
53
  // AUDIODEV sometimes respected on Windows; keep for compatibility
52
54
  spawnOptions.env = { ...process.env, AUDIODEV: dev };
package/lib/voice.js CHANGED
@@ -1,10 +1,37 @@
1
- import { Model, Recognizer, setLogLevel } from "vosk-koffi";
2
1
  import record from "./record.js";
3
- import { dirname, join } from "node:path";
2
+ import koffi from "koffi";
3
+ import { delimiter, dirname, join } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
5
  import { existsSync, chmodSync } from "node:fs";
6
6
  import readline from "node:readline";
7
7
 
8
+ // Ensure native libs can load from app.asar.unpacked when packaged
9
/**
 * Redirect a library path that points inside a packaged `app.asar` archive to
 * the matching location under `app.asar.unpacked`, when that location exists.
 *
 * `app.asar` is only rewritten when it ends a path segment (followed by a
 * path separator or the end of the string). A path that already points into
 * `app.asar.unpacked` is therefore returned as-is instead of being turned
 * into `app.asar.unpacked.unpacked` and probed on disk.
 *
 * @param {*} libPath - Candidate library path; non-string values pass through.
 * @returns {*} The unpacked path if it exists on disk, otherwise `libPath`.
 */
const maybeUnpackedPath = (libPath) => {
  if (typeof libPath !== "string") return libPath;

  // No `g` flag: only the first archive segment is rewritten, and the regex
  // carries no `lastIndex` state between calls.
  const asarSegment = /app\.asar(?=[\\/]|$)/;
  if (!asarSegment.test(libPath)) return libPath;

  const unpacked = libPath.replace(asarSegment, "app.asar.unpacked");
  return existsSync(unpacked) ? unpacked : libPath;
};
15
+ const ensureWinBinOnPath = (libPath) => {
16
+ if (process.platform !== "win32") return;
17
+ const dir = dirname(libPath);
18
+ const current = process.env.Path || process.env.PATH || "";
19
+ const parts = current.split(delimiter).filter(Boolean);
20
+ if (!parts.includes(dir)) {
21
+ process.env.Path = [dir, ...parts].join(delimiter);
22
+ }
23
+ };
24
+ const originalKoffiLoad = koffi.load.bind(koffi);
25
+ koffi.load = (libPath, ...rest) => {
26
+ const resolved = maybeUnpackedPath(libPath);
27
+ if (resolved !== libPath && typeof resolved === "string") {
28
+ ensureWinBinOnPath(resolved);
29
+ }
30
+ return originalKoffiLoad(resolved, ...rest);
31
+ };
32
+
33
+ const { Model, Recognizer, setLogLevel } = await import("vosk-koffi");
34
+
8
35
  /* ------------------------------------------------------------------ */
9
36
  /* 0. Helpers */
10
37
  /* ------------------------------------------------------------------ */
@@ -16,12 +43,21 @@ function unpacked(p) {
16
43
  : p;
17
44
  }
18
45
 
46
// Placeholder token that is always kept in the grammar; any recognised text
// containing it is ignored when matching commands.
const UNKNOWN_TOKEN = "[unk]";

/**
 * Canonical form used when comparing phrases: trimmed, lower-cased, and with
 * every run of whitespace collapsed to a single space.
 *
 * Nullish input yields an empty string and other non-string values are
 * stringified, so callers never hit a TypeError on `.trim()`.
 *
 * @param {*} [phrase=""] - Raw phrase (command text or recognizer output).
 * @returns {string} Normalized phrase, possibly empty.
 */
const normalizePhrase = (phrase = "") =>
  String(phrase ?? "")
    .trim()
    .replace(/\s+/g, " ")
    .toLowerCase();

/**
 * Parse a flag-like value: true only for "1", "true", "yes" or "y"
 * (case-insensitive, surrounding whitespace ignored).
 *
 * @param {*} [v=""] - Value to interpret, typically an environment variable.
 * @returns {boolean} Whether the value spells an affirmative flag.
 */
const toBool = (v = "") =>
  ["1", "true", "yes", "y"].includes(`${v}`.trim().toLowerCase());
50
+
19
51
  /* ------------------------------------------------------------------ */
20
52
  /* 1. Resolve SoX binary and audio device */
21
53
  /* ------------------------------------------------------------------ */
22
- const exeName = { win32: "sox.exe", darwin: "soxmac", linux: "soxlinux" }[
23
- process.platform
24
- ];
54
+ const defaultExeName = {
55
+ win32: "sox.exe",
56
+ darwin: "soxmac",
57
+ linux: "soxlinux",
58
+ }[process.platform];
59
+ const exeName = defaultExeName;
60
+ const MATCH_SENTENCE = toBool(process.env.LUMIA_VOICE_MATCH_SENTENCE);
25
61
 
26
62
  /* Priority for sox path: argv[2] → fallback to sibling binaries/<exe> */
27
63
  /* Priority for device: argv[3] → env var → default */
@@ -32,7 +68,14 @@ if (!soxPath || soxPath === "") {
32
68
  soxPath = unpacked(soxPath);
33
69
 
34
70
  // Parse device from argv[3] or environment variable
35
- let audioDevice = process.argv[3] || process.env.AUDIO_DEVICE || null;
71
/**
 * Pick the audio device from candidates listed in priority order.
 *
 * The first candidate that is non-nullish and non-blank after trimming wins,
 * so an empty or whitespace-only value falls through to the next source
 * rather than hiding it. On Windows the numeric id "0" is mapped to SoX's
 * "default" alias.
 *
 * @param {Array<string|null|undefined>} candidates - Values in priority order.
 * @param {string} [platform=process.platform] - Platform id, e.g. "win32".
 * @returns {string|null} Trimmed device name/id, or null if none was given.
 */
function resolveAudioDevice(candidates, platform = process.platform) {
  for (const candidate of candidates) {
    if (candidate == null) continue;
    const trimmed = String(candidate).trim();
    if (!trimmed) continue;
    // Normalize Windows numeric "0" to SoX's "default" alias
    return platform === "win32" && trimmed === "0" ? "default" : trimmed;
  }
  return null;
}

// Raw, untrimmed value as supplied (argv[3] first, then AUDIO_DEVICE).
let audioDeviceRaw = process.argv[3] ?? process.env.AUDIO_DEVICE ?? null;
// Effective device: argv[3] → AUDIO_DEVICE → null (platform default applies later).
let audioDevice = resolveAudioDevice([
  process.argv[3],
  process.env.AUDIO_DEVICE,
]);
36
79
 
37
80
  if (!existsSync(soxPath)) throw new Error(`SoX not found: ${soxPath}`);
38
81
  try {
@@ -56,26 +99,36 @@ if (!existsSync(modelPath))
56
99
  setLogLevel(0);
57
100
 
58
101
  const SAMPLE_RATE = Number(process.env.SAMPLE_RATE || 16_000);
59
- let GRAMMAR = ["[unk]"]; // seed; always keep [unk]
102
+ let GRAMMAR = [UNKNOWN_TOKEN]; // seed; always keep [unk]
103
+ let COMMANDS = [];
60
104
 
61
105
  const model = new Model(modelPath);
62
- let rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
63
- rec.setWords(true);
106
+ const buildRecognizer = () => {
107
+ const recognizer = MATCH_SENTENCE
108
+ ? new Recognizer({ model, sampleRate: SAMPLE_RATE })
109
+ : new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
110
+ recognizer.setWords(true);
111
+ return recognizer;
112
+ };
113
+ let rec = buildRecognizer();
64
114
 
65
115
  /* ------------------------------------------------------------------ */
66
116
  /* 4. Start the microphone */
67
117
  /* ------------------------------------------------------------------ */
68
118
  const recArgs = { sampleRate: SAMPLE_RATE, threshold: 0, binPath: soxPath };
119
+ // Feed raw PCM to Vosk for consistent parsing across platforms
120
+ recArgs.audioType = "raw";
121
+ recArgs.channels = 1;
69
122
 
70
123
  // Set device based on platform and configuration
71
124
  if (audioDevice !== null) {
72
125
  // User specified a device explicitly
73
126
  recArgs.device = audioDevice;
74
- console.error(`Using audio device: ${audioDevice}`);
127
+ console.error(`Using audio device: ${audioDevice || "default"}`);
75
128
  } else if (process.platform === "win32") {
76
- // Windows: default to device 0 if not specified
77
- recArgs.device = "0";
78
- console.error("Using default Windows audio device: 0");
129
+ // Windows: use default alias for reliability
130
+ recArgs.device = "default";
131
+ console.error("Using default Windows audio device: default");
79
132
  console.error(
80
133
  "To specify a different device, use: AUDIO_DEVICE=<device_id> or pass as 3rd argument"
81
134
  );
@@ -91,8 +144,36 @@ mic.on("error", (err) => {
91
144
  // Define a confidence threshold for individual words.
92
145
  // You might need to adjust this value based on your specific use case.
93
146
  let WORD_CONFIDENCE_THRESHOLD = 0.7;
147
// Diagnostic switches read from the environment. Both go through toBool so
// they accept the same spellings as the file's other boolean flags
// ("1", "true", "yes", "y"; case-insensitive, whitespace ignored).
// WAKEWORD_DEBUG enables periodic audio-level logging and implies partial logging.
const DEBUG_AUDIO = toBool(process.env.WAKEWORD_DEBUG);
// WAKEWORD_LOG_PARTIAL enables logging of partial recognition results only.
const LOG_PARTIAL = DEBUG_AUDIO || toBool(process.env.WAKEWORD_LOG_PARTIAL);
155
// Timestamp (ms since epoch) of the last level line written; 0 means never.
let lastLevelLog = 0;

/**
 * Write the RMS level of an audio chunk to stderr, at most once per second.
 *
 * The chunk is read as 16-bit little-endian PCM. Only whole samples are
 * read: a stream chunk can end mid-sample, and reading the dangling byte
 * with `readInt16LE` would throw a RangeError. Chunks holding no complete
 * sample are ignored and do not use up the one-second logging slot.
 *
 * @param {Buffer} buf - Raw audio chunk.
 * @returns {void}
 */
function logAudioLevel(buf) {
  const now = Date.now();
  if (now - lastLevelLog < 1000) return;

  // Expect 16-bit little-endian PCM
  const samples = Math.floor(buf.length / 2);
  if (samples === 0) return;
  lastLevelLog = now;

  let sumSquares = 0;
  for (let i = 0; i < samples; i += 1) {
    const sample = buf.readInt16LE(i * 2);
    sumSquares += sample * sample;
  }
  const rms = Math.sqrt(sumSquares / samples);
  console.error(`[wakeword] audio rms=${rms.toFixed(1)} (0-32768)`);
}
94
173
 
95
174
  mic.on("data", (buf) => {
175
+ if (DEBUG_AUDIO) logAudioLevel(buf);
176
+
96
177
  if (rec.acceptWaveform(buf)) {
97
178
  const fullResult = rec.result();
98
179
 
@@ -126,18 +207,39 @@ mic.on("data", (buf) => {
126
207
  // Fallback for cases where setWords(true) might not fully apply or for partial results
127
208
  handle(fullResult.text.trim(), 1.0, fullResult.text); // Assume high confidence if no word-level details
128
209
  }
210
+ } else if (LOG_PARTIAL) {
211
+ const partial = rec.partialResult();
212
+ if (partial?.partial) {
213
+ console.error(`[wakeword] partial: "${partial.partial}"`);
214
+ }
129
215
  }
130
216
  });
131
217
 
132
218
  function handle(processedWord, averageConfidence, originalText) {
133
- if (!processedWord) return;
219
+ if (!processedWord && !originalText) return;
220
+
221
+ const normalizedProcessed = normalizePhrase(processedWord);
222
+ const normalizedOriginal = normalizePhrase(originalText);
223
+ const matches = new Set();
224
+
225
+ const findMatches = (text) => {
226
+ if (!text || text.includes(UNKNOWN_TOKEN)) return;
227
+ const hits = MATCH_SENTENCE
228
+ ? COMMANDS.filter((command) => text.includes(command))
229
+ : COMMANDS.filter((command) => text === command);
230
+ hits.forEach((hit) => matches.add(hit));
231
+ };
134
232
 
135
- if (processedWord.includes("[unk]")) return;
233
+ // Try the filtered text first, then fall back to the raw sentence for sentence matching
234
+ findMatches(normalizedProcessed);
235
+ findMatches(normalizedOriginal);
136
236
 
137
- if (GRAMMAR.includes(processedWord)) {
138
- process.stdout?.write(`voice|${processedWord}\n`);
237
+ if (!matches.size) return;
238
+
239
+ matches.forEach((match) => {
240
+ process.stdout?.write(`voice|${match}\n`);
139
241
  process.stdout?.write(`confidence|${averageConfidence}\n`);
140
- }
242
+ });
141
243
  }
142
244
  /* ------------------------------------------------------------------ */
143
245
  /* 6. Hot-reload grammar via stdin */
@@ -157,11 +259,15 @@ rl.on("line", (line) => {
157
259
  const phrases = trimmed
158
260
  .split(",")
159
261
  .slice(1)
160
- .map((s) => s.trim())
262
+ .map((s) => normalizePhrase(s))
161
263
  .filter(Boolean);
162
264
 
163
265
  if (!phrases.length) return;
164
266
 
165
- GRAMMAR = [...phrases, "[unk]"];
166
- rec = new Recognizer({ model, sampleRate: SAMPLE_RATE, grammar: GRAMMAR });
267
+ COMMANDS = phrases;
268
+ GRAMMAR = [...phrases, UNKNOWN_TOKEN];
269
+ console.error(
270
+ `[wakeword] grammar updated (${phrases.length}): ${phrases.join(", ")}`
271
+ );
272
+ rec = buildRecognizer();
167
273
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lumiastream/wakeword",
3
- "version": "1.1.2",
3
+ "version": "1.1.5-alpha.1",
4
4
  "type": "module",
5
5
  "main": "lib/index.js",
6
6
  "files": [
@@ -15,6 +15,7 @@
15
15
  "start": "node lib/voice.js"
16
16
  },
17
17
  "dependencies": {
18
+ "koffi": "^2.8.6",
18
19
  "vosk-koffi": "^1.1.1"
19
20
  }
20
21
  }