speech-opencode 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -8,8 +8,12 @@ export interface VoicePluginOptions {
8
8
  silenceDuration?: number;
9
9
  /** Maximum recording duration in seconds as a safety timeout (default 300 = 5 minutes) */
10
10
  maxDuration?: number;
11
- /** Enable wake word trigger file watching (default true) */
11
+ /** Enable wake word detection (default true). Requires Python + openwakeword */
12
12
  enableWakeWord?: boolean;
13
+ /** Wake word to listen for (default "hey_jarvis"). Options: hey_jarvis, alexa, hey_mycroft */
14
+ wakeWord?: string;
15
+ /** Wake word detection threshold 0.0-1.0 (default 0.5, lower = more sensitive) */
16
+ wakeWordThreshold?: number;
13
17
  }
14
18
  export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
15
19
  declare const _default: Plugin;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA2GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,4DAA4D;IAC5D,cAAc,CAAC,EAAE,OAAO,CAAA;CACzB;AAmID,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAyDnC,CAAA;;AAGH,wBAA4B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA8GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gFAAgF;IAChF,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kFAAkF;IAClF,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC3B;AA2UD,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAwEnC,CAAA;;AAGH,wBAA4B"}
package/dist/index.js CHANGED
@@ -1,11 +1,13 @@
1
1
  import { tool } from "@opencode-ai/plugin";
2
2
  import OpenAI from "openai";
3
3
  import { spawn } from "child_process";
4
- import { unlinkSync, readFileSync, existsSync, watch, mkdirSync } from "fs";
4
+ import { unlinkSync, readFileSync, existsSync, watch, mkdirSync, writeFileSync } from "fs";
5
5
  import { tmpdir, homedir } from "os";
6
6
  import { join, dirname } from "path";
7
7
  // Trigger file path for wake word integration
8
8
  const TRIGGER_FILE = join(homedir(), ".cache", "opencode", "voice_trigger");
9
+ // Store reference to wake word listener process
10
+ let wakeWordProcess = null;
9
11
  /**
10
12
  * Records audio from the microphone with automatic silence detection.
11
13
  * Recording stops after the specified silence duration.
@@ -138,6 +140,188 @@ function clearTriggerFile() {
138
140
  // Ignore errors
139
141
  }
140
142
  }
143
/**
 * Embedded Python wake word listener script.
 *
 * Invoked as `python3 <script> [wake_word] [threshold]`. The script opens a
 * microphone stream with PyAudio, feeds the audio to an openWakeWord model
 * and, whenever the score reported for `wake_word` exceeds `threshold`,
 * writes the trigger file `~/.cache/opencode/voice_trigger`. After a
 * detection it ignores audio for a cooldown period.
 *
 * The literal contains no template interpolation; the braces inside it
 * belong to Python f-strings.
 */
const WAKE_WORD_SCRIPT = `#!/usr/bin/env python3
"""Embedded wake word listener for speech-opencode"""
import sys
import signal
from pathlib import Path

try:
    import os
    # Silence ONNX Runtime warnings
    os.environ["ORT_LOGGING_LEVEL"] = "3"

    import pyaudio
    import numpy as np
    from openwakeword.model import Model
except ImportError as e:
    print(f"[WakeWord] Missing dependency: {e}", file=sys.stderr)
    print("[WakeWord] Install with: pip install openwakeword pyaudio numpy scipy", file=sys.stderr)
    sys.exit(1)

# stdout is a pipe when this script is launched by the plugin; switch to line
# buffering so status messages are delivered as they are printed.
try:
    sys.stdout.reconfigure(line_buffering=True)
except AttributeError:
    pass

SAMPLE_RATE = 16000   # rate (Hz) of the audio handed to the model
CHUNK_SIZE = 1280     # samples per prediction at SAMPLE_RATE (80 ms)
COOLDOWN_SECONDS = 3  # time to ignore audio after a detection
TRIGGER_FILE = Path.home() / ".cache" / "opencode" / "voice_trigger"

def get_input_device():
    """Find the best input device, preferring pipewire"""
    p = pyaudio.PyAudio()
    try:
        inputs = []
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info.get("maxInputChannels", 0) > 0:
                inputs.append((i, str(info.get("name", "")).lower()))

        # First pass: look for pipewire (usually works best on modern Linux)
        for i, name in inputs:
            if "pipewire" in name:
                return i

        # Second pass: any non-monitor, non-bluetooth input
        for i, name in inputs:
            if "monitor" not in name and "bluez" not in name and "bluetooth" not in name:
                return i

        return None
    finally:
        # Always release the probing PyAudio instance, whichever path returns
        p.terminate()

def main():
    wake_word = sys.argv[1] if len(sys.argv) > 1 else "hey_jarvis"
    threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    print(f"[WakeWord] Loading model: {wake_word}")
    # NOTE(review): Model() is created without arguments, so which models get
    # loaded is decided by openwakeword's defaults - confirm whether it can be
    # restricted to the requested wake word.
    model = Model()

    device_index = get_input_device()
    audio = pyaudio.PyAudio()

    if device_index is not None:
        info = audio.get_device_info_by_index(device_index)
        sample_rate = int(info.get('defaultSampleRate', SAMPLE_RATE))
        print(f"[WakeWord] Using device: {info.get('name')} @ {sample_rate}Hz")
    else:
        sample_rate = SAMPLE_RATE

    need_resample = sample_rate != SAMPLE_RATE
    scipy_signal = None
    if need_resample:
        try:
            from scipy import signal as scipy_signal
        except ImportError:
            need_resample = False
            print(
                f"[WakeWord] scipy is not installed; cannot resample {sample_rate}Hz audio "
                f"to {SAMPLE_RATE}Hz, detection may be unreliable",
                file=sys.stderr,
            )

    # Read 80 ms of audio per iteration at the device's native rate so every
    # chunk resamples to CHUNK_SIZE samples. Without resampling, keep the
    # original read size.
    if need_resample:
        frames_per_read = int(round(CHUNK_SIZE * sample_rate / SAMPLE_RATE))
    else:
        frames_per_read = CHUNK_SIZE

    # Number of reads that add up to COOLDOWN_SECONDS of real time
    cooldown_chunks = int(sample_rate / frames_per_read * COOLDOWN_SECONDS)

    stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        input=True,
        input_device_index=device_index,
        frames_per_buffer=frames_per_read,
    )

    print(f"[WakeWord] Listening for '{wake_word.replace('_', ' ')}'...")

    running = True
    cooldown = 0

    def handle_signal(sig, frame):
        nonlocal running
        running = False

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    while running:
        try:
            data = stream.read(frames_per_read, exception_on_overflow=False)

            # Keep draining the microphone during the cooldown, but skip the
            # resample/predict work for those chunks.
            if cooldown > 0:
                cooldown -= 1
                continue

            audio_array = np.frombuffer(data, dtype=np.int16)

            if need_resample:
                num_samples = int(round(len(audio_array) * SAMPLE_RATE / sample_rate))
                audio_array = scipy_signal.resample(audio_array, num_samples).astype(np.int16)

            prediction = model.predict(audio_array)
            score = prediction.get(wake_word, 0)

            if score > threshold:
                print(f"[WakeWord] Detected! (score: {score:.3f})")
                TRIGGER_FILE.parent.mkdir(parents=True, exist_ok=True)
                TRIGGER_FILE.write_text("triggered")
                cooldown = cooldown_chunks

        except Exception as e:
            if running:
                print(f"[WakeWord] Error: {e}", file=sys.stderr)
            continue

    stream.stop_stream()
    stream.close()
    audio.terminate()
    print("[WakeWord] Stopped")

if __name__ == "__main__":
    main()
`;
273
/**
 * Starts the wake word listener as a background Python process.
 *
 * The embedded script is written to a `wakeword` subdirectory of the
 * directory that holds the trigger file, then run with `python3 -u` so the
 * child's output reaches the handlers below without block buffering.
 *
 * @param wakeWord - Wake word name passed to the script as its first argument.
 * @param threshold - Detection threshold passed to the script as its second argument.
 * @returns The spawned child process, or `null` when the script could not be
 *   written. A failure to launch `python3` is reported asynchronously through
 *   the process's "error" event, not through the return value.
 */
function startWakeWordListener(wakeWord = "hey_jarvis", threshold = 0.5) {
  // Keep the script in a per-user directory instead of a fixed name in the
  // shared temp directory: a predictable path there can be pre-created or
  // swapped by another local user before python3 executes it.
  const scriptDir = join(dirname(TRIGGER_FILE), "wakeword");
  const scriptPath = join(scriptDir, "opencode-wakeword-listener.py");
  try {
    mkdirSync(scriptDir, { recursive: true });
    writeFileSync(scriptPath, WAKE_WORD_SCRIPT, { mode: 0o600 });
  } catch (err) {
    console.error("[Voice Plugin] Failed to write wake word script:", err);
    return null;
  }

  // Spawn Python process; "-u" disables output buffering in the child
  const proc = spawn("python3", ["-u", scriptPath, wakeWord, threshold.toString()], {
    stdio: ["ignore", "pipe", "pipe"],
    detached: false,
  });

  // Drop the module-level reference only if it still points at this process,
  // so a listener started later is never cleared by an older one going away.
  const forgetProcess = () => {
    if (wakeWordProcess === proc) {
      wakeWordProcess = null;
    }
  };

  proc.stdout?.on("data", (data) => {
    const msg = data.toString().trim();
    if (msg)
      console.log(msg);
  });
  proc.stderr?.on("data", (data) => {
    const msg = data.toString().trim();
    // Filter out ONNX Runtime warnings about missing providers
    if (msg && !msg.includes("UserWarning") && !msg.includes("onnxruntime") && !msg.includes("CUDAExecutionProvider") && !msg.includes("ALSA lib")) {
      console.error(msg);
    }
  });
  proc.on("error", (err) => {
    console.error("[Voice Plugin] Wake word listener failed to start:", err.message);
    console.error("[Voice Plugin] Make sure Python 3 and dependencies are installed:");
    console.error("[Voice Plugin]   pip install openwakeword pyaudio numpy scipy");
    // A process that never started must not linger as the active listener
    forgetProcess();
  });
  proc.on("exit", (code) => {
    if (code !== 0 && code !== null) {
      console.error(`[Voice Plugin] Wake word listener exited with code ${code}`);
    }
    forgetProcess();
  });
  return proc;
}
316
/**
 * Terminates the wake word listener, if one is being tracked.
 *
 * Sends SIGTERM to the process held in `wakeWordProcess` and then clears
 * that reference. Does nothing when no process is tracked.
 */
function stopWakeWordListener() {
  const listener = wakeWordProcess;
  if (!listener) {
    return;
  }
  listener.kill("SIGTERM");
  wakeWordProcess = null;
}
141
325
  /**
142
326
  * Sets up wake word trigger file watching
143
327
  * When the trigger file is written, it records audio, transcribes it, and appends to the TUI prompt
@@ -149,9 +333,7 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
149
333
  mkdirSync(triggerDir, { recursive: true });
150
334
  // Clear any existing trigger
151
335
  clearTriggerFile();
152
- console.log("[Voice Plugin] Wake word watcher enabled");
153
- console.log(`[Voice Plugin] Watching: ${TRIGGER_FILE}`);
154
- console.log("[Voice Plugin] Run 'python wakeword/listener.py' to enable 'Hey Jarvis' wake word");
336
+ console.log("[Voice Plugin] Wake word trigger watcher enabled");
155
337
  // Watch the directory for the trigger file
156
338
  let isRecording = false;
157
339
  watch(triggerDir, async (eventType, filename) => {
@@ -161,6 +343,18 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
161
343
  return;
162
344
  isRecording = true;
163
345
  console.log("[Voice Plugin] Wake word triggered! Recording...");
346
+ // Show toast notification
347
+ try {
348
+ await client.tui.showToast({
349
+ body: {
350
+ message: "Wake word detected! Listening...",
351
+ variant: "info"
352
+ }
353
+ });
354
+ }
355
+ catch (err) {
356
+ console.error("[Voice Plugin] Failed to show toast:", err);
357
+ }
164
358
  try {
165
359
  // Clear the trigger file immediately
166
360
  clearTriggerFile();
@@ -188,13 +382,23 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
188
382
  });
189
383
  }
190
384
  export const VoicePlugin = (options = {}) => async (ctx) => {
191
- const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, } = options;
385
+ const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, wakeWord = "hey_jarvis", wakeWordThreshold = 0.5, } = options;
192
386
  if (!apiKey) {
193
387
  console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
194
388
  }
195
- // Set up wake word watcher if enabled
389
+ // Start wake word listener and set up file watcher if enabled
196
390
  if (enableWakeWord && apiKey && ctx.client) {
197
- setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
391
+ // Start the Python wake word listener
392
+ wakeWordProcess = startWakeWordListener(wakeWord, wakeWordThreshold);
393
+ if (wakeWordProcess) {
394
+ console.log(`[Voice Plugin] Wake word listener started (say "${wakeWord.replace('_', ' ')}")`);
395
+ // Set up the trigger file watcher
396
+ setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
397
+ // Clean up on process exit
398
+ process.on("exit", stopWakeWordListener);
399
+ process.on("SIGINT", stopWakeWordListener);
400
+ process.on("SIGTERM", stopWakeWordListener);
401
+ }
198
402
  }
199
403
  return {
200
404
  tool: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "speech-opencode",
3
- "version": "1.1.2",
3
+ "version": "1.1.4",
4
4
  "description": "Voice input plugin for OpenCode using OpenAI Whisper",
5
5
  "keywords": [
6
6
  "opencode",