speech-opencode 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -8,8 +8,12 @@ export interface VoicePluginOptions {
8
8
  silenceDuration?: number;
9
9
  /** Maximum recording duration in seconds as a safety timeout (default 300 = 5 minutes) */
10
10
  maxDuration?: number;
11
- /** Enable wake word trigger file watching (default true) */
11
+ /** Enable wake word detection (default true). Requires Python + openwakeword */
12
12
  enableWakeWord?: boolean;
13
+ /** Wake word to listen for (default "hey_jarvis"). Options: hey_jarvis, alexa, hey_mycroft */
14
+ wakeWord?: string;
15
+ /** Wake word detection threshold 0.0-1.0 (default 0.5, lower = more sensitive) */
16
+ wakeWordThreshold?: number;
13
17
  }
14
18
  export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
15
19
  declare const _default: Plugin;
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA2GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,4DAA4D;IAC5D,cAAc,CAAC,EAAE,OAAO,CAAA;CACzB;AAmID,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAyDnC,CAAA;;AAGH,wBAA4B"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA8GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gFAAgF;IAChF,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kFAAkF;IAClF,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC3B;AA4TD,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAwEnC,CAAA;;AAGH,wBAA4B"}
package/dist/index.js CHANGED
@@ -1,11 +1,13 @@
1
1
  import { tool } from "@opencode-ai/plugin";
2
2
  import OpenAI from "openai";
3
3
  import { spawn } from "child_process";
4
- import { unlinkSync, readFileSync, existsSync, watch, mkdirSync } from "fs";
4
+ import { unlinkSync, readFileSync, existsSync, watch, mkdirSync, writeFileSync } from "fs";
5
5
  import { tmpdir, homedir } from "os";
6
6
  import { join, dirname } from "path";
7
7
  // Trigger file path for wake word integration
8
8
  const TRIGGER_FILE = join(homedir(), ".cache", "opencode", "voice_trigger");
9
+ // Handle to the spawned wake word listener child process; null while no listener is running
10
+ let wakeWordProcess = null;
9
11
  /**
10
12
  * Records audio from the microphone with automatic silence detection.
11
13
  * Recording stops after the specified silence duration.
@@ -138,6 +140,186 @@ function clearTriggerFile() {
138
140
  // Ignore errors
139
141
  }
140
142
  }
143
/**
 * Embedded Python wake word listener script.
 *
 * The script is written to disk and run with python3. It takes two optional
 * command-line arguments: the wake word name (default "hey_jarvis") and the
 * detection threshold (default 0.5). When the model score for the wake word
 * exceeds the threshold it writes "triggered" to ~/.cache/opencode/voice_trigger
 * and then ignores audio for roughly three seconds.
 *
 * Notes for maintainers:
 * - The text lives in a JS template literal, so it must not contain backticks,
 *   backslashes or a dollar sign followed by an opening brace.
 * - Every message is flushed immediately because Python block-buffers stdout
 *   when it is connected to a pipe, which is how the plugin reads it.
 * - Audio is read in device-rate chunks that span exactly one 80 ms model
 *   chunk (1280 samples at 16 kHz), so the chunk-count based cooldown stays
 *   at ~3 seconds regardless of the device sample rate.
 */
const WAKE_WORD_SCRIPT = `
#!/usr/bin/env python3
"""Embedded wake word listener for speech-opencode"""
import sys
import signal
from pathlib import Path

try:
    import os
    # Silence ONNX Runtime warnings
    os.environ["ORT_LOGGING_LEVEL"] = "3"

    import pyaudio
    import numpy as np
    from openwakeword.model import Model
except ImportError as e:
    print(f"[WakeWord] Missing dependency: {e}", file=sys.stderr, flush=True)
    print("[WakeWord] Install with: pip install openwakeword pyaudio numpy scipy", file=sys.stderr, flush=True)
    sys.exit(1)

SAMPLE_RATE = 16000
CHUNK_SIZE = 1280
# Give up after this many failed loop iterations in a row (dead audio stream)
MAX_CONSECUTIVE_ERRORS = 50
TRIGGER_FILE = Path.home() / ".cache" / "opencode" / "voice_trigger"


def log(message, error=False):
    """Print a message and flush right away so the parent process sees it promptly"""
    print(message, file=sys.stderr if error else sys.stdout, flush=True)


def get_input_device():
    """Find the best input device, preferring pipewire"""
    p = pyaudio.PyAudio()
    try:
        inputs = []
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info.get("maxInputChannels", 0) > 0:
                inputs.append((i, str(info.get("name", "")).lower()))

        # First pass: look for pipewire (usually works best on modern Linux)
        for i, name in inputs:
            if "pipewire" in name:
                return i

        # Second pass: any non-monitor, non-bluetooth input
        for i, name in inputs:
            if "monitor" not in name and "bluez" not in name and "bluetooth" not in name:
                return i

        return None
    finally:
        p.terminate()


def resample_chunk(samples, target_len, scipy_signal):
    """Resample an int16 chunk to target_len samples (scipy if available, else linear interpolation)"""
    if len(samples) == target_len:
        return samples
    if scipy_signal is not None:
        return scipy_signal.resample(samples, target_len).astype(np.int16)
    source_positions = np.arange(len(samples))
    target_positions = np.linspace(0, len(samples) - 1, target_len)
    return np.interp(target_positions, source_positions, samples).astype(np.int16)


def main():
    wake_word = sys.argv[1] if len(sys.argv) > 1 else "hey_jarvis"
    threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    log(f"[WakeWord] Loading model: {wake_word}")
    model = Model()

    device_index = get_input_device()
    audio = pyaudio.PyAudio()

    if device_index is not None:
        info = audio.get_device_info_by_index(device_index)
        sample_rate = int(info.get('defaultSampleRate', SAMPLE_RATE))
        log(f"[WakeWord] Using device: {info.get('name')} @ {sample_rate}Hz")
    else:
        sample_rate = SAMPLE_RATE

    try:
        from scipy import signal as scipy_signal
    except ImportError:
        scipy_signal = None
        if sample_rate != SAMPLE_RATE:
            log("[WakeWord] scipy not installed; falling back to linear interpolation for resampling", error=True)

    # Number of device-rate frames covering one model chunk (CHUNK_SIZE samples at SAMPLE_RATE)
    read_size = max(1, int(round(CHUNK_SIZE * sample_rate / SAMPLE_RATE)))

    stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        input=True,
        input_device_index=device_index,
        frames_per_buffer=read_size,
    )

    log(f"[WakeWord] Listening for '{wake_word.replace('_', ' ')}'...")

    running = True
    cooldown = 0
    consecutive_errors = 0
    warned_unknown_word = False
    exit_code = 0

    def handle_signal(sig, frame):
        nonlocal running
        running = False

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    while running:
        try:
            data = stream.read(read_size, exception_on_overflow=False)
            audio_array = np.frombuffer(data, dtype=np.int16)

            if sample_rate != SAMPLE_RATE:
                audio_array = resample_chunk(audio_array, CHUNK_SIZE, scipy_signal)

            if cooldown > 0:
                cooldown -= 1
            else:
                prediction = model.predict(audio_array)

                if wake_word not in prediction and not warned_unknown_word:
                    warned_unknown_word = True
                    log(f"[WakeWord] No model named '{wake_word}'; available: {', '.join(prediction.keys())}", error=True)

                score = prediction.get(wake_word, 0)

                if score > threshold:
                    log(f"[WakeWord] Detected! (score: {score:.3f})")
                    TRIGGER_FILE.parent.mkdir(parents=True, exist_ok=True)
                    TRIGGER_FILE.write_text("triggered")
                    cooldown = int(SAMPLE_RATE / CHUNK_SIZE * 3)  # 3 second cooldown

            consecutive_errors = 0

        except Exception as e:
            if running:
                log(f"[WakeWord] Error: {e}", error=True)
                consecutive_errors += 1
                if consecutive_errors >= MAX_CONSECUTIVE_ERRORS:
                    log("[WakeWord] Too many consecutive errors, giving up", error=True)
                    exit_code = 1
                    break
            continue

    try:
        stream.stop_stream()
        stream.close()
    finally:
        audio.terminate()
    log("[WakeWord] Stopped")
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
`;
273
/**
 * Starts the wake word listener as a background Python process.
 *
 * Writes the embedded script to "opencode-wakeword-listener.py" in the OS temp
 * directory, runs it with python3 and forwards the child's stdout/stderr to
 * the console.
 *
 * @param wakeWord - Wake word name, passed to the script as its first argument (default "hey_jarvis").
 * @param threshold - Detection threshold, passed as the second argument (default 0.5).
 *   A value that is not a finite number falls back to 0.5 with a warning.
 * @returns The spawned child process, or null if the script file could not be written.
 *   A missing python3 binary is reported asynchronously through the "error" event,
 *   so a non-null return value does not guarantee the listener is running.
 */
function startWakeWordListener(wakeWord = "hey_jarvis", threshold = 0.5) {
    // Write the script to a temp file
    const scriptPath = join(tmpdir(), "opencode-wakeword-listener.py");
    try {
        writeFileSync(scriptPath, WAKE_WORD_SCRIPT);
    }
    catch (err) {
        console.error("[Voice Plugin] Failed to write wake word script:", err);
        return null;
    }
    // A NaN threshold would make every comparison in the script false (silently
    // disabling detection) and null would throw on toString(), so normalise first.
    const parsedThreshold = typeof threshold === "number" ? threshold : Number.parseFloat(String(threshold));
    let effectiveThreshold = parsedThreshold;
    if (!Number.isFinite(parsedThreshold)) {
        console.warn(`[Voice Plugin] Invalid wake word threshold "${String(threshold)}", using 0.5`);
        effectiveThreshold = 0.5;
    }
    // Spawn Python process; -u keeps stdout/stderr unbuffered so logs arrive as they are printed
    const proc = spawn("python3", ["-u", scriptPath, String(wakeWord), String(effectiveThreshold)], {
        stdio: ["ignore", "pipe", "pipe"],
        detached: false,
    });
    // Only drop the module-level handle if it still refers to this process,
    // so a late event from an old listener cannot wipe a newer one.
    const releaseHandle = () => {
        if (wakeWordProcess === proc) {
            wakeWordProcess = null;
        }
    };
    // Builds a "data" handler that forwards non-empty, trimmed output to the given sink
    const forwardTo = (sink) => (data) => {
        const msg = data.toString().trim();
        if (msg)
            sink(msg);
    };
    proc.stdout?.on("data", forwardTo((msg) => console.log(msg)));
    proc.stderr?.on("data", forwardTo((msg) => console.error(msg)));
    proc.on("error", (err) => {
        console.error("[Voice Plugin] Wake word listener failed to start:", err.message);
        console.error("[Voice Plugin] Make sure Python 3 and dependencies are installed:");
        console.error("[Voice Plugin]   pip install openwakeword pyaudio numpy scipy");
        // "exit" may never fire after a spawn failure, so release the handle here too
        releaseHandle();
    });
    proc.on("exit", (code) => {
        if (code !== 0 && code !== null) {
            console.error(`[Voice Plugin] Wake word listener exited with code ${code}`);
        }
        releaseHandle();
    });
    return proc;
}
314
/**
 * Stops the wake word listener, if one is currently tracked.
 *
 * Sends SIGTERM to the tracked child process and clears the module-level
 * handle. Does nothing when no listener is tracked.
 */
function stopWakeWordListener() {
    const listener = wakeWordProcess;
    if (!listener) {
        return;
    }
    listener.kill("SIGTERM");
    wakeWordProcess = null;
}
141
323
  /**
142
324
  * Sets up wake word trigger file watching
143
325
  * When the trigger file is written, it records audio, transcribes it, and appends to the TUI prompt
@@ -149,9 +331,7 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
149
331
  mkdirSync(triggerDir, { recursive: true });
150
332
  // Clear any existing trigger
151
333
  clearTriggerFile();
152
- console.log("[Voice Plugin] Wake word watcher enabled");
153
- console.log(`[Voice Plugin] Watching: ${TRIGGER_FILE}`);
154
- console.log("[Voice Plugin] Run 'python wakeword/listener.py' to enable 'Hey Jarvis' wake word");
334
+ console.log("[Voice Plugin] Wake word trigger watcher enabled");
155
335
  // Watch the directory for the trigger file
156
336
  let isRecording = false;
157
337
  watch(triggerDir, async (eventType, filename) => {
@@ -188,13 +368,23 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
188
368
  });
189
369
  }
190
370
  export const VoicePlugin = (options = {}) => async (ctx) => {
191
- const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, } = options;
371
+ const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, wakeWord = "hey_jarvis", wakeWordThreshold = 0.5, } = options;
192
372
  if (!apiKey) {
193
373
  console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
194
374
  }
195
- // Set up wake word watcher if enabled
375
+ // Start wake word listener and set up file watcher if enabled
196
376
  if (enableWakeWord && apiKey && ctx.client) {
197
- setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
377
+ // Start the Python wake word listener
378
+ wakeWordProcess = startWakeWordListener(wakeWord, wakeWordThreshold);
379
+ if (wakeWordProcess) {
380
+ console.log(`[Voice Plugin] Wake word listener started (say "${wakeWord.replace('_', ' ')}")`);
381
+ // Set up the trigger file watcher
382
+ setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
383
+ // Clean up on process exit
384
+ process.on("exit", stopWakeWordListener);
385
+ process.on("SIGINT", stopWakeWordListener);
386
+ process.on("SIGTERM", stopWakeWordListener);
387
+ }
198
388
  }
199
389
  return {
200
390
  tool: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "speech-opencode",
3
- "version": "1.1.2",
3
+ "version": "1.1.3",
4
4
  "description": "Voice input plugin for OpenCode using OpenAI Whisper",
5
5
  "keywords": [
6
6
  "opencode",