npm - speech-opencode - Versions diffs - 1.1.2 → 1.1.4 - Mend

speech-opencode 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -8,8 +8,12 @@ export interface VoicePluginOptions {
     silenceDuration?: number;
     /** Maximum recording duration in seconds as a safety timeout (default 300 = 5 minutes) */
     maxDuration?: number;
-    /** Enable wake word trigger file watching (default true) */
+    /** Enable wake word detection (default true). Requires Python + openwakeword */
     enableWakeWord?: boolean;
+    /** Wake word to listen for (default "hey_jarvis"). Options: hey_jarvis, alexa, hey_mycroft */
+    wakeWord?: string;
+    /** Wake word detection threshold 0.0-1.0 (default 0.5, lower = more sensitive) */
+    wakeWordThreshold?: number;
 }
 export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
 declare const _default: Plugin;

package/dist/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;~~AA2GvD~~,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,~~4DAA4D~~;~~IAC5D~~,cAAc,CAAC,EAAE,OAAO,CAAA;~~CACzB~~;~~AAmID~~,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,~~MAyDnC~~,CAAA;;AAGH,wBAA4B"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA8GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gFAAgF;IAChF,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kFAAkF;IAClF,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC3B;AA2UD,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAwEnC,CAAA;;AAGH,wBAA4B"}

package/dist/index.js CHANGED Viewed

@@ -1,11 +1,13 @@
 import { tool } from "@opencode-ai/plugin";
 import OpenAI from "openai";
 import { spawn } from "child_process";
-import { unlinkSync, readFileSync, existsSync, watch, mkdirSync } from "fs";
+import { unlinkSync, readFileSync, existsSync, watch, mkdirSync, writeFileSync } from "fs";
 import { tmpdir, homedir } from "os";
 import { join, dirname } from "path";
 // Trigger file path for wake word integration
 const TRIGGER_FILE = join(homedir(), ".cache", "opencode", "voice_trigger");
+// Handle to the spawned Python wake word listener; null while no listener is tracked.
+// Assigned by VoicePlugin from startWakeWordListener(), and reset to null by
+// stopWakeWordListener() and by the child's "exit" handler.
+let wakeWordProcess = null;
 /**
  * Records audio from the microphone with automatic silence detection.
  * Recording stops after the specified silence duration.
@@ -138,6 +140,188 @@ function clearTriggerFile() {
         // Ignore errors
     }
 }
+/**
+ * Embedded Python wake word listener script
+ */
+const WAKE_WORD_SCRIPT = `
+#!/usr/bin/env python3
+"""Embedded wake word listener for speech-opencode"""
+import sys
+import signal
+from pathlib import Path
+try:
+    import os
+    # Silence ONNX Runtime warnings
+    os.environ["ORT_LOGGING_LEVEL"] = "3"
+    import pyaudio
+    import numpy as np
+    from openwakeword.model import Model
+except ImportError as e:
+    print(f"[WakeWord] Missing dependency: {e}", file=sys.stderr)
+    print("[WakeWord] Install with: pip install openwakeword pyaudio numpy scipy", file=sys.stderr)
+    sys.exit(1)
+SAMPLE_RATE = 16000
+CHUNK_SIZE = 1280
+TRIGGER_FILE = Path.home() / ".cache" / "opencode" / "voice_trigger"
+def get_input_device():
+    """Find the best input device, preferring pipewire"""
+    p = pyaudio.PyAudio()
+    # First pass: look for pipewire (usually works best on modern Linux)
+    for i in range(p.get_device_count()):
+        info = p.get_device_info_by_index(i)
+        name = str(info.get("name", "")).lower()
+        if info.get("maxInputChannels", 0) > 0 and "pipewire" in name:
+            p.terminate()
+            return i
+    # Second pass: any non-monitor, non-bluetooth input
+    for i in range(p.get_device_count()):
+        info = p.get_device_info_by_index(i)
+        name = str(info.get("name", "")).lower()
+        if info.get("maxInputChannels", 0) > 0:
+            if "monitor" not in name and "bluez" not in name and "bluetooth" not in name:
+                p.terminate()
+                return i
+    p.terminate()
+    return None
+def main():
+    wake_word = sys.argv[1] if len(sys.argv) > 1 else "hey_jarvis"
+    threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
+    print(f"[WakeWord] Loading model: {wake_word}")
+    model = Model()
+    device_index = get_input_device()
+    audio = pyaudio.PyAudio()
+    if device_index is not None:
+        info = audio.get_device_info_by_index(device_index)
+        sample_rate = int(info.get('defaultSampleRate', SAMPLE_RATE))
+        print(f"[WakeWord] Using device: {info.get('name')} @ {sample_rate}Hz")
+    else:
+        sample_rate = SAMPLE_RATE
+    stream = audio.open(
+        format=pyaudio.paInt16,
+        channels=1,
+        rate=sample_rate,
+        input=True,
+        input_device_index=device_index,
+        frames_per_buffer=CHUNK_SIZE,
+    )
+    print(f"[WakeWord] Listening for '{wake_word.replace('_', ' ')}'...")
+    running = True
+    cooldown = 0
+    def handle_signal(sig, frame):
+        nonlocal running
+        running = False
+    signal.signal(signal.SIGINT, handle_signal)
+    signal.signal(signal.SIGTERM, handle_signal)
+    try:
+        from scipy import signal as scipy_signal
+        need_resample = sample_rate != SAMPLE_RATE
+    except ImportError:
+        need_resample = False
+    while running:
+        try:
+            data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
+            audio_array = np.frombuffer(data, dtype=np.int16)
+            if need_resample and sample_rate != SAMPLE_RATE:
+                num_samples = int(len(audio_array) * SAMPLE_RATE / sample_rate)
+                audio_array = scipy_signal.resample(audio_array, num_samples).astype(np.int16)
+            if cooldown > 0:
+                cooldown -= 1
+                continue
+            prediction = model.predict(audio_array)
+            score = prediction.get(wake_word, 0)
+            if score > threshold:
+                print(f"[WakeWord] Detected! (score: {score:.3f})")
+                TRIGGER_FILE.parent.mkdir(parents=True, exist_ok=True)
+                TRIGGER_FILE.write_text("triggered")
+                cooldown = int(SAMPLE_RATE / CHUNK_SIZE * 3)  # 3 second cooldown
+        except Exception as e:
+            if running:
+                print(f"[WakeWord] Error: {e}", file=sys.stderr)
+            continue
+    stream.stop_stream()
+    stream.close()
+    audio.terminate()
+    print("[WakeWord] Stopped")
+if __name__ == "__main__":
+    main()
+`;
+/**
+ * Starts the wake word listener as a background Python process
+ */
+function startWakeWordListener(wakeWord = "hey_jarvis", threshold = 0.5) {
+    // Write the script to a temp file
+    const scriptPath = join(tmpdir(), "opencode-wakeword-listener.py");
+    try {
+        writeFileSync(scriptPath, WAKE_WORD_SCRIPT);
+    }
+    catch (err) {
+        console.error("[Voice Plugin] Failed to write wake word script:", err);
+        return null;
+    }
+    // Spawn Python process
+    const proc = spawn("python3", [scriptPath, wakeWord, threshold.toString()], {
+        stdio: ["ignore", "pipe", "pipe"],
+        detached: false,
+    });
+    proc.stdout?.on("data", (data) => {
+        const msg = data.toString().trim();
+        if (msg)
+            console.log(msg);
+    });
+    proc.stderr?.on("data", (data) => {
+        const msg = data.toString().trim();
+        // Filter out ONNX Runtime warnings about missing providers
+        if (msg && !msg.includes("UserWarning") && !msg.includes("onnxruntime") && !msg.includes("CUDAExecutionProvider") && !msg.includes("ALSA lib")) {
+            console.error(msg);
+        }
+    });
+    proc.on("error", (err) => {
+        console.error("[Voice Plugin] Wake word listener failed to start:", err.message);
+        console.error("[Voice Plugin] Make sure Python 3 and dependencies are installed:");
+        console.error("[Voice Plugin]   pip install openwakeword pyaudio numpy scipy");
+    });
+    proc.on("exit", (code) => {
+        if (code !== 0 && code !== null) {
+            console.error(`[Voice Plugin] Wake word listener exited with code ${code}`);
+        }
+        wakeWordProcess = null;
+    });
+    return proc;
+}
+/**
+ * Stops the wake word listener process
+ */
+function stopWakeWordListener() {
+    if (wakeWordProcess) {
+        wakeWordProcess.kill("SIGTERM");
+        wakeWordProcess = null;
+    }
+}
 /**
  * Sets up wake word trigger file watching
  * When the trigger file is written, it records audio, transcribes it, and appends to the TUI prompt
@@ -149,9 +333,7 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
     mkdirSync(triggerDir, { recursive: true });
     // Clear any existing trigger
     clearTriggerFile();
-    console.log("[Voice Plugin] Wake word watcher enabled");
-    console.log(`[Voice Plugin] Watching: ${TRIGGER_FILE}`);
-    console.log("[Voice Plugin] Run 'python wakeword/listener.py' to enable 'Hey Jarvis' wake word");
+    console.log("[Voice Plugin] Wake word trigger watcher enabled");
     // Watch the directory for the trigger file
     let isRecording = false;
     watch(triggerDir, async (eventType, filename) => {
@@ -161,6 +343,18 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
             return;
         isRecording = true;
         console.log("[Voice Plugin] Wake word triggered! Recording...");
+        // Show toast notification
+        try {
+            await client.tui.showToast({
+                body: {
+                    message: "Wake word detected! Listening...",
+                    variant: "info"
+                }
+            });
+        }
+        catch (err) {
+            console.error("[Voice Plugin] Failed to show toast:", err);
+        }
         try {
             // Clear the trigger file immediately
             clearTriggerFile();
@@ -188,13 +382,23 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
     });
 }
 export const VoicePlugin = (options = {}) => async (ctx) => {
-    const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, } = options;
+    const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, wakeWord = "hey_jarvis", wakeWordThreshold = 0.5, } = options;
     if (!apiKey) {
         console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
     }
-    // Set up wake word watcher if enabled
+    // Start wake word listener and set up file watcher if enabled
     if (enableWakeWord && apiKey && ctx.client) {
-        setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
+        // Start the Python wake word listener
+        wakeWordProcess = startWakeWordListener(wakeWord, wakeWordThreshold);
+        if (wakeWordProcess) {
+            console.log(`[Voice Plugin] Wake word listener started (say "${wakeWord.replace('_', ' ')}")`);
+            // Set up the trigger file watcher
+            setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
+            // Clean up on process exit
+            process.on("exit", stopWakeWordListener);
+            process.on("SIGINT", stopWakeWordListener);
+            process.on("SIGTERM", stopWakeWordListener);
+        }
     }
     return {
         tool: {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "speech-opencode",
-  "version": "1.1.2",
+  "version": "1.1.4",
   "description": "Voice input plugin for OpenCode using OpenAI Whisper",
   "keywords": [
     "opencode",