speech-opencode 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -8,28 +8,13 @@ export interface VoicePluginOptions {
      silenceDuration?: number;
      /** Maximum recording duration in seconds as a safety timeout (default 300 = 5 minutes) */
      maxDuration?: number;
+     /** Enable wake word detection (default true). Requires Python + openwakeword */
+     enableWakeWord?: boolean;
+     /** Wake word to listen for (default "hey_jarvis"). Options: hey_jarvis, alexa, hey_mycroft */
+     wakeWord?: string;
+     /** Wake word detection threshold 0.0-1.0 (default 0.5, lower = more sensitive) */
+     wakeWordThreshold?: number;
  }
- /**
-  * OpenCode Voice Plugin
-  *
-  * Adds a 'voice' tool that records audio from the microphone and transcribes it
-  * using OpenAI's Whisper API.
-  *
-  * @example
-  * ```ts
-  * // In opencode.json
-  * {
-  *   "plugin": ["opencode-voice"]
-  * }
-  * ```
-  *
-  * @example
-  * ```ts
-  * // With options in .opencode/plugin/voice.ts
-  * import { VoicePlugin } from "opencode-voice"
-  * export default VoicePlugin({ language: "en", defaultDuration: 10 })
-  * ```
-  */
  export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
  declare const _default: Plugin;
  export default _default;
package/dist/index.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AAwGvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MA4DnC,CAAA;;AAGH,wBAA4B"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA8GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gFAAgF;IAChF,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kFAAkF;IAClF,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC3B;AA4TD,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAwEnC,CAAA;;AAGH,wBAA4B"}
package/dist/index.js CHANGED
@@ -1,9 +1,13 @@
  import { tool } from "@opencode-ai/plugin";
  import OpenAI from "openai";
  import { spawn } from "child_process";
- import { unlinkSync, readFileSync } from "fs";
- import { tmpdir } from "os";
- import { join } from "path";
+ import { unlinkSync, readFileSync, existsSync, watch, mkdirSync, writeFileSync } from "fs";
+ import { tmpdir, homedir } from "os";
+ import { join, dirname } from "path";
+ // Trigger file path for wake word integration
+ const TRIGGER_FILE = join(homedir(), ".cache", "opencode", "voice_trigger");
+ // Store reference to wake word listener process
+ let wakeWordProcess = null;
  /**
   * Records audio from the microphone with automatic silence detection.
   * Recording stops after the specified silence duration.
@@ -99,11 +103,289 @@ async function transcribeAudio(audioFilePath, apiKey, language) {
   * export default VoicePlugin({ language: "en", defaultDuration: 10 })
   * ```
   */
+ /**
+  * Records and transcribes audio, returning the transcription
+  */
+ async function recordAndTranscribe(apiKey, maxDuration, silenceDuration, language) {
+     let audioFile = null;
+     try {
+         audioFile = await recordAudio(maxDuration, silenceDuration);
+         const transcription = await transcribeAudio(audioFile, apiKey, language);
+         if (!transcription || transcription.trim() === "") {
+             return "No speech detected. Please try again and speak clearly into your microphone.";
+         }
+         return transcription;
+     }
+     finally {
+         if (audioFile) {
+             try {
+                 unlinkSync(audioFile);
+             }
+             catch {
+                 // Ignore cleanup errors
+             }
+         }
+     }
+ }
+ /**
+  * Clears the wake word trigger file
+  */
+ function clearTriggerFile() {
+     try {
+         if (existsSync(TRIGGER_FILE)) {
+             unlinkSync(TRIGGER_FILE);
+         }
+     }
+     catch {
+         // Ignore errors
+     }
+ }
+ /**
+  * Embedded Python wake word listener script
+  */
+ const WAKE_WORD_SCRIPT = `
+ #!/usr/bin/env python3
+ """Embedded wake word listener for speech-opencode"""
+ import sys
+ import signal
+ from pathlib import Path
+
+ try:
+     import os
+     # Silence ONNX Runtime warnings
+     os.environ["ORT_LOGGING_LEVEL"] = "3"
+
+     import pyaudio
+     import numpy as np
+     from openwakeword.model import Model
+ except ImportError as e:
+     print(f"[WakeWord] Missing dependency: {e}", file=sys.stderr)
+     print("[WakeWord] Install with: pip install openwakeword pyaudio numpy scipy", file=sys.stderr)
+     sys.exit(1)
+
+ SAMPLE_RATE = 16000
+ CHUNK_SIZE = 1280
+ TRIGGER_FILE = Path.home() / ".cache" / "opencode" / "voice_trigger"
+
+ def get_input_device():
+     """Find the best input device, preferring pipewire"""
+     p = pyaudio.PyAudio()
+
+     # First pass: look for pipewire (usually works best on modern Linux)
+     for i in range(p.get_device_count()):
+         info = p.get_device_info_by_index(i)
+         name = str(info.get("name", "")).lower()
+         if info.get("maxInputChannels", 0) > 0 and "pipewire" in name:
+             p.terminate()
+             return i
+
+     # Second pass: any non-monitor, non-bluetooth input
+     for i in range(p.get_device_count()):
+         info = p.get_device_info_by_index(i)
+         name = str(info.get("name", "")).lower()
+         if info.get("maxInputChannels", 0) > 0:
+             if "monitor" not in name and "bluez" not in name and "bluetooth" not in name:
+                 p.terminate()
+                 return i
+
+     p.terminate()
+     return None
+
+ def main():
+     wake_word = sys.argv[1] if len(sys.argv) > 1 else "hey_jarvis"
+     threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
+
+     print(f"[WakeWord] Loading model: {wake_word}")
+     model = Model()
+
+     device_index = get_input_device()
+     audio = pyaudio.PyAudio()
+
+     if device_index is not None:
+         info = audio.get_device_info_by_index(device_index)
+         sample_rate = int(info.get('defaultSampleRate', SAMPLE_RATE))
+         print(f"[WakeWord] Using device: {info.get('name')} @ {sample_rate}Hz")
+     else:
+         sample_rate = SAMPLE_RATE
+
+     stream = audio.open(
+         format=pyaudio.paInt16,
+         channels=1,
+         rate=sample_rate,
+         input=True,
+         input_device_index=device_index,
+         frames_per_buffer=CHUNK_SIZE,
+     )
+
+     print(f"[WakeWord] Listening for '{wake_word.replace('_', ' ')}'...")
+
+     running = True
+     cooldown = 0
+
+     def handle_signal(sig, frame):
+         nonlocal running
+         running = False
+
+     signal.signal(signal.SIGINT, handle_signal)
+     signal.signal(signal.SIGTERM, handle_signal)
+
+     try:
+         from scipy import signal as scipy_signal
+         need_resample = sample_rate != SAMPLE_RATE
+     except ImportError:
+         need_resample = False
+
+     while running:
+         try:
+             data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
+             audio_array = np.frombuffer(data, dtype=np.int16)
+
+             if need_resample and sample_rate != SAMPLE_RATE:
+                 num_samples = int(len(audio_array) * SAMPLE_RATE / sample_rate)
+                 audio_array = scipy_signal.resample(audio_array, num_samples).astype(np.int16)
+
+             if cooldown > 0:
+                 cooldown -= 1
+                 continue
+
+             prediction = model.predict(audio_array)
+             score = prediction.get(wake_word, 0)
+
+             if score > threshold:
+                 print(f"[WakeWord] Detected! (score: {score:.3f})")
+                 TRIGGER_FILE.parent.mkdir(parents=True, exist_ok=True)
+                 TRIGGER_FILE.write_text("triggered")
+                 cooldown = int(SAMPLE_RATE / CHUNK_SIZE * 3) # 3 second cooldown
+
+         except Exception as e:
+             if running:
+                 print(f"[WakeWord] Error: {e}", file=sys.stderr)
+             continue
+
+     stream.stop_stream()
+     stream.close()
+     audio.terminate()
+     print("[WakeWord] Stopped")
+
+ if __name__ == "__main__":
+     main()
+ `;
+ /**
+  * Starts the wake word listener as a background Python process
+  */
+ function startWakeWordListener(wakeWord = "hey_jarvis", threshold = 0.5) {
+     // Write the script to a temp file
+     const scriptPath = join(tmpdir(), "opencode-wakeword-listener.py");
+     try {
+         writeFileSync(scriptPath, WAKE_WORD_SCRIPT);
+     }
+     catch (err) {
+         console.error("[Voice Plugin] Failed to write wake word script:", err);
+         return null;
+     }
+     // Spawn Python process
+     const proc = spawn("python3", [scriptPath, wakeWord, threshold.toString()], {
+         stdio: ["ignore", "pipe", "pipe"],
+         detached: false,
+     });
+     proc.stdout?.on("data", (data) => {
+         const msg = data.toString().trim();
+         if (msg)
+             console.log(msg);
+     });
+     proc.stderr?.on("data", (data) => {
+         const msg = data.toString().trim();
+         if (msg)
+             console.error(msg);
+     });
+     proc.on("error", (err) => {
+         console.error("[Voice Plugin] Wake word listener failed to start:", err.message);
+         console.error("[Voice Plugin] Make sure Python 3 and dependencies are installed:");
+         console.error("[Voice Plugin] pip install openwakeword pyaudio numpy scipy");
+     });
+     proc.on("exit", (code) => {
+         if (code !== 0 && code !== null) {
+             console.error(`[Voice Plugin] Wake word listener exited with code ${code}`);
+         }
+         wakeWordProcess = null;
+     });
+     return proc;
+ }
+ /**
+  * Stops the wake word listener process
+  */
+ function stopWakeWordListener() {
+     if (wakeWordProcess) {
+         wakeWordProcess.kill("SIGTERM");
+         wakeWordProcess = null;
+     }
+ }
+ /**
+  * Sets up wake word trigger file watching
+  * When the trigger file is written, it records audio, transcribes it, and appends to the TUI prompt
+  */
+ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, client // OpenCode SDK client
+ ) {
+     // Ensure the directory exists
+     const triggerDir = dirname(TRIGGER_FILE);
+     mkdirSync(triggerDir, { recursive: true });
+     // Clear any existing trigger
+     clearTriggerFile();
+     console.log("[Voice Plugin] Wake word trigger watcher enabled");
+     // Watch the directory for the trigger file
+     let isRecording = false;
+     watch(triggerDir, async (eventType, filename) => {
+         if (filename !== "voice_trigger" || isRecording)
+             return;
+         if (!existsSync(TRIGGER_FILE))
+             return;
+         isRecording = true;
+         console.log("[Voice Plugin] Wake word triggered! Recording...");
+         try {
+             // Clear the trigger file immediately
+             clearTriggerFile();
+             // Record and transcribe
+             const transcription = await recordAndTranscribe(apiKey, maxDuration, silenceDuration, language);
+             if (transcription && !transcription.startsWith("No speech detected")) {
+                 console.log(`[Voice Plugin] Transcribed: "${transcription}"`);
+                 // Append transcription to the TUI prompt
+                 try {
+                     await client.tui.appendPrompt({ body: { text: transcription } });
+                     // Auto-submit the prompt
+                     await client.tui.submitPrompt();
+                 }
+                 catch (err) {
+                     console.error("[Voice Plugin] Failed to send to TUI:", err);
+                 }
+             }
+         }
+         catch (error) {
+             console.error("[Voice Plugin] Error:", error);
+         }
+         finally {
+             isRecording = false;
+         }
+     });
+ }
  export const VoicePlugin = (options = {}) => async (ctx) => {
-     const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, } = options;
+     const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, wakeWord = "hey_jarvis", wakeWordThreshold = 0.5, } = options;
      if (!apiKey) {
          console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
      }
+     // Start wake word listener and set up file watcher if enabled
+     if (enableWakeWord && apiKey && ctx.client) {
+         // Start the Python wake word listener
+         wakeWordProcess = startWakeWordListener(wakeWord, wakeWordThreshold);
+         if (wakeWordProcess) {
+             console.log(`[Voice Plugin] Wake word listener started (say "${wakeWord.replace('_', ' ')}")`);
+             // Set up the trigger file watcher
+             setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
+             // Clean up on process exit
+             process.on("exit", stopWakeWordListener);
+             process.on("SIGINT", stopWakeWordListener);
+             process.on("SIGTERM", stopWakeWordListener);
+         }
+     }
      return {
          tool: {
              voice: tool({
@@ -115,29 +397,14 @@ export const VoicePlugin = (options = {}) => async (ctx) => {
                      if (!apiKey) {
                          return "Error: OPENAI_API_KEY environment variable is not set. Please set it to use voice transcription.";
                      }
-                     let audioFile = null;
                      try {
-                         audioFile = await recordAudio(maxDuration, silenceDuration);
-                         const transcription = await transcribeAudio(audioFile, apiKey, language);
-                         if (!transcription || transcription.trim() === "") {
-                             return "No speech detected. Please try again and speak clearly into your microphone.";
-                         }
+                         const transcription = await recordAndTranscribe(apiKey, maxDuration, silenceDuration, language);
                          return `Transcribed speech: "${transcription}"`;
                      }
                      catch (error) {
                          const errorMessage = error instanceof Error ? error.message : String(error);
                          return `Voice recording/transcription failed: ${errorMessage}`;
                      }
-                     finally {
-                         if (audioFile) {
-                             try {
-                                 unlinkSync(audioFile);
-                             }
-                             catch {
-                                 // Ignore cleanup errors
-                             }
-                         }
-                     }
                  },
              }),
          },
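The wake word flow added above is a file handshake: the spawned Python listener writes `~/.cache/opencode/voice_trigger` when it detects the wake word, and the plugin's `fs.watch` callback on that directory clears the file, records, transcribes, and appends the text to the TUI prompt. A small sketch (a hypothetical test helper, not part of the package) that exercises the watcher by writing the same trigger file directly, without running the Python listener:

```ts
// trigger-test.ts — hypothetical helper for exercising the watcher
import { mkdirSync, writeFileSync } from "fs";
import { homedir } from "os";
import { join } from "path";

// Same directory and filename the plugin watches (TRIGGER_FILE in dist/index.js above)
const triggerDir = join(homedir(), ".cache", "opencode");
mkdirSync(triggerDir, { recursive: true });

// The Python listener writes "triggered" on detection; doing the same by hand
// should cause a running plugin instance to start recording and transcribing.
writeFileSync(join(triggerDir, "voice_trigger"), "triggered");
```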
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "speech-opencode",
-   "version": "1.1.1",
+   "version": "1.1.3",
    "description": "Voice input plugin for OpenCode using OpenAI Whisper",
    "keywords": [
      "opencode",