speech-opencode 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -21
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +287 -20
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
@@ -8,28 +8,13 @@ export interface VoicePluginOptions {
     silenceDuration?: number;
     /** Maximum recording duration in seconds as a safety timeout (default 300 = 5 minutes) */
     maxDuration?: number;
+    /** Enable wake word detection (default true). Requires Python + openwakeword */
+    enableWakeWord?: boolean;
+    /** Wake word to listen for (default "hey_jarvis"). Options: hey_jarvis, alexa, hey_mycroft */
+    wakeWord?: string;
+    /** Wake word detection threshold 0.0-1.0 (default 0.5, lower = more sensitive) */
+    wakeWordThreshold?: number;
 }
-/**
- * OpenCode Voice Plugin
- *
- * Adds a 'voice' tool that records audio from the microphone and transcribes it
- * using OpenAI's Whisper API.
- *
- * @example
- * ```ts
- * // In opencode.json
- * {
- *   "plugin": ["opencode-voice"]
- * }
- * ```
- *
- * @example
- * ```ts
- * // With options in .opencode/plugin/voice.ts
- * import { VoicePlugin } from "opencode-voice"
- * export default VoicePlugin({ language: "en", defaultDuration: 10 })
- * ```
- */
 export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
 declare const _default: Plugin;
 export default _default;
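The six added lines above introduce three new options alongside the existing ones. A minimal configuration sketch, assuming the `.opencode/plugin/voice.ts` entry point shown in the package's own (now removed) JSDoc example; the `opencode-voice` import specifier is copied from that example and may differ from the published `speech-opencode` name:

```ts
// .opencode/plugin/voice.ts — illustrative sketch, not shipped in this release
import { VoicePlugin } from "opencode-voice"

export default VoicePlugin({
  language: "en",          // existing option
  silenceDuration: 7,      // existing option: seconds of silence before recording stops
  enableWakeWord: true,    // new in 1.1.3: requires Python + openwakeword
  wakeWord: "hey_jarvis",  // new: hey_jarvis, alexa, or hey_mycroft
  wakeWordThreshold: 0.5,  // new: 0.0-1.0, lower = more sensitive
})
```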
package/dist/index.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA8GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gFAAgF;IAChF,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kFAAkF;IAClF,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC3B;AA4TD,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAwEnC,CAAA;;AAGH,wBAA4B"}
package/dist/index.js
CHANGED
@@ -1,9 +1,13 @@
 import { tool } from "@opencode-ai/plugin";
 import OpenAI from "openai";
 import { spawn } from "child_process";
-import { unlinkSync, readFileSync } from "fs";
-import { tmpdir } from "os";
-import { join } from "path";
+import { unlinkSync, readFileSync, existsSync, watch, mkdirSync, writeFileSync } from "fs";
+import { tmpdir, homedir } from "os";
+import { join, dirname } from "path";
+// Trigger file path for wake word integration
+const TRIGGER_FILE = join(homedir(), ".cache", "opencode", "voice_trigger");
+// Store reference to wake word listener process
+let wakeWordProcess = null;
 /**
  * Records audio from the microphone with automatic silence detection.
  * Recording stops after the specified silence duration.
@@ -99,11 +103,289 @@ async function transcribeAudio(audioFilePath, apiKey, language) {
  * export default VoicePlugin({ language: "en", defaultDuration: 10 })
  * ```
  */
+/**
+ * Records and transcribes audio, returning the transcription
+ */
+async function recordAndTranscribe(apiKey, maxDuration, silenceDuration, language) {
+    let audioFile = null;
+    try {
+        audioFile = await recordAudio(maxDuration, silenceDuration);
+        const transcription = await transcribeAudio(audioFile, apiKey, language);
+        if (!transcription || transcription.trim() === "") {
+            return "No speech detected. Please try again and speak clearly into your microphone.";
+        }
+        return transcription;
+    }
+    finally {
+        if (audioFile) {
+            try {
+                unlinkSync(audioFile);
+            }
+            catch {
+                // Ignore cleanup errors
+            }
+        }
+    }
+}
+/**
+ * Clears the wake word trigger file
+ */
+function clearTriggerFile() {
+    try {
+        if (existsSync(TRIGGER_FILE)) {
+            unlinkSync(TRIGGER_FILE);
+        }
+    }
+    catch {
+        // Ignore errors
+    }
+}
+/**
+ * Embedded Python wake word listener script
+ */
+const WAKE_WORD_SCRIPT = `
+#!/usr/bin/env python3
+"""Embedded wake word listener for speech-opencode"""
+import sys
+import signal
+from pathlib import Path
+
+try:
+    import os
+    # Silence ONNX Runtime warnings
+    os.environ["ORT_LOGGING_LEVEL"] = "3"
+
+    import pyaudio
+    import numpy as np
+    from openwakeword.model import Model
+except ImportError as e:
+    print(f"[WakeWord] Missing dependency: {e}", file=sys.stderr)
+    print("[WakeWord] Install with: pip install openwakeword pyaudio numpy scipy", file=sys.stderr)
+    sys.exit(1)
+
+SAMPLE_RATE = 16000
+CHUNK_SIZE = 1280
+TRIGGER_FILE = Path.home() / ".cache" / "opencode" / "voice_trigger"
+
+def get_input_device():
+    """Find the best input device, preferring pipewire"""
+    p = pyaudio.PyAudio()
+
+    # First pass: look for pipewire (usually works best on modern Linux)
+    for i in range(p.get_device_count()):
+        info = p.get_device_info_by_index(i)
+        name = str(info.get("name", "")).lower()
+        if info.get("maxInputChannels", 0) > 0 and "pipewire" in name:
+            p.terminate()
+            return i
+
+    # Second pass: any non-monitor, non-bluetooth input
+    for i in range(p.get_device_count()):
+        info = p.get_device_info_by_index(i)
+        name = str(info.get("name", "")).lower()
+        if info.get("maxInputChannels", 0) > 0:
+            if "monitor" not in name and "bluez" not in name and "bluetooth" not in name:
+                p.terminate()
+                return i
+
+    p.terminate()
+    return None
+
+def main():
+    wake_word = sys.argv[1] if len(sys.argv) > 1 else "hey_jarvis"
+    threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
+
+    print(f"[WakeWord] Loading model: {wake_word}")
+    model = Model()
+
+    device_index = get_input_device()
+    audio = pyaudio.PyAudio()
+
+    if device_index is not None:
+        info = audio.get_device_info_by_index(device_index)
+        sample_rate = int(info.get('defaultSampleRate', SAMPLE_RATE))
+        print(f"[WakeWord] Using device: {info.get('name')} @ {sample_rate}Hz")
+    else:
+        sample_rate = SAMPLE_RATE
+
+    stream = audio.open(
+        format=pyaudio.paInt16,
+        channels=1,
+        rate=sample_rate,
+        input=True,
+        input_device_index=device_index,
+        frames_per_buffer=CHUNK_SIZE,
+    )
+
+    print(f"[WakeWord] Listening for '{wake_word.replace('_', ' ')}'...")
+
+    running = True
+    cooldown = 0
+
+    def handle_signal(sig, frame):
+        nonlocal running
+        running = False
+
+    signal.signal(signal.SIGINT, handle_signal)
+    signal.signal(signal.SIGTERM, handle_signal)
+
+    try:
+        from scipy import signal as scipy_signal
+        need_resample = sample_rate != SAMPLE_RATE
+    except ImportError:
+        need_resample = False
+
+    while running:
+        try:
+            data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
+            audio_array = np.frombuffer(data, dtype=np.int16)
+
+            if need_resample and sample_rate != SAMPLE_RATE:
+                num_samples = int(len(audio_array) * SAMPLE_RATE / sample_rate)
+                audio_array = scipy_signal.resample(audio_array, num_samples).astype(np.int16)
+
+            if cooldown > 0:
+                cooldown -= 1
+                continue
+
+            prediction = model.predict(audio_array)
+            score = prediction.get(wake_word, 0)
+
+            if score > threshold:
+                print(f"[WakeWord] Detected! (score: {score:.3f})")
+                TRIGGER_FILE.parent.mkdir(parents=True, exist_ok=True)
+                TRIGGER_FILE.write_text("triggered")
+                cooldown = int(SAMPLE_RATE / CHUNK_SIZE * 3)  # 3 second cooldown
+
+        except Exception as e:
+            if running:
+                print(f"[WakeWord] Error: {e}", file=sys.stderr)
+                continue
+
+    stream.stop_stream()
+    stream.close()
+    audio.terminate()
+    print("[WakeWord] Stopped")
+
+if __name__ == "__main__":
+    main()
+`;
+/**
+ * Starts the wake word listener as a background Python process
+ */
+function startWakeWordListener(wakeWord = "hey_jarvis", threshold = 0.5) {
+    // Write the script to a temp file
+    const scriptPath = join(tmpdir(), "opencode-wakeword-listener.py");
+    try {
+        writeFileSync(scriptPath, WAKE_WORD_SCRIPT);
+    }
+    catch (err) {
+        console.error("[Voice Plugin] Failed to write wake word script:", err);
+        return null;
+    }
+    // Spawn Python process
+    const proc = spawn("python3", [scriptPath, wakeWord, threshold.toString()], {
+        stdio: ["ignore", "pipe", "pipe"],
+        detached: false,
+    });
+    proc.stdout?.on("data", (data) => {
+        const msg = data.toString().trim();
+        if (msg)
+            console.log(msg);
+    });
+    proc.stderr?.on("data", (data) => {
+        const msg = data.toString().trim();
+        if (msg)
+            console.error(msg);
+    });
+    proc.on("error", (err) => {
+        console.error("[Voice Plugin] Wake word listener failed to start:", err.message);
+        console.error("[Voice Plugin] Make sure Python 3 and dependencies are installed:");
+        console.error("[Voice Plugin] pip install openwakeword pyaudio numpy scipy");
+    });
+    proc.on("exit", (code) => {
+        if (code !== 0 && code !== null) {
+            console.error(`[Voice Plugin] Wake word listener exited with code ${code}`);
+        }
+        wakeWordProcess = null;
+    });
+    return proc;
+}
+/**
+ * Stops the wake word listener process
+ */
+function stopWakeWordListener() {
+    if (wakeWordProcess) {
+        wakeWordProcess.kill("SIGTERM");
+        wakeWordProcess = null;
+    }
+}
+/**
+ * Sets up wake word trigger file watching
+ * When the trigger file is written, it records audio, transcribes it, and appends to the TUI prompt
+ */
+function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, client // OpenCode SDK client
+) {
+    // Ensure the directory exists
+    const triggerDir = dirname(TRIGGER_FILE);
+    mkdirSync(triggerDir, { recursive: true });
+    // Clear any existing trigger
+    clearTriggerFile();
+    console.log("[Voice Plugin] Wake word trigger watcher enabled");
+    // Watch the directory for the trigger file
+    let isRecording = false;
+    watch(triggerDir, async (eventType, filename) => {
+        if (filename !== "voice_trigger" || isRecording)
+            return;
+        if (!existsSync(TRIGGER_FILE))
+            return;
+        isRecording = true;
+        console.log("[Voice Plugin] Wake word triggered! Recording...");
+        try {
+            // Clear the trigger file immediately
+            clearTriggerFile();
+            // Record and transcribe
+            const transcription = await recordAndTranscribe(apiKey, maxDuration, silenceDuration, language);
+            if (transcription && !transcription.startsWith("No speech detected")) {
+                console.log(`[Voice Plugin] Transcribed: "${transcription}"`);
+                // Append transcription to the TUI prompt
+                try {
+                    await client.tui.appendPrompt({ body: { text: transcription } });
+                    // Auto-submit the prompt
+                    await client.tui.submitPrompt();
+                }
+                catch (err) {
+                    console.error("[Voice Plugin] Failed to send to TUI:", err);
+                }
+            }
+        }
+        catch (error) {
+            console.error("[Voice Plugin] Error:", error);
+        }
+        finally {
+            isRecording = false;
+        }
+    });
+}
 export const VoicePlugin = (options = {}) => async (ctx) => {
-    const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, } = options;
+    const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, wakeWord = "hey_jarvis", wakeWordThreshold = 0.5, } = options;
     if (!apiKey) {
         console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
     }
+    // Start wake word listener and set up file watcher if enabled
+    if (enableWakeWord && apiKey && ctx.client) {
+        // Start the Python wake word listener
+        wakeWordProcess = startWakeWordListener(wakeWord, wakeWordThreshold);
+        if (wakeWordProcess) {
+            console.log(`[Voice Plugin] Wake word listener started (say "${wakeWord.replace('_', ' ')}")`);
+            // Set up the trigger file watcher
+            setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
+            // Clean up on process exit
+            process.on("exit", stopWakeWordListener);
+            process.on("SIGINT", stopWakeWordListener);
+            process.on("SIGTERM", stopWakeWordListener);
+        }
+    }
     return {
         tool: {
             voice: tool({
@@ -115,29 +397,14 @@ export const VoicePlugin = (options = {}) => async (ctx) => {
                     if (!apiKey) {
                         return "Error: OPENAI_API_KEY environment variable is not set. Please set it to use voice transcription.";
                     }
-                    let audioFile = null;
                     try {
-
-                        const transcription = await transcribeAudio(audioFile, apiKey, language);
-                        if (!transcription || transcription.trim() === "") {
-                            return "No speech detected. Please try again and speak clearly into your microphone.";
-                        }
+                        const transcription = await recordAndTranscribe(apiKey, maxDuration, silenceDuration, language);
                         return `Transcribed speech: "${transcription}"`;
                     }
                     catch (error) {
                         const errorMessage = error instanceof Error ? error.message : String(error);
                         return `Voice recording/transcription failed: ${errorMessage}`;
                     }
-                    finally {
-                        if (audioFile) {
-                            try {
-                                unlinkSync(audioFile);
-                            }
-                            catch {
-                                // Ignore cleanup errors
-                            }
-                        }
-                    }
                 },
             }),
         },
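Taken together, the additions to dist/index.js implement a file-based handshake: the embedded Python listener writes `~/.cache/opencode/voice_trigger` whenever the wake word score exceeds the threshold, and `setupWakeWordWatcher` watches that directory, records and transcribes, then appends and submits the text through `client.tui`. A minimal sketch of exercising the watcher from Node without the Python listener (paths and payload copied from the diff; an illustration of the trigger protocol, not a supported API):

```ts
// Hypothetical manual trigger: the 1.1.3 watcher only checks that a file named
// "voice_trigger" appears in ~/.cache/opencode; its contents are not inspected.
import { mkdirSync, writeFileSync } from "fs"
import { homedir } from "os"
import { join, dirname } from "path"

const TRIGGER_FILE = join(homedir(), ".cache", "opencode", "voice_trigger")
mkdirSync(dirname(TRIGGER_FILE), { recursive: true })
writeFileSync(TRIGGER_FILE, "triggered") // same payload the Python listener writes
```

Writing this file while the plugin is active should behave like a spoken wake word: the watcher deletes the trigger file, records until `silenceDuration` seconds of silence (or the `maxDuration` safety timeout), and sends the transcription to the TUI prompt.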