speech-opencode 1.1.2 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +211 -7
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -8,8 +8,12 @@ export interface VoicePluginOptions {
|
|
|
8
8
|
silenceDuration?: number;
|
|
9
9
|
/** Maximum recording duration in seconds as a safety timeout (default 300 = 5 minutes) */
|
|
10
10
|
maxDuration?: number;
|
|
11
|
-
/** Enable wake word
|
|
11
|
+
/** Enable wake word detection (default true). Requires Python + openwakeword */
|
|
12
12
|
enableWakeWord?: boolean;
|
|
13
|
+
/** Wake word to listen for (default "hey_jarvis"). Options: hey_jarvis, alexa, hey_mycroft */
|
|
14
|
+
wakeWord?: string;
|
|
15
|
+
/** Wake word detection threshold 0.0-1.0 (default 0.5, lower = more sensitive) */
|
|
16
|
+
wakeWordThreshold?: number;
|
|
13
17
|
}
|
|
14
18
|
export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
|
|
15
19
|
declare const _default: Plugin;
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA8GvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,+DAA+D;IAC/D,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,0FAA0F;IAC1F,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,gFAAgF;IAChF,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,kFAAkF;IAClF,iBAAiB,CAAC,EAAE,MAAM,CAAA;CAC3B;AA2UD,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAwEnC,CAAA;;AAGH,wBAA4B"}
|
package/dist/index.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
import { tool } from "@opencode-ai/plugin";
|
|
2
2
|
import OpenAI from "openai";
|
|
3
3
|
import { spawn } from "child_process";
|
|
4
|
-
import { unlinkSync, readFileSync, existsSync, watch, mkdirSync } from "fs";
|
|
4
|
+
import { unlinkSync, readFileSync, existsSync, watch, mkdirSync, writeFileSync } from "fs";
|
|
5
5
|
import { tmpdir, homedir } from "os";
|
|
6
6
|
import { join, dirname } from "path";
|
|
7
7
|
// Trigger file path for wake word integration
|
|
8
8
|
const TRIGGER_FILE = join(homedir(), ".cache", "opencode", "voice_trigger");
|
|
9
|
+
// Store reference to wake word listener process
|
|
10
|
+
let wakeWordProcess = null;
|
|
9
11
|
/**
|
|
10
12
|
* Records audio from the microphone with automatic silence detection.
|
|
11
13
|
* Recording stops after the specified silence duration.
|
|
@@ -138,6 +140,188 @@ function clearTriggerFile() {
|
|
|
138
140
|
// Ignore errors
|
|
139
141
|
}
|
|
140
142
|
}
|
|
143
|
+
/**
 * Embedded Python wake word listener script.
 *
 * The script takes the wake word as argv[1] (default "hey_jarvis") and the
 * detection threshold as argv[2] (default 0.5). It reads microphone audio with
 * PyAudio, feeds it to an openwakeword model and, when the score for the wake
 * word exceeds the threshold, writes "triggered" to
 * ~/.cache/opencode/voice_trigger.
 *
 * Notes on the audio loop:
 * - Audio is always consumed in 80 ms chunks. When the input device does not
 *   run at 16 kHz, the number of frames read is scaled to the device rate and
 *   the chunk is resampled to exactly CHUNK_SIZE samples, so the model always
 *   receives a full frame and the cooldown really lasts COOLDOWN_SECONDS.
 * - Resampled audio is clipped to the int16 range before the cast so that
 *   resampling overshoot cannot wrap around.
 * - Status lines are flushed immediately because stdout is a pipe (and thus
 *   block-buffered by default) when the script is launched from Node.
 *
 * The text must stay free of backticks, backslashes and dollar-brace
 * sequences because it lives inside a JavaScript template literal.
 */
const WAKE_WORD_SCRIPT = `
#!/usr/bin/env python3
"""Embedded wake word listener for speech-opencode"""
import sys
import signal
import time
from pathlib import Path

try:
    import os
    # Silence ONNX Runtime warnings
    os.environ["ORT_LOGGING_LEVEL"] = "3"

    import pyaudio
    import numpy as np
    from openwakeword.model import Model
except ImportError as e:
    print(f"[WakeWord] Missing dependency: {e}", file=sys.stderr)
    print("[WakeWord] Install with: pip install openwakeword pyaudio numpy scipy", file=sys.stderr)
    sys.exit(1)

SAMPLE_RATE = 16000
CHUNK_SIZE = 1280
COOLDOWN_SECONDS = 3
TRIGGER_FILE = Path.home() / ".cache" / "opencode" / "voice_trigger"


def log(message):
    """Print a status line and flush it so the parent process sees it right away"""
    print(message, flush=True)


def get_input_device():
    """Find the best input device, preferring pipewire"""
    p = pyaudio.PyAudio()
    try:
        # First pass: look for pipewire (usually works best on modern Linux)
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            name = str(info.get("name", "")).lower()
            if info.get("maxInputChannels", 0) > 0 and "pipewire" in name:
                return i

        # Second pass: any non-monitor, non-bluetooth input
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            name = str(info.get("name", "")).lower()
            if info.get("maxInputChannels", 0) > 0:
                if "monitor" not in name and "bluez" not in name and "bluetooth" not in name:
                    return i

        return None
    finally:
        p.terminate()


def main():
    wake_word = sys.argv[1] if len(sys.argv) > 1 else "hey_jarvis"
    threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    log(f"[WakeWord] Loading model: {wake_word}")
    model = Model()

    device_index = get_input_device()
    audio = pyaudio.PyAudio()

    if device_index is not None:
        info = audio.get_device_info_by_index(device_index)
        sample_rate = int(info.get('defaultSampleRate', SAMPLE_RATE))
        log(f"[WakeWord] Using device: {info.get('name')} @ {sample_rate}Hz")
    else:
        sample_rate = SAMPLE_RATE

    try:
        from scipy import signal as scipy_signal
    except ImportError:
        scipy_signal = None

    need_resample = sample_rate != SAMPLE_RATE
    if need_resample and scipy_signal is None:
        print(
            f"[WakeWord] Warning: scipy is not installed, so {sample_rate}Hz audio "
            f"cannot be resampled to {SAMPLE_RATE}Hz; detection may be unreliable",
            file=sys.stderr,
        )
        need_resample = False

    # Always consume 80 ms of audio per iteration: scale the frame count to the
    # device rate so that the resampled chunk has exactly CHUNK_SIZE samples.
    if need_resample:
        read_frames = max(1, int(round(CHUNK_SIZE * sample_rate / SAMPLE_RATE)))
    else:
        read_frames = CHUNK_SIZE

    # Number of loop iterations that add up to COOLDOWN_SECONDS of audio.
    cooldown_chunks = max(1, int(sample_rate / read_frames * COOLDOWN_SECONDS))

    stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        input=True,
        input_device_index=device_index,
        frames_per_buffer=read_frames,
    )

    log(f"[WakeWord] Listening for '{wake_word.replace('_', ' ')}'...")

    running = True
    cooldown = 0
    warned_unknown_word = False

    def handle_signal(sig, frame):
        nonlocal running
        running = False

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    try:
        while running:
            try:
                data = stream.read(read_frames, exception_on_overflow=False)

                if cooldown > 0:
                    cooldown -= 1
                    continue

                audio_array = np.frombuffer(data, dtype=np.int16)

                if need_resample:
                    resampled = scipy_signal.resample(audio_array, CHUNK_SIZE)
                    # Clip before the cast: resampling overshoot would wrap around
                    audio_array = np.clip(resampled, -32768, 32767).astype(np.int16)

                prediction = model.predict(audio_array)

                if not warned_unknown_word and wake_word not in prediction:
                    warned_unknown_word = True
                    available = ", ".join(sorted(prediction))
                    print(
                        f"[WakeWord] Warning: no model named '{wake_word}' is loaded "
                        f"(available: {available}); the wake word will never trigger",
                        file=sys.stderr,
                    )

                score = prediction.get(wake_word, 0)

                if score > threshold:
                    log(f"[WakeWord] Detected! (score: {score:.3f})")
                    TRIGGER_FILE.parent.mkdir(parents=True, exist_ok=True)
                    TRIGGER_FILE.write_text("triggered")
                    cooldown = cooldown_chunks

            except Exception as e:
                if running:
                    print(f"[WakeWord] Error: {e}", file=sys.stderr)
                    # Back off briefly so a broken stream does not spin the CPU
                    time.sleep(0.1)
                continue
    finally:
        stream.stop_stream()
        stream.close()
        audio.terminate()
        log("[WakeWord] Stopped")


if __name__ == "__main__":
    main()
`;
|
|
273
|
+
/**
 * Starts the wake word listener as a background Python process.
 *
 * The embedded listener source (WAKE_WORD_SCRIPT) is passed to the interpreter
 * with `python3 -c` rather than being written to a fixed file name in the
 * shared temp directory and executed from there. A predictable path in a
 * world-writable directory can be pre-created or symlinked by another local
 * user, which would let them block the listener or swap the code that gets
 * executed. With `-c`, sys.argv[1] and sys.argv[2] are still the wake word and
 * the threshold, exactly as the script expects.
 *
 * The child's stdout is forwarded to console.log; its stderr is forwarded to
 * console.error unless the chunk contains known ONNX Runtime / ALSA noise.
 *
 * @param wakeWord - Wake word passed to the script as its first argument
 *   (default "hey_jarvis").
 * @param threshold - Detection threshold passed as the second argument
 *   (default 0.5). A value that is not a finite number falls back to 0.5,
 *   because it would otherwise reach Python as "NaN" and no score could ever
 *   exceed it.
 * @returns The spawned child process, or null if the process could not be
 *   launched synchronously. Asynchronous launch failures (for example python3
 *   not being installed) are reported through the "error" event, which logs
 *   install hints and clears the module-level `wakeWordProcess` reference.
 */
function startWakeWordListener(wakeWord = "hey_jarvis", threshold = 0.5) {
    let effectiveThreshold = Number(threshold);
    if (!Number.isFinite(effectiveThreshold)) {
        console.error(`[Voice Plugin] Invalid wake word threshold "${threshold}", using 0.5`);
        effectiveThreshold = 0.5;
    }
    // Spawn Python process; the script travels as an argument, no temp file needed
    let proc;
    try {
        proc = spawn("python3", ["-c", WAKE_WORD_SCRIPT, String(wakeWord), effectiveThreshold.toString()], {
            stdio: ["ignore", "pipe", "pipe"],
            detached: false,
        });
    }
    catch (err) {
        console.error("[Voice Plugin] Failed to launch wake word listener:", err);
        return null;
    }
    // Only drop the shared reference if it still points at this process
    const releaseReference = () => {
        if (wakeWordProcess === proc) {
            wakeWordProcess = null;
        }
    };
    proc.stdout?.on("data", (data) => {
        const msg = data.toString().trim();
        if (msg)
            console.log(msg);
    });
    proc.stderr?.on("data", (data) => {
        const msg = data.toString().trim();
        // Filter out ONNX Runtime warnings about missing providers
        if (msg && !msg.includes("UserWarning") && !msg.includes("onnxruntime") && !msg.includes("CUDAExecutionProvider") && !msg.includes("ALSA lib")) {
            console.error(msg);
        }
    });
    proc.on("error", (err) => {
        console.error("[Voice Plugin] Wake word listener failed to start:", err.message);
        console.error("[Voice Plugin] Make sure Python 3 and dependencies are installed:");
        console.error("[Voice Plugin]   pip install openwakeword pyaudio numpy scipy");
        // "exit" is not guaranteed to follow a failed spawn, so clean up here too
        releaseReference();
    });
    proc.on("exit", (code) => {
        if (code !== 0 && code !== null) {
            console.error(`[Voice Plugin] Wake word listener exited with code ${code}`);
        }
        releaseReference();
    });
    return proc;
}
|
|
316
|
+
/**
 * Stops the wake word listener process, if one is currently tracked.
 *
 * Sends SIGTERM to the process held in the module-level `wakeWordProcess`
 * reference and then clears that reference. Does nothing when no listener is
 * tracked.
 */
function stopWakeWordListener() {
    const listener = wakeWordProcess;
    if (!listener) {
        return;
    }
    listener.kill("SIGTERM");
    wakeWordProcess = null;
}
|
|
141
325
|
/**
|
|
142
326
|
* Sets up wake word trigger file watching
|
|
143
327
|
* When the trigger file is written, it records audio, transcribes it, and appends to the TUI prompt
|
|
@@ -149,9 +333,7 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
|
|
|
149
333
|
mkdirSync(triggerDir, { recursive: true });
|
|
150
334
|
// Clear any existing trigger
|
|
151
335
|
clearTriggerFile();
|
|
152
|
-
console.log("[Voice Plugin] Wake word watcher enabled");
|
|
153
|
-
console.log(`[Voice Plugin] Watching: ${TRIGGER_FILE}`);
|
|
154
|
-
console.log("[Voice Plugin] Run 'python wakeword/listener.py' to enable 'Hey Jarvis' wake word");
|
|
336
|
+
console.log("[Voice Plugin] Wake word trigger watcher enabled");
|
|
155
337
|
// Watch the directory for the trigger file
|
|
156
338
|
let isRecording = false;
|
|
157
339
|
watch(triggerDir, async (eventType, filename) => {
|
|
@@ -161,6 +343,18 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
|
|
|
161
343
|
return;
|
|
162
344
|
isRecording = true;
|
|
163
345
|
console.log("[Voice Plugin] Wake word triggered! Recording...");
|
|
346
|
+
// Show toast notification
|
|
347
|
+
try {
|
|
348
|
+
await client.tui.showToast({
|
|
349
|
+
body: {
|
|
350
|
+
message: "Wake word detected! Listening...",
|
|
351
|
+
variant: "info"
|
|
352
|
+
}
|
|
353
|
+
});
|
|
354
|
+
}
|
|
355
|
+
catch (err) {
|
|
356
|
+
console.error("[Voice Plugin] Failed to show toast:", err);
|
|
357
|
+
}
|
|
164
358
|
try {
|
|
165
359
|
// Clear the trigger file immediately
|
|
166
360
|
clearTriggerFile();
|
|
@@ -188,13 +382,23 @@ function setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, cl
|
|
|
188
382
|
});
|
|
189
383
|
}
|
|
190
384
|
export const VoicePlugin = (options = {}) => async (ctx) => {
|
|
191
|
-
const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, } = options;
|
|
385
|
+
const { apiKey = process.env.OPENAI_API_KEY, language, silenceDuration = 7, maxDuration = 300, enableWakeWord = true, wakeWord = "hey_jarvis", wakeWordThreshold = 0.5, } = options;
|
|
192
386
|
if (!apiKey) {
|
|
193
387
|
console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
|
|
194
388
|
}
|
|
195
|
-
//
|
|
389
|
+
// Start wake word listener and set up file watcher if enabled
|
|
196
390
|
if (enableWakeWord && apiKey && ctx.client) {
|
|
197
|
-
|
|
391
|
+
// Start the Python wake word listener
|
|
392
|
+
wakeWordProcess = startWakeWordListener(wakeWord, wakeWordThreshold);
|
|
393
|
+
if (wakeWordProcess) {
|
|
394
|
+
console.log(`[Voice Plugin] Wake word listener started (say "${wakeWord.replace('_', ' ')}")`);
|
|
395
|
+
// Set up the trigger file watcher
|
|
396
|
+
setupWakeWordWatcher(apiKey, maxDuration, silenceDuration, language, ctx.client);
|
|
397
|
+
// Clean up on process exit
|
|
398
|
+
process.on("exit", stopWakeWordListener);
|
|
399
|
+
process.on("SIGINT", stopWakeWordListener);
|
|
400
|
+
process.on("SIGTERM", stopWakeWordListener);
|
|
401
|
+
}
|
|
198
402
|
}
|
|
199
403
|
return {
|
|
200
404
|
tool: {
|