@hxnnxs/opencode-voice 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +2 -2
- package/docs/README.es.md +2 -2
- package/docs/README.ru.md +2 -2
- package/docs/README.zh.md +2 -2
- package/index.js +6 -2
- package/lib/engine.js +88 -0
- package/package.json +4 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project are documented here.
|
|
4
4
|
|
|
5
|
+
## 0.1.5 - 2026-06-17
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Recovered missing `ffmpeg-static` at runtime on Windows by installing it locally when not present, preventing `No recorder found` failures after fresh plugin installs.
|
|
10
|
+
|
|
11
|
+
## 0.1.4 - 2026-06-16
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
|
|
15
|
+
- Added bundled `ffmpeg` fallback for Windows recorder flow via `ffmpeg-static` so voice input no longer depends on user-installed `ffmpeg`.
|
|
16
|
+
- Added Windows DirectShow microphone discovery and input handling in the recorder layer.
|
|
17
|
+
|
|
5
18
|
## 0.1.3 - 2026-06-15
|
|
6
19
|
|
|
7
20
|
### Fixed
|
package/README.md
CHANGED
|
@@ -112,7 +112,7 @@ Planned sidecar models:
|
|
|
112
112
|
| -------- | ------ |
|
|
113
113
|
| Linux | one-command engine/model install; recording uses `arecord`, `ffmpeg`, or `sox` |
|
|
114
114
|
| macOS | one-command engine/model install; recording uses `ffmpeg` AVFoundation until the native recorder sidecar ships |
|
|
115
|
-
| Windows | engine
|
|
115
|
+
| Windows | one-command engine/model install; recording uses `ffmpeg` with DirectShow (system or bundled `ffmpeg` fallback) |
|
|
116
116
|
|
|
117
117
|
### Architecture
|
|
118
118
|
|
|
@@ -139,7 +139,7 @@ Voice input needs native audio and STT binaries. The JS plugin manages OpenCode
|
|
|
139
139
|
- publish managed `whisper-cli` release assets before npm release
|
|
140
140
|
- Rust recorder sidecar with `cpal` and VAD
|
|
141
141
|
- Parakeet, GigaAM, SenseVoice, Canary, and Moonshine model support
|
|
142
|
-
- Windows recorder
|
|
142
|
+
- Windows recorder stability and UX polish
|
|
143
143
|
- faster streaming-style transcription
|
|
144
144
|
|
|
145
145
|
### Development
|
package/docs/README.es.md
CHANGED
|
@@ -110,7 +110,7 @@ Modelos sidecar planeados:
|
|
|
110
110
|
| ---------- | ------ |
|
|
111
111
|
| Linux | instalación engine/model en una orden; la grabación usa `arecord`, `ffmpeg` o `sox` |
|
|
112
112
|
| macOS | instalación engine/model en una orden; la grabación usa `ffmpeg` AVFoundation hasta el native recorder sidecar |
|
|
113
|
-
| Windows |
|
|
113
|
+
| Windows | instalación one-command de engine/model; grabación con `ffmpeg` + DirectShow (desde ffmpeg del sistema o fallback incluido) |
|
|
114
114
|
|
|
115
115
|
### Arquitectura
|
|
116
116
|
|
|
@@ -137,7 +137,7 @@ La entrada por voz necesita native audio y STT binaries. El plugin JS gestiona O
|
|
|
137
137
|
- publicar managed `whisper-cli` release assets antes del npm release
|
|
138
138
|
- Rust recorder sidecar con `cpal` y VAD
|
|
139
139
|
- soporte para Parakeet, GigaAM, SenseVoice, Canary y Moonshine
|
|
140
|
-
-
|
|
140
|
+
- Mejorar estabilidad y UX del recorder en Windows
|
|
141
141
|
- streaming-style transcription más rápida
|
|
142
142
|
|
|
143
143
|
### Desarrollo
|
package/docs/README.ru.md
CHANGED
|
@@ -110,7 +110,7 @@ Hold-to-talk отключен по умолчанию, потому что termi
|
|
|
110
110
|
| --------- | ------ |
|
|
111
111
|
| Linux | one-command engine/model install; запись использует `arecord`, `ffmpeg` или `sox` |
|
|
112
112
|
| macOS | one-command engine/model install; запись использует `ffmpeg` AVFoundation до native recorder sidecar |
|
|
113
|
-
| Windows |
|
|
113
|
+
| Windows | one-command engine/model install; запись через `ffmpeg` + DirectShow (через системный ffmpeg или встроенный fallback) |
|
|
114
114
|
|
|
115
115
|
### Архитектура
|
|
116
116
|
|
|
@@ -137,7 +137,7 @@ Voice input требует native audio и STT binaries. JS-плагин упр
|
|
|
137
137
|
- опубликовать managed `whisper-cli` release assets перед npm release
|
|
138
138
|
- Rust recorder sidecar с `cpal` и VAD
|
|
139
139
|
- поддержка Parakeet, GigaAM, SenseVoice, Canary и Moonshine
|
|
140
|
-
- Windows recorder
|
|
140
|
+
- Улучшение устойчивости и UX Windows recorder
|
|
141
141
|
- более быстрая streaming-style transcription
|
|
142
142
|
|
|
143
143
|
### Разработка
|
package/docs/README.zh.md
CHANGED
|
@@ -110,7 +110,7 @@ ctrl+r -> 停止、转写并插入文本
|
|
|
110
110
|
| ------- | ---- |
|
|
111
111
|
| Linux | 一条命令安装 engine/model;录音使用 `arecord`、`ffmpeg` 或 `sox` |
|
|
112
112
|
| macOS | 一条命令安装 engine/model;native recorder sidecar 发布前使用 `ffmpeg` AVFoundation |
|
|
113
|
-
| Windows | engine
|
|
113
|
+
| Windows | 下载 engine/model 一条命令完成;通过 `ffmpeg` + DirectShow 录音(使用系统 ffmpeg 或内置备用) |
|
|
114
114
|
|
|
115
115
|
### 架构
|
|
116
116
|
|
|
@@ -137,7 +137,7 @@ ctrl+r -> 停止、转写并插入文本
|
|
|
137
137
|
- npm release 前发布 managed `whisper-cli` release assets
|
|
138
138
|
- 使用 `cpal` 和 VAD 的 Rust recorder sidecar
|
|
139
139
|
- 支持 Parakeet、GigaAM、SenseVoice、Canary 和 Moonshine
|
|
140
|
-
- Windows
|
|
140
|
+
- 更完善的 Windows 录音体验和稳定性
|
|
141
141
|
- 更快的 streaming-style transcription
|
|
142
142
|
|
|
143
143
|
### 开发
|
package/index.js
CHANGED
|
@@ -325,6 +325,7 @@ function showLanguagePicker(ctx) {
|
|
|
325
325
|
|
|
326
326
|
function showMicrophonePicker(ctx) {
|
|
327
327
|
const settings = readSettings(ctx.api.kv);
|
|
328
|
+
const placeholder = process.platform === "win32" ? "default, audio=default, \"Microphone (Name)\"" : "default, hw:0,0, pulse, :0, ...";
|
|
328
329
|
const devices = listMicrophones();
|
|
329
330
|
setDialog(ctx, "large", () =>
|
|
330
331
|
ctx.api.ui.DialogSelect({
|
|
@@ -339,7 +340,7 @@ function showMicrophonePicker(ctx) {
|
|
|
339
340
|
if (option.value === "__custom") {
|
|
340
341
|
showPrompt(ctx, {
|
|
341
342
|
title: "Custom microphone device",
|
|
342
|
-
placeholder
|
|
343
|
+
placeholder,
|
|
343
344
|
value: settings.mic,
|
|
344
345
|
onConfirm: (value) => {
|
|
345
346
|
writeSetting(ctx.api.kv, "mic", value.trim());
|
|
@@ -361,10 +362,13 @@ function showDiagnostics(ctx) {
|
|
|
361
362
|
const model = getModel(settings.model);
|
|
362
363
|
const commandOptions = { ...ctx.options, downloadDir: settings.downloadDir };
|
|
363
364
|
const whisperCli = resolveCommand("whisper-cli", commandOptions);
|
|
365
|
+
const ffmpeg = resolveCommand("ffmpeg", commandOptions);
|
|
366
|
+
const arecord = resolveCommand("arecord", commandOptions);
|
|
367
|
+
const sox = resolveCommand("sox", commandOptions);
|
|
364
368
|
const engine = getEngineStatus("whisper.cpp", ctx.options, settings);
|
|
365
369
|
const lines = [
|
|
366
370
|
`Platform: ${process.platform}-${process.arch}`,
|
|
367
|
-
`Recorder: ffmpeg=${
|
|
371
|
+
`Recorder: ffmpeg=${ffmpeg ? "yes" : "no"}${ffmpeg ? ` (${ffmpeg})` : ""}, arecord=${arecord ? "yes" : "no"}, sox=${sox ? "yes" : "no"}`,
|
|
368
372
|
`Engine: ${engine.id}`,
|
|
369
373
|
`Engine source: ${engine.source}`,
|
|
370
374
|
`whisper-cli: ${whisperCli || "missing"}`,
|
package/lib/engine.js
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import os from "node:os";
|
|
3
3
|
import path from "node:path";
|
|
4
|
+
import { createRequire } from "node:module";
|
|
4
5
|
import { spawn, spawnSync } from "node:child_process";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
5
7
|
import { ensureDir } from "./download.js";
|
|
6
8
|
import { getAudioDir, getEnginesDir, getModelPath } from "./models.js";
|
|
7
9
|
|
|
10
|
+
const require = createRequire(import.meta.url);
|
|
11
|
+
const FFMPEG_STATIC_PACKAGE = "ffmpeg-static@^5.2.0";
|
|
12
|
+
const PLUGIN_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
|
|
13
|
+
let ffmpegStaticPathCache;
|
|
14
|
+
let ffmpegStaticInstallAttempted = false;
|
|
15
|
+
|
|
8
16
|
const RECORDING_MIN_BYTES = 44;
|
|
9
17
|
|
|
10
18
|
function isExecutable(file) {
|
|
@@ -31,6 +39,44 @@ function executableNames(command) {
|
|
|
31
39
|
return [...new Set(names)];
|
|
32
40
|
}
|
|
33
41
|
|
|
42
|
+
/**
 * Resolve the ffmpeg binary shipped by the `ffmpeg-static` package.
 *
 * Only active on Windows; every other platform gets "" immediately. The
 * outcome (including "not available") is memoized in `ffmpegStaticPathCache`,
 * so the lookup and the optional install run at most once per process.
 *
 * When the package cannot be required, a single `npm install --no-save` of
 * FFMPEG_STATIC_PACKAGE is attempted inside PLUGIN_ROOT and the lookup is
 * retried.
 *
 * @returns {string} Resolved path reported by `ffmpeg-static`, or "" when the
 *   package is unavailable or does not export a string path.
 */
function bundledFfmpegPath() {
  if (process.platform !== "win32") return "";
  if (ffmpegStaticPathCache !== undefined) return ffmpegStaticPathCache;

  // `ffmpeg-static` exports the binary path as a string (or null when it has
  // no binary for the platform); any require failure means "not bundled".
  const resolveFromDependency = () => {
    try {
      const candidate = require("ffmpeg-static");
      return typeof candidate === "string" ? path.resolve(candidate) : "";
    } catch {
      return "";
    }
  };

  // Best-effort install. `npm.cmd` is a batch file: current Node releases
  // refuse to spawn .cmd/.bat files without a shell (EINVAL), so the command
  // must go through cmd.exe. The package spec is double-quoted because cmd.exe
  // treats an unquoted `^` as an escape character and would drop it from the
  // semver range. A single command string is used instead of an args array to
  // avoid Node's "args with shell: true" deprecation warning.
  const installDependency = () => {
    try {
      const result = spawnSync(
        `npm.cmd install --no-save --no-audit --no-fund "${FFMPEG_STATIC_PACKAGE}"`,
        {
          cwd: PLUGIN_ROOT,
          stdio: "ignore",
          shell: true,
          windowsHide: true,
        },
      );
      // `status` is null when the process could not be started at all.
      return !result.error && result.status === 0;
    } catch {
      return false;
    }
  };

  let candidate = resolveFromDependency();
  if (!candidate && !ffmpegStaticInstallAttempted) {
    // Flag first so a failing install is never retried in this process.
    ffmpegStaticInstallAttempted = true;
    if (installDependency()) {
      candidate = resolveFromDependency();
    }
  }

  ffmpegStaticPathCache = candidate || "";
  return ffmpegStaticPathCache;
}
|
|
79
|
+
|
|
34
80
|
function platformKey(options = {}) {
|
|
35
81
|
return `${options.platform || process.platform}-${options.arch || process.arch}`;
|
|
36
82
|
}
|
|
@@ -52,6 +98,11 @@ function candidateCommands(command, options = {}) {
|
|
|
52
98
|
if (process.env.OPENCODE_VOICE_WHISPER_CLI) candidates.push(process.env.OPENCODE_VOICE_WHISPER_CLI);
|
|
53
99
|
}
|
|
54
100
|
|
|
101
|
+
if (command === "ffmpeg") {
|
|
102
|
+
const bundled = bundledFfmpegPath();
|
|
103
|
+
if (bundled) candidates.push(bundled);
|
|
104
|
+
}
|
|
105
|
+
|
|
55
106
|
const bundledDir = getBundledEngineDir(command, options);
|
|
56
107
|
if (bundledDir) {
|
|
57
108
|
for (const name of executableNames(command)) candidates.push(path.join(bundledDir, name));
|
|
@@ -113,6 +164,23 @@ function childEnv(options = {}) {
|
|
|
113
164
|
};
|
|
114
165
|
}
|
|
115
166
|
|
|
167
|
+
/**
 * Turn a user-supplied microphone setting into a DirectShow input string
 * suitable for ffmpeg's `-f dshow -i <input>`.
 *
 * - empty / null / "default"        -> "audio=default"
 * - "Name" or "\"Name\""            -> "audio=Name"
 * - "audio=Name" / "audio=\"Name\"" -> "audio=Name"
 * - "video=..."                     -> returned unchanged
 *
 * One pair of surrounding double quotes is removed because the recorder is
 * spawned without a shell: quotes typed by the user would otherwise reach
 * ffmpeg literally and never match a device name.
 *
 * @param {string} [device=""] Raw device setting.
 * @returns {string} DirectShow input specifier.
 */
function normalizeWindowsAudioInput(device = "") {
  const AUDIO_PREFIX = "audio=";

  // Only unwrap when the whole text is a single quoted token; composite
  // specs such as `"A":video="B"` contain inner quotes and are left alone.
  const unquote = (text) => {
    const quoted = text.match(/^"([^"]*)"$/);
    return quoted ? quoted[1].trim() : text;
  };

  // `device ?? ""` keeps an explicit null from becoming the string "null".
  const value = unquote(String(device ?? "").trim());
  if (!value || value === "default") return "audio=default";
  if (value.startsWith("video=")) return value;

  if (value.startsWith(AUDIO_PREFIX)) {
    const name = unquote(value.slice(AUDIO_PREFIX.length));
    // A bare "audio=" names no device; fall back to the default input.
    return name ? `${AUDIO_PREFIX}${name}` : "audio=default";
  }

  return `${AUDIO_PREFIX}${value}`;
}
|
|
173
|
+
|
|
174
|
+
/**
 * Extract audio capture device names from the stderr of
 * `ffmpeg -f dshow -list_devices true -i dummy`.
 *
 * Two listing layouts are understood:
 * - typed entries, where each device line ends with its media types, e.g.
 *   `"Microphone (Realtek Audio)" (audio)` or `"Capture Card" (audio, video)`;
 * - the older sectioned layout, where untyped `"Name"` lines follow a
 *   "DirectShow audio devices" header.
 *
 * `Alternative name "..."` lines are ignored in both layouts.
 *
 * @param {string} stderr Raw stderr text from ffmpeg; null/undefined is
 *   treated as empty.
 * @returns {string[]} Unique device names in first-seen order.
 */
function parseWindowsMicrophones(stderr) {
  const devices = new Set();
  let inLegacyAudioSection = false;

  for (const line of String(stderr ?? "").split(/\r?\n/)) {
    // Typed layout: the quoted name is followed by a parenthesised,
    // comma-separated list of media types.
    const typed = line.match(/"([^"]+)"\s+\(([^)]*)\)/);
    if (typed) {
      const kinds = typed[2].split(",").map((kind) => kind.trim());
      if (kinds.includes("audio")) devices.add(typed[1]);
      continue;
    }

    // Sectioned layout: headers switch the current section.
    if (/DirectShow audio devices/i.test(line)) {
      inLegacyAudioSection = true;
      continue;
    }
    if (/DirectShow video devices/i.test(line)) {
      inLegacyAudioSection = false;
      continue;
    }
    if (!inLegacyAudioSection) continue;

    // The quote must directly follow the optional "[dshow @ ...]" prefix,
    // which excludes `Alternative name "..."` lines.
    const legacy = line.match(/^\s*(?:\[[^\]]*\]\s*)?"([^"]+)"\s*$/);
    if (legacy) devices.add(legacy[1]);
  }

  return [...devices];
}
|
|
183
|
+
|
|
116
184
|
export function listMicrophones() {
|
|
117
185
|
if (process.platform === "linux" && commandExists("arecord")) {
|
|
118
186
|
const result = spawnSync("arecord", ["-L"], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
|
|
@@ -135,6 +203,15 @@ export function listMicrophones() {
|
|
|
135
203
|
.map((id) => `:${id}`);
|
|
136
204
|
}
|
|
137
205
|
|
|
206
|
+
if (process.platform === "win32" && commandExists("ffmpeg")) {
|
|
207
|
+
const result = spawnSync("ffmpeg", ["-hide_banner", "-f", "dshow", "-list_devices", "true", "-i", "dummy"], {
|
|
208
|
+
encoding: "utf8",
|
|
209
|
+
stdio: ["ignore", "ignore", "pipe"],
|
|
210
|
+
});
|
|
211
|
+
const devices = parseWindowsMicrophones(result.stderr || "");
|
|
212
|
+
return ["default", ...devices.filter((device) => device !== "default")];
|
|
213
|
+
}
|
|
214
|
+
|
|
138
215
|
return ["default"];
|
|
139
216
|
}
|
|
140
217
|
|
|
@@ -174,6 +251,17 @@ function buildRecorders(file, settings = {}) {
|
|
|
174
251
|
});
|
|
175
252
|
}
|
|
176
253
|
|
|
254
|
+
if (process.platform === "win32" && commandExists("ffmpeg")) {
|
|
255
|
+
const inputs = [...new Set([normalizeWindowsAudioInput(mic), "audio=default"])];
|
|
256
|
+
for (const input of inputs) {
|
|
257
|
+
recorders.push({
|
|
258
|
+
label: `ffmpeg dshow (${input.replace(/^audio=/, "")})`,
|
|
259
|
+
command: "ffmpeg",
|
|
260
|
+
args: ["-hide_banner", "-loglevel", "error", "-y", "-f", "dshow", "-i", input, "-ac", "1", "-ar", "16000", file],
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
177
265
|
if (commandExists("sox")) {
|
|
178
266
|
recorders.push({
|
|
179
267
|
label: "sox default",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hxnnxs/opencode-voice",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Local voice input plugin for OpenCode",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -45,6 +45,9 @@
|
|
|
45
45
|
"check": "node --check index.js && node --check lib/models.js && node --check lib/download.js && node --check lib/engine.js && node --check lib/engines.js && node --check bin/opencode-voice.js && node --check scripts/package-engine-asset.mjs && node --check scripts/build-engine-registry.mjs",
|
|
46
46
|
"prepack": "npm run check"
|
|
47
47
|
},
|
|
48
|
+
"dependencies": {
|
|
49
|
+
"ffmpeg-static": "^5.2.0"
|
|
50
|
+
},
|
|
48
51
|
"publishConfig": {
|
|
49
52
|
"access": "public",
|
|
50
53
|
"provenance": true
|