@hxnnxs/opencode-voice 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,19 @@
2
2
 
3
3
  All notable changes to this project are documented here.
4
4
 
5
+ ## 0.1.5 - 2026-06-17
6
+
7
+ ### Fixed
8
+
9
+ - Fixed `No recorder found` failures after fresh plugin installs on Windows: when `ffmpeg-static` is missing at runtime, it is now installed locally on demand.
10
+
11
+ ## 0.1.4 - 2026-06-16
12
+
13
+ ### Added
14
+
15
+ - Added bundled `ffmpeg` fallback for Windows recorder flow via `ffmpeg-static` so voice input no longer depends on user-installed `ffmpeg`.
16
+ - Added Windows DirectShow microphone discovery and input handling in the recorder layer.
17
+
5
18
  ## 0.1.3 - 2026-06-15
6
19
 
7
20
  ### Fixed
package/README.md CHANGED
@@ -112,7 +112,7 @@ Planned sidecar models:
112
112
  | -------- | ------ |
113
113
  | Linux | one-command engine/model install; recording uses `arecord`, `ffmpeg`, or `sox` |
114
114
  | macOS | one-command engine/model install; recording uses `ffmpeg` AVFoundation until the native recorder sidecar ships |
115
- | Windows | engine download path ready; recording needs the native recorder sidecar |
115
+ | Windows | one-command engine/model install; recording uses `ffmpeg` with DirectShow (system or bundled `ffmpeg` fallback) |
116
116
 
117
117
  ### Architecture
118
118
 
@@ -139,7 +139,7 @@ Voice input needs native audio and STT binaries. The JS plugin manages OpenCode
139
139
  - publish managed `whisper-cli` release assets before npm release
140
140
  - Rust recorder sidecar with `cpal` and VAD
141
141
  - Parakeet, GigaAM, SenseVoice, Canary, and Moonshine model support
142
- - Windows recorder support
142
+ - Windows recorder stability and UX polish
143
143
  - faster streaming-style transcription
144
144
 
145
145
  ### Development
package/docs/README.es.md CHANGED
@@ -110,7 +110,7 @@ Modelos sidecar planeados:
110
110
  | ---------- | ------ |
111
111
  | Linux | instalación engine/model en una orden; la grabación usa `arecord`, `ffmpeg` o `sox` |
112
112
  | macOS | instalación engine/model en una orden; la grabación usa `ffmpeg` AVFoundation hasta el native recorder sidecar |
113
- | Windows | ruta de descarga de engine lista; la grabación necesita el native recorder sidecar |
113
+ | Windows | instalación engine/model en una orden; la grabación usa `ffmpeg` con DirectShow (`ffmpeg` del sistema o el fallback incluido) |
114
114
 
115
115
  ### Arquitectura
116
116
 
@@ -137,7 +137,7 @@ La entrada por voz necesita native audio y STT binaries. El plugin JS gestiona O
137
137
  - publicar managed `whisper-cli` release assets antes del npm release
138
138
  - Rust recorder sidecar con `cpal` y VAD
139
139
  - soporte para Parakeet, GigaAM, SenseVoice, Canary y Moonshine
140
- - Windows recorder support
140
+ - Mejorar estabilidad y UX del recorder en Windows
141
141
  - streaming-style transcription más rápida
142
142
 
143
143
  ### Desarrollo
package/docs/README.ru.md CHANGED
@@ -110,7 +110,7 @@ Hold-to-talk отключен по умолчанию, потому что termi
110
110
  | --------- | ------ |
111
111
  | Linux | one-command engine/model install; запись использует `arecord`, `ffmpeg` или `sox` |
112
112
  | macOS | one-command engine/model install; запись использует `ffmpeg` AVFoundation до native recorder sidecar |
113
- | Windows | путь скачивания engine готов; recording ждёт native recorder sidecar |
113
+ | Windows | one-command engine/model install; запись через `ffmpeg` + DirectShow (через системный ffmpeg или встроенный fallback) |
114
114
 
115
115
  ### Архитектура
116
116
 
@@ -137,7 +137,7 @@ Voice input требует native audio и STT binaries. JS-плагин упр
137
137
  - опубликовать managed `whisper-cli` release assets перед npm release
138
138
  - Rust recorder sidecar с `cpal` и VAD
139
139
  - поддержка Parakeet, GigaAM, SenseVoice, Canary и Moonshine
140
- - Windows recorder support
140
+ - Улучшение устойчивости и UX Windows recorder
141
141
  - более быстрая streaming-style transcription
142
142
 
143
143
  ### Разработка
package/docs/README.zh.md CHANGED
@@ -110,7 +110,7 @@ ctrl+r -> 停止、转写并插入文本
110
110
  | ------- | ---- |
111
111
  | Linux | 一条命令安装 engine/model;录音使用 `arecord`、`ffmpeg` 或 `sox` |
112
112
  | macOS | 一条命令安装 engine/model;native recorder sidecar 发布前使用 `ffmpeg` AVFoundation |
113
- | Windows | engine 下载路径已准备好;录音还需要 native recorder sidecar |
113
+ | Windows | 一条命令安装 engine/model;录音通过 `ffmpeg` + DirectShow(使用系统 ffmpeg 或内置备用 ffmpeg) |
114
114
 
115
115
  ### 架构
116
116
 
@@ -137,7 +137,7 @@ ctrl+r -> 停止、转写并插入文本
137
137
  - npm release 前发布 managed `whisper-cli` release assets
138
138
  - 使用 `cpal` 和 VAD 的 Rust recorder sidecar
139
139
  - 支持 Parakeet、GigaAM、SenseVoice、Canary 和 Moonshine
140
- - Windows recorder support
140
+ - 更完善的 Windows 录音体验和稳定性
141
141
  - 更快的 streaming-style transcription
142
142
 
143
143
  ### 开发
package/index.js CHANGED
@@ -325,6 +325,7 @@ function showLanguagePicker(ctx) {
325
325
 
326
326
  function showMicrophonePicker(ctx) {
327
327
  const settings = readSettings(ctx.api.kv);
328
+ const placeholder = process.platform === "win32" ? "default, audio=default, \"Microphone (Name)\"" : "default, hw:0,0, pulse, :0, ...";
328
329
  const devices = listMicrophones();
329
330
  setDialog(ctx, "large", () =>
330
331
  ctx.api.ui.DialogSelect({
@@ -339,7 +340,7 @@ function showMicrophonePicker(ctx) {
339
340
  if (option.value === "__custom") {
340
341
  showPrompt(ctx, {
341
342
  title: "Custom microphone device",
342
- placeholder: "default, hw:0,0, pulse, :0, ...",
343
+ placeholder,
343
344
  value: settings.mic,
344
345
  onConfirm: (value) => {
345
346
  writeSetting(ctx.api.kv, "mic", value.trim());
@@ -361,10 +362,13 @@ function showDiagnostics(ctx) {
361
362
  const model = getModel(settings.model);
362
363
  const commandOptions = { ...ctx.options, downloadDir: settings.downloadDir };
363
364
  const whisperCli = resolveCommand("whisper-cli", commandOptions);
365
+ const ffmpeg = resolveCommand("ffmpeg", commandOptions);
366
+ const arecord = resolveCommand("arecord", commandOptions);
367
+ const sox = resolveCommand("sox", commandOptions);
364
368
  const engine = getEngineStatus("whisper.cpp", ctx.options, settings);
365
369
  const lines = [
366
370
  `Platform: ${process.platform}-${process.arch}`,
367
- `Recorder: ffmpeg=${commandExists("ffmpeg") ? "yes" : "no"}, arecord=${commandExists("arecord") ? "yes" : "no"}, sox=${commandExists("sox") ? "yes" : "no"}`,
371
+ `Recorder: ffmpeg=${ffmpeg ? "yes" : "no"}${ffmpeg ? ` (${ffmpeg})` : ""}, arecord=${arecord ? "yes" : "no"}, sox=${sox ? "yes" : "no"}`,
368
372
  `Engine: ${engine.id}`,
369
373
  `Engine source: ${engine.source}`,
370
374
  `whisper-cli: ${whisperCli || "missing"}`,
package/lib/engine.js CHANGED
@@ -1,10 +1,18 @@
1
1
  import fs from "node:fs";
2
2
  import os from "node:os";
3
3
  import path from "node:path";
4
+ import { createRequire } from "node:module";
4
5
  import { spawn, spawnSync } from "node:child_process";
6
+ import { fileURLToPath } from "node:url";
5
7
  import { ensureDir } from "./download.js";
6
8
  import { getAudioDir, getEnginesDir, getModelPath } from "./models.js";
7
9
 
10
+ const require = createRequire(import.meta.url);
11
+ const FFMPEG_STATIC_PACKAGE = "ffmpeg-static@^5.2.0";
12
+ const PLUGIN_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
13
+ let ffmpegStaticPathCache;
14
+ let ffmpegStaticInstallAttempted = false;
15
+
8
16
  const RECORDING_MIN_BYTES = 44;
9
17
 
10
18
  function isExecutable(file) {
@@ -31,6 +39,44 @@ function executableNames(command) {
31
39
  return [...new Set(names)];
32
40
  }
33
41
 
42
/**
 * Locates the ffmpeg binary provided by the `ffmpeg-static` package (Windows only).
 *
 * The lookup is attempted once per process and memoised in `ffmpegStaticPathCache`.
 * When the package (or its binary) is missing, a single `npm install` is attempted
 * inside `PLUGIN_ROOT` and the lookup is retried.
 *
 * @returns {string} Absolute path to an ffmpeg binary that exists on disk, or ""
 *   when not on Windows or when no bundled binary could be found or installed.
 */
function bundledFfmpegPath() {
  if (process.platform !== "win32") return "";
  if (ffmpegStaticPathCache !== undefined) return ffmpegStaticPathCache;

  // `ffmpeg-static` may export a path even when its install script never fetched the
  // binary (e.g. lifecycle scripts were skipped), so only accept a real file on disk.
  const existingFile = (candidate) => {
    if (typeof candidate !== "string" || !candidate) return "";
    const resolved = path.resolve(candidate);
    try {
      return fs.statSync(resolved).isFile() ? resolved : "";
    } catch {
      return "";
    }
  };

  const resolveFromDependency = () => {
    try {
      return existingFile(require("ffmpeg-static"));
    } catch {
      return "";
    }
  };

  // Probe the location a local install in PLUGIN_ROOT is expected to populate, in case
  // `require` keeps resolving to an earlier copy of the package that lacks the binary.
  const resolveFromLocalInstall = () =>
    existingFile(path.join(PLUGIN_ROOT, "node_modules", "ffmpeg-static", "ffmpeg.exe"));

  const installDependency = () => {
    try {
      // `.cmd` shims must be launched through a shell on Windows; without it current
      // Node releases refuse to spawn them. The package spec is quoted because `^`
      // is an escape character for cmd.exe. The command is built from constants only.
      const result = spawnSync(`npm.cmd install --no-save --no-audit --no-fund "${FFMPEG_STATIC_PACKAGE}"`, {
        cwd: PLUGIN_ROOT,
        stdio: "ignore",
        shell: true,
        windowsHide: true,
      });
      return result.status === 0;
    } catch {
      return false;
    }
  };

  let candidate = resolveFromDependency();
  if (!candidate && !ffmpegStaticInstallAttempted) {
    // Flag first so a failing install is never retried within this process.
    ffmpegStaticInstallAttempted = true;
    if (installDependency()) {
      candidate = resolveFromDependency() || resolveFromLocalInstall();
    }
  }

  ffmpegStaticPathCache = candidate || "";
  return ffmpegStaticPathCache;
}
79
+
34
80
  function platformKey(options = {}) {
35
81
  return `${options.platform || process.platform}-${options.arch || process.arch}`;
36
82
  }
@@ -52,6 +98,11 @@ function candidateCommands(command, options = {}) {
52
98
  if (process.env.OPENCODE_VOICE_WHISPER_CLI) candidates.push(process.env.OPENCODE_VOICE_WHISPER_CLI);
53
99
  }
54
100
 
101
+ if (command === "ffmpeg") {
102
+ const bundled = bundledFfmpegPath();
103
+ if (bundled) candidates.push(bundled);
104
+ }
105
+
55
106
  const bundledDir = getBundledEngineDir(command, options);
56
107
  if (bundledDir) {
57
108
  for (const name of executableNames(command)) candidates.push(path.join(bundledDir, name));
@@ -113,6 +164,23 @@ function childEnv(options = {}) {
113
164
  };
114
165
  }
115
166
 
167
/**
 * Turns a user-supplied microphone setting into a DirectShow input specifier.
 *
 * Empty, nullish and "default" values map to "audio=default". Values that already
 * start with "audio=" or "video=" keep their prefix; anything else is treated as a
 * device name and prefixed with "audio=". Quotes wrapped around the whole value or
 * around the device name are removed: the value is handed to ffmpeg as a single
 * argument without a shell, so literal quotes would become part of the device name.
 *
 * @param {string} [device=""] Raw microphone setting.
 * @returns {string} Input specifier suitable for `ffmpeg -f dshow -i`.
 */
function normalizeWindowsAudioInput(device = "") {
  // Removes one pair of matching quotes that wraps the entire text; otherwise no-op.
  const stripWrappingQuotes = (text) => {
    const wrapped = text.match(/^"([^"]*)"$/) || text.match(/^'([^']*)'$/);
    return wrapped ? wrapped[1] : text;
  };

  const value = stripWrappingQuotes(String(device ?? "").trim()).trim();
  if (!value || value === "default") return "audio=default";

  const prefixed = value.match(/^(audio|video)=([\s\S]*)$/);
  if (prefixed) return `${prefixed[1]}=${stripWrappingQuotes(prefixed[2])}`;

  return `audio=${value}`;
}
173
+
174
/**
 * Extracts audio capture device names from the stderr of
 * `ffmpeg -f dshow -list_devices true -i dummy`.
 *
 * Two listing layouts are recognised:
 *  - tagged entries, where each quoted name is followed by its media types in
 *    parentheses, e.g. `"Mic" (audio)` or `"Capture Card" (video, audio)`;
 *  - sectioned listings, where bare quoted names follow a
 *    "DirectShow audio devices" header line.
 * `Alternative name "..."` lines are never reported as devices.
 *
 * @param {string} stderr Raw ffmpeg stderr output; nullish is treated as empty.
 * @returns {string[]} Unique device names in first-seen order.
 */
function parseWindowsMicrophones(stderr) {
  const devices = new Set();
  let inAudioSection = false;

  for (const line of String(stderr ?? "").split(/\r?\n/)) {
    // Section headers only appear in the sectioned layout; track which one we are in.
    if (/DirectShow (audio|video) devices/.test(line)) {
      inAudioSection = /DirectShow audio devices/.test(line);
      continue;
    }

    const tagged = line.match(/"([^"]+)"\s+\(([^)]*)\)/);
    if (tagged) {
      const mediaTypes = tagged[2].split(",").map((type) => type.trim());
      if (mediaTypes.includes("audio")) devices.add(tagged[1]);
      continue;
    }

    if (inAudioSection) {
      // Only an optional "[dshow @ ...]" prefix may precede the name, which keeps
      // `Alternative name "..."` lines out of the result.
      const bare = line.match(/^(?:\[[^\]]*\])?\s*"([^"]+)"\s*$/);
      if (bare) devices.add(bare[1]);
    }
  }

  return [...devices];
}
183
+
116
184
  export function listMicrophones() {
117
185
  if (process.platform === "linux" && commandExists("arecord")) {
118
186
  const result = spawnSync("arecord", ["-L"], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
@@ -135,6 +203,15 @@ export function listMicrophones() {
135
203
  .map((id) => `:${id}`);
136
204
  }
137
205
 
206
+ if (process.platform === "win32" && commandExists("ffmpeg")) {
207
+ const result = spawnSync("ffmpeg", ["-hide_banner", "-f", "dshow", "-list_devices", "true", "-i", "dummy"], {
208
+ encoding: "utf8",
209
+ stdio: ["ignore", "ignore", "pipe"],
210
+ });
211
+ const devices = parseWindowsMicrophones(result.stderr || "");
212
+ return ["default", ...devices.filter((device) => device !== "default")];
213
+ }
214
+
138
215
  return ["default"];
139
216
  }
140
217
 
@@ -174,6 +251,17 @@ function buildRecorders(file, settings = {}) {
174
251
  });
175
252
  }
176
253
 
254
+ if (process.platform === "win32" && commandExists("ffmpeg")) {
255
+ const inputs = [...new Set([normalizeWindowsAudioInput(mic), "audio=default"])];
256
+ for (const input of inputs) {
257
+ recorders.push({
258
+ label: `ffmpeg dshow (${input.replace(/^audio=/, "")})`,
259
+ command: "ffmpeg",
260
+ args: ["-hide_banner", "-loglevel", "error", "-y", "-f", "dshow", "-i", input, "-ac", "1", "-ar", "16000", file],
261
+ });
262
+ }
263
+ }
264
+
177
265
  if (commandExists("sox")) {
178
266
  recorders.push({
179
267
  label: "sox default",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hxnnxs/opencode-voice",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Local voice input plugin for OpenCode",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -45,6 +45,9 @@
45
45
  "check": "node --check index.js && node --check lib/models.js && node --check lib/download.js && node --check lib/engine.js && node --check lib/engines.js && node --check bin/opencode-voice.js && node --check scripts/package-engine-asset.mjs && node --check scripts/build-engine-registry.mjs",
46
46
  "prepack": "npm run check"
47
47
  },
48
+ "dependencies": {
49
+ "ffmpeg-static": "^5.2.0"
50
+ },
48
51
  "publishConfig": {
49
52
  "access": "public",
50
53
  "provenance": true