npm - @hxnnxs/opencode-voice - Versions diffs - 0.1.3 → 0.1.5 - Mend

@hxnnxs/opencode-voice 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,19 @@
 All notable changes to this project are documented here.
+## 0.1.5 - 2026-06-17
+### Fixed
+- Recovered missing `ffmpeg-static` at runtime on Windows by installing it locally when not present, preventing `No recorder found` failures after fresh plugin installs.
+## 0.1.4 - 2026-06-16
+### Added
+- Added bundled `ffmpeg` fallback for Windows recorder flow via `ffmpeg-static` so voice input no longer depends on user-installed `ffmpeg`.
+- Added Windows DirectShow microphone discovery and input handling in the recorder layer.
 ## 0.1.3 - 2026-06-15
 ### Fixed

package/README.md CHANGED Viewed

@@ -112,7 +112,7 @@ Planned sidecar models:
 | -------- | ------ |
 | Linux    | one-command engine/model install; recording uses `arecord`, `ffmpeg`, or `sox` |
 | macOS    | one-command engine/model install; recording uses `ffmpeg` AVFoundation until the native recorder sidecar ships |
-| Windows  | engine download path ready; recording needs the native recorder sidecar |
+| Windows  | one-command engine/model install; recording uses `ffmpeg` with DirectShow (system or bundled `ffmpeg` fallback) |
 ### Architecture
@@ -139,7 +139,7 @@ Voice input needs native audio and STT binaries. The JS plugin manages OpenCode
 - publish managed `whisper-cli` release assets before npm release
 - Rust recorder sidecar with `cpal` and VAD
 - Parakeet, GigaAM, SenseVoice, Canary, and Moonshine model support
-- Windows recorder support
+- Windows recorder stability and UX polish
 - faster streaming-style transcription
 ### Development

package/docs/README.es.md CHANGED Viewed

@@ -110,7 +110,7 @@ Modelos sidecar planeados:
 | ---------- | ------ |
 | Linux      | instalación engine/model en una orden; la grabación usa `arecord`, `ffmpeg` o `sox` |
 | macOS      | instalación engine/model en una orden; la grabación usa `ffmpeg` AVFoundation hasta el native recorder sidecar |
-| Windows    | ruta de descarga de engine lista; la grabación necesita el native recorder sidecar |
+| Windows    | instalación one-command de engine/model; grabación con `ffmpeg` + DirectShow (desde ffmpeg del sistema o fallback incluido) |
 ### Arquitectura
@@ -137,7 +137,7 @@ La entrada por voz necesita native audio y STT binaries. El plugin JS gestiona O
 - publicar managed `whisper-cli` release assets antes del npm release
 - Rust recorder sidecar con `cpal` y VAD
 - soporte para Parakeet, GigaAM, SenseVoice, Canary y Moonshine
-- Windows recorder support
+- Mejorar estabilidad y UX del recorder en Windows
 - streaming-style transcription más rápida
 ### Desarrollo

package/docs/README.ru.md CHANGED Viewed

@@ -110,7 +110,7 @@ Hold-to-talk отключен по умолчанию, потому что termi
 | --------- | ------ |
 | Linux     | one-command engine/model install; запись использует `arecord`, `ffmpeg` или `sox` |
 | macOS     | one-command engine/model install; запись использует `ffmpeg` AVFoundation до native recorder sidecar |
-| Windows   | путь скачивания engine готов; recording ждёт native recorder sidecar |
+| Windows   | one-command engine/model install; запись через `ffmpeg` + DirectShow (через системный ffmpeg или встроенный fallback) |
 ### Архитектура
@@ -137,7 +137,7 @@ Voice input требует native audio и STT binaries. JS-плагин упр
 - опубликовать managed `whisper-cli` release assets перед npm release
 - Rust recorder sidecar с `cpal` и VAD
 - поддержка Parakeet, GigaAM, SenseVoice, Canary и Moonshine
-- Windows recorder support
+- Улучшение устойчивости и UX Windows recorder
 - более быстрая streaming-style transcription
 ### Разработка

package/docs/README.zh.md CHANGED Viewed

@@ -110,7 +110,7 @@ ctrl+r -> 停止、转写并插入文本
 | ------- | ---- |
 | Linux   | 一条命令安装 engine/model；录音使用 `arecord`、`ffmpeg` 或 `sox` |
 | macOS   | 一条命令安装 engine/model；native recorder sidecar 发布前使用 `ffmpeg` AVFoundation |
-| Windows | engine 下载路径已准备好；录音还需要 native recorder sidecar |
+| Windows | 下载 engine/model 一条命令完成；通过 `ffmpeg` + DirectShow 录音（使用系统 ffmpeg 或内置备用） |
 ### 架构
@@ -137,7 +137,7 @@ ctrl+r -> 停止、转写并插入文本
 - npm release 前发布 managed `whisper-cli` release assets
 - 使用 `cpal` 和 VAD 的 Rust recorder sidecar
 - 支持 Parakeet、GigaAM、SenseVoice、Canary 和 Moonshine
-- Windows recorder support
+- 更完善的 Windows 录音体验和稳定性
 - 更快的 streaming-style transcription
 ### 开发

package/index.js CHANGED Viewed

@@ -325,6 +325,7 @@ function showLanguagePicker(ctx) {
 function showMicrophonePicker(ctx) {
   const settings = readSettings(ctx.api.kv);
+  const placeholder = process.platform === "win32" ? "default, audio=default, \"Microphone (Name)\"" : "default, hw:0,0, pulse, :0, ...";
   const devices = listMicrophones();
   setDialog(ctx, "large", () =>
     ctx.api.ui.DialogSelect({
@@ -339,7 +340,7 @@ function showMicrophonePicker(ctx) {
         if (option.value === "__custom") {
           showPrompt(ctx, {
             title: "Custom microphone device",
-            placeholder: "default, hw:0,0, pulse, :0, ...",
+            placeholder,
             value: settings.mic,
             onConfirm: (value) => {
               writeSetting(ctx.api.kv, "mic", value.trim());
@@ -361,10 +362,13 @@ function showDiagnostics(ctx) {
   const model = getModel(settings.model);
   const commandOptions = { ...ctx.options, downloadDir: settings.downloadDir };
   const whisperCli = resolveCommand("whisper-cli", commandOptions);
+  const ffmpeg = resolveCommand("ffmpeg", commandOptions);
+  const arecord = resolveCommand("arecord", commandOptions);
+  const sox = resolveCommand("sox", commandOptions);
   const engine = getEngineStatus("whisper.cpp", ctx.options, settings);
   const lines = [
     `Platform: ${process.platform}-${process.arch}`,
-    `Recorder: ffmpeg=${commandExists("ffmpeg") ? "yes" : "no"}, arecord=${commandExists("arecord") ? "yes" : "no"}, sox=${commandExists("sox") ? "yes" : "no"}`,
+    `Recorder: ffmpeg=${ffmpeg ? "yes" : "no"}${ffmpeg ? ` (${ffmpeg})` : ""}, arecord=${arecord ? "yes" : "no"}, sox=${sox ? "yes" : "no"}`,
     `Engine: ${engine.id}`,
     `Engine source: ${engine.source}`,
     `whisper-cli: ${whisperCli || "missing"}`,

package/lib/engine.js CHANGED Viewed

@@ -1,10 +1,18 @@
 import fs from "node:fs";
 import os from "node:os";
 import path from "node:path";
+import { createRequire } from "node:module";
 import { spawn, spawnSync } from "node:child_process";
+import { fileURLToPath } from "node:url";
 import { ensureDir } from "./download.js";
 import { getAudioDir, getEnginesDir, getModelPath } from "./models.js";
+const require = createRequire(import.meta.url);
+const FFMPEG_STATIC_PACKAGE = "ffmpeg-static@^5.2.0";
+const PLUGIN_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
+let ffmpegStaticPathCache;
+let ffmpegStaticInstallAttempted = false;
 const RECORDING_MIN_BYTES = 44;
 function isExecutable(file) {
@@ -31,6 +39,44 @@ function executableNames(command) {
   return [...new Set(names)];
 }
+function bundledFfmpegPath() {
+  if (process.platform !== "win32") return "";
+  if (ffmpegStaticPathCache !== undefined) return ffmpegStaticPathCache;
+  const resolveFromDependency = () => {
+    try {
+      const candidate = require("ffmpeg-static");
+      return typeof candidate === "string" ? path.resolve(candidate) : "";
+    } catch {
+      return "";
+    }
+  };
+  const installDependency = () => {
+    const npm = process.platform === "win32" ? "npm.cmd" : "npm";
+    try {
+      const result = spawnSync(npm, ["install", "--no-save", "--no-audit", "--no-fund", FFMPEG_STATIC_PACKAGE], {
+        cwd: PLUGIN_ROOT,
+        stdio: "ignore",
+      });
+      return result.status === 0;
+    } catch {
+      return false;
+    }
+  };
+  let candidate = resolveFromDependency();
+  if (!candidate && !ffmpegStaticInstallAttempted) {
+    ffmpegStaticInstallAttempted = true;
+    if (installDependency()) {
+      candidate = resolveFromDependency();
+    }
+  }
+  ffmpegStaticPathCache = candidate || "";
+  return ffmpegStaticPathCache;
+}
 function platformKey(options = {}) {
   return `${options.platform || process.platform}-${options.arch || process.arch}`;
 }
@@ -52,6 +98,11 @@ function candidateCommands(command, options = {}) {
     if (process.env.OPENCODE_VOICE_WHISPER_CLI) candidates.push(process.env.OPENCODE_VOICE_WHISPER_CLI);
   }
+  if (command === "ffmpeg") {
+    const bundled = bundledFfmpegPath();
+    if (bundled) candidates.push(bundled);
+  }
   const bundledDir = getBundledEngineDir(command, options);
   if (bundledDir) {
     for (const name of executableNames(command)) candidates.push(path.join(bundledDir, name));
@@ -113,6 +164,23 @@ function childEnv(options = {}) {
   };
 }
+function normalizeWindowsAudioInput(device = "") {
+  const value = String(device).trim();
+  if (!value || value === "default") return "audio=default";
+  if (value.startsWith("audio=") || value.startsWith("video=")) return value;
+  return `audio=${value}`;
+}
+function parseWindowsMicrophones(stderr) {
+  const devices = new Set();
+  for (const line of stderr.split(/\r?\n/)) {
+    const match = line.match(/"([^"]+)"\s+\(audio\)/);
+    if (match?.[1]) devices.add(match[1]);
+  }
+  return [...devices].filter(Boolean);
+}
 export function listMicrophones() {
   if (process.platform === "linux" && commandExists("arecord")) {
     const result = spawnSync("arecord", ["-L"], { encoding: "utf8", stdio: ["ignore", "pipe", "ignore"] });
@@ -135,6 +203,15 @@ export function listMicrophones() {
       .map((id) => `:${id}`);
   }
+  if (process.platform === "win32" && commandExists("ffmpeg")) {
+    const result = spawnSync("ffmpeg", ["-hide_banner", "-f", "dshow", "-list_devices", "true", "-i", "dummy"], {
+      encoding: "utf8",
+      stdio: ["ignore", "ignore", "pipe"],
+    });
+    const devices = parseWindowsMicrophones(result.stderr || "");
+    return ["default", ...devices.filter((device) => device !== "default")];
+  }
   return ["default"];
 }
@@ -174,6 +251,17 @@ function buildRecorders(file, settings = {}) {
     });
   }
+  if (process.platform === "win32" && commandExists("ffmpeg")) {
+    const inputs = [...new Set([normalizeWindowsAudioInput(mic), "audio=default"])];
+    for (const input of inputs) {
+      recorders.push({
+        label: `ffmpeg dshow (${input.replace(/^audio=/, "")})`,
+        command: "ffmpeg",
+        args: ["-hide_banner", "-loglevel", "error", "-y", "-f", "dshow", "-i", input, "-ac", "1", "-ar", "16000", file],
+      });
+    }
+  }
   if (commandExists("sox")) {
     recorders.push({
       label: "sox default",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hxnnxs/opencode-voice",
-  "version": "0.1.3",
+  "version": "0.1.5",
   "description": "Local voice input plugin for OpenCode",
   "type": "module",
   "license": "MIT",
@@ -45,6 +45,9 @@
     "check": "node --check index.js && node --check lib/models.js && node --check lib/download.js && node --check lib/engine.js && node --check lib/engines.js && node --check bin/opencode-voice.js && node --check scripts/package-engine-asset.mjs && node --check scripts/build-engine-registry.mjs",
     "prepack": "npm run check"
   },
+  "dependencies": {
+    "ffmpeg-static": "^5.2.0"
+  },
   "publishConfig": {
     "access": "public",
     "provenance": true