npm - opencode-voice - Versions diffs - 0.1.0 - Mend

opencode-voice 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/LICENSE +21 -0
package/README.md +142 -0
package/package.json +31 -0
package/src/audio/detector.ts +146 -0
package/src/audio/recorder.ts +118 -0
package/src/config.ts +72 -0
package/src/index.ts +177 -0
package/src/providers/chunked.ts +143 -0
package/src/providers/deepgram.ts +74 -0
package/src/providers/factory.ts +20 -0
package/src/providers/groq.ts +57 -0
package/src/providers/openai.ts +57 -0
package/src/providers/streaming.ts +187 -0
package/src/providers/wav-utils.ts +53 -0
package/src/session.ts +147 -0
package/src/types.ts +90 -0
package/src/ui/recording-indicator.tsx +83 -0

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 opencode-voice contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,142 @@
+# opencode-voice
+> Speech-to-text plugin for [OpenCode](https://opencode.ai) — speak into your microphone and see your words appear in the prompt field in real-time.
+## Features
+- 🎙️ **Real-time transcription** — words appear as you speak
+- 🔌 **3 providers** — Deepgram (streaming), Groq Whisper (fast), OpenAI Whisper
+- 🖥️ **Cross-platform** — macOS, Linux, Windows
+- ⌨️ **Simple toggle** — `Ctrl+Shift+V` to start/stop
+- 👁️ **Live preview** — interim text shown in overlay, final text inserted in prompt
+- ✏️ **Review before send** — text goes to prompt for editing, never auto-sent
+## Requirements
+A recording tool must be installed on your system:
+| Platform | Recommended | Install                                              |
+| -------- | ----------- | ---------------------------------------------------- |
+| macOS    | SoX         | `brew install sox`                                   |
+| Linux    | SoX         | `sudo apt install sox`                               |
+| Windows  | SoX         | `winget install sox` or `choco install sox.portable` |
+> **Fallback**: if SoX is not installed, FFmpeg is used on macOS and Linux, and `arecord` (ALSA) is also tried on Linux.
+## Installation
+Add to your OpenCode config (`~/.config/opencode/opencode.json`):
+```json
+{
+  "plugin": [["opencode-voice", { "provider": "deepgram" }]]
+}
+```
+Or without options (configure via environment variables only):
+```json
+{
+  "plugin": ["opencode-voice"]
+}
+```
+## Configuration
+### Plugin Options (in opencode.json)
+| Option            | Type                                           | Default                      | Description                         |
+| ----------------- | ---------------------------------------------- | ---------------------------- | ----------------------------------- |
+| `provider`        | `"deepgram"` \| `"groq"` \| `"openai-whisper"` | —                            | STT provider to use                 |
+| `language`        | `string`                                       | auto-detect                  | Language code (e.g. `"en"`, `"fr"`) |
+| `chunkDurationMs` | `number`                                       | 5000 (Groq) / 10000 (OpenAI) | Chunk size for HTTP providers       |
+### Environment Variables
+| Variable                  | Description                                              |
+| ------------------------- | -------------------------------------------------------- |
+| `OPENCODE_VOICE_PROVIDER` | Override provider (takes precedence over plugin options) |
+| `OPENCODE_VOICE_LANGUAGE` | Override language                                        |
+| `DEEPGRAM_API_KEY`        | Deepgram API key                                         |
+| `GROQ_API_KEY`            | Groq API key                                             |
+| `OPENAI_API_KEY`          | OpenAI API key                                           |
+> **Security**: API keys are read from environment variables only — never stored in config files.
+## Usage
+1. Press **`Ctrl+Shift+V`** to start recording
+2. Speak — you'll see a `● Recording...` indicator with live preview
+3. Press **`Ctrl+Shift+V`** again to stop
+4. The transcribed text appears in the prompt field
+5. Review/edit, then press **Enter** to send
+## Providers
+| Provider           | Protocol  | Latency | Interim Results    | Best For            |
+| ------------------ | --------- | ------- | ------------------ | ------------------- |
+| **Deepgram**       | WebSocket | ~100ms  | ✅ Yes             | Real-time streaming |
+| **Groq**           | HTTP      | ~200ms  | ❌ No (5s chunks)  | Speed + cost        |
+| **OpenAI Whisper** | HTTP      | ~500ms  | ❌ No (10s chunks) | Accuracy            |
+### Deepgram (Recommended)
+Best real-time experience. Uses WebSocket streaming with interim results.
+```json
+["opencode-voice", { "provider": "deepgram" }]
+```
+Get a free API key at [deepgram.com](https://deepgram.com).
+### Groq Whisper
+Ultra-fast HTTP transcription (189x realtime). Good balance of speed and cost.
+```json
+["opencode-voice", { "provider": "groq" }]
+```
+Get a free API key at [console.groq.com](https://console.groq.com).
+### OpenAI Whisper
+Most widely used. Requires an OpenAI API key.
+```json
+["opencode-voice", { "provider": "openai-whisper" }]
+```
+## Troubleshooting
+### "No recording tool found"
+Install SoX for your platform (see Requirements above).
+### "Invalid API key"
+Check that your API key environment variable is set correctly:
+```bash
+echo $DEEPGRAM_API_KEY  # or GROQ_API_KEY / OPENAI_API_KEY
+```
+### No microphone input
+- Check that your microphone is connected and set as the default input device
+- On Linux, ensure PulseAudio is running: `pulseaudio --check`
+- On macOS, grant microphone permissions to your terminal app
+### Text appears in wrong position
+The transcribed text is inserted at the current cursor position in the prompt. If you typed text before recording, the cursor normally sits at the end of that text, so the transcription is appended after it.
+## Roadmap (v2)
+- Google Cloud Speech (gRPC streaming)
+- Local Whisper (whisper.cpp, no API key needed)
+- OpenAI Realtime API (WebSocket, ultra-low latency)
+## License
+MIT

package/package.json ADDED Viewed

@@ -0,0 +1,31 @@
+{
+  "name": "opencode-voice",
+  "version": "0.1.0",
+  "type": "module",
+  "description": "Speech-to-text plugin for OpenCode — voice input with Deepgram, Groq, and OpenAI Whisper",
+  "keywords": ["opencode", "plugin", "voice", "speech-to-text", "stt", "whisper", "deepgram"],
+  "license": "MIT",
+  "author": "unknoownu",
+  "exports": {
+    "./tui": "./src/index.ts"
+  },
+  "files": ["src/", "README.md", "LICENSE"],
+  "engines": {
+    "opencode": ">=1.3.0"
+  },
+  "peerDependencies": {
+    "@opencode-ai/plugin": ">=1.3.0",
+    "@opentui/core": ">=0.1.95",
+    "@opentui/solid": ">=0.1.95",
+    "solid-js": ">=1.0.0"
+  },
+  "devDependencies": {
+    "@opencode-ai/plugin": "1.3.13",
+    "@opencode-ai/sdk": "1.3.13",
+    "@opentui/core": "0.1.95",
+    "@opentui/solid": "0.1.95",
+    "@types/bun": "latest",
+    "solid-js": "latest",
+    "typescript": "^5.0.0"
+  }
+}

package/src/audio/detector.ts ADDED Viewed

@@ -0,0 +1,146 @@
+import type { RecordingTool } from "../types.ts";
+/** A recording tool found on this machine, with the resolved path of its executable. */
+export type DetectedTool = { tool: RecordingTool; path: string };
+/**
+ * Detect an available recording tool in priority order (platform-aware).
+ *
+ * Lookup order: `sox` on every platform, then `ffmpeg` (non-Windows only),
+ * then `arecord` (Linux only). Each candidate is resolved with `Bun.which`,
+ * so only executables reachable through the current PATH are found.
+ *
+ * PowerShell is intentionally NOT reported as a tool: the "powershell" command
+ * produced by `buildRecordCommand` only runs a comment, so the process exits
+ * immediately without emitting any audio. Returning `null` gives callers the
+ * "no recording tool" result instead of a tool that can never record.
+ *
+ * @param platform - Override platform (defaults to process.platform)
+ * @returns The first tool found together with its path, or `null` if none is available
+ */
+export async function detectRecordingTool(
+  platform: string = process.platform,
+): Promise<DetectedTool | null> {
+  // Candidates in priority order; `supported` gates the platform-specific ones.
+  const candidates: Array<{ tool: RecordingTool; supported: boolean }> = [
+    // SoX is always checked first (highest priority)
+    { tool: "sox", supported: true },
+    // FFmpeg — macOS and Linux only (Windows needs device enumeration)
+    { tool: "ffmpeg", supported: platform !== "win32" },
+    // arecord (ALSA) — Linux only
+    { tool: "arecord", supported: platform === "linux" },
+  ];
+  for (const { tool, supported } of candidates) {
+    if (!supported) continue;
+    const path = Bun.which(tool);
+    if (path) return { tool, path };
+  }
+  return null;
+}
+/**
+ * Produce the argv for capturing mono, 16-bit signed PCM audio with `tool`.
+ * The result is meant to be handed to Bun.spawn as `[cmd, ...args]`; its
+ * first element is always the bare tool name.
+ *
+ * @param tool - The recording tool to use
+ * @param sampleRate - Sample rate in Hz (default 16000)
+ * @param platform - Override platform (defaults to process.platform)
+ * @returns The command followed by its arguments
+ * @throws Error when `tool` is not one of the known recording tools
+ */
+export function buildRecordCommand(
+  tool: RecordingTool,
+  sampleRate: number = 16000,
+  platform: string = process.platform,
+): string[] {
+  const rate = String(sampleRate);
+  if (tool === "sox") {
+    // Windows reads from the waveaudio driver; everywhere else uses the default device (-d)
+    const source = platform === "win32" ? ["-t", "waveaudio", "0"] : ["-d"];
+    const rawPcm = ["-t", "raw", "-b", "16", "-e", "signed-integer", "-c", "1"];
+    return ["sox", ...source, ...rawPcm, "-r", rate, "-"];
+  }
+  if (tool === "ffmpeg") {
+    // macOS captures through AVFoundation; every other platform goes through PulseAudio
+    const [inputFormat, device] =
+      platform === "darwin" ? ["avfoundation", ":default"] : ["pulse", "default"];
+    return [
+      "ffmpeg",
+      "-f", inputFormat,
+      "-i", device,
+      "-ar", rate,
+      "-ac", "1",
+      "-f", "s16le",
+      "-",
+    ];
+  }
+  if (tool === "arecord") {
+    // No output file argument is passed to arecord
+    return ["arecord", "-f", "S16_LE", "-r", rate, "-c", "1", "-t", "raw"];
+  }
+  if (tool === "powershell") {
+    // Windows fallback: the -Command payload is only a comment
+    const script = "# PowerShell audio capture — install sox for better results";
+    return ["powershell", "-NoProfile", "-Command", script];
+  }
+  throw new Error(`Unknown recording tool: ${tool}`);
+}
+/**
+ * Look up the shell command(s) that install SoX on the given platform.
+ *
+ * @param platform - Platform identifier such as "darwin", "linux" or "win32"
+ * @returns Install command text for known platforms, otherwise a pointer to the SoX website
+ */
+export function getInstallInstructions(platform: string): string {
+  const commandsByPlatform = new Map<string, string>([
+    ["darwin", "brew install sox"],
+    [
+      "linux",
+      "sudo apt install sox  (Debian/Ubuntu)\nsudo dnf install sox  (Fedora/RHEL)",
+    ],
+    ["win32", "winget install sox  OR  choco install sox.portable"],
+  ]);
+  const known = commandsByPlatform.get(platform);
+  if (known !== undefined) {
+    return known;
+  }
+  return "Install sox from https://sox.sourceforge.net";
+}

package/src/audio/recorder.ts ADDED Viewed

@@ -0,0 +1,118 @@
+import {
+  RecorderError,
+  type RecorderOptions,
+  type RecordingTool,
+} from "../types";
+import { buildRecordCommand } from "./detector";
+type SpawnedProcess = ReturnType<typeof Bun.spawn>;
+type SpawnFn = typeof Bun.spawn;
+/**
+ * Captures audio by running an external recording tool as a child process
+ * and exposing that process's stdout as a byte stream.
+ *
+ * The command line comes from `buildRecordCommand`, with its first element
+ * replaced by the resolved `toolPath`.
+ */
+export class Recorder {
+  /** Child process owned by this recorder; set for the whole startup window too. */
+  private proc: SpawnedProcess | null = null;
+  /** True from the moment start() has validated the process until it stops or exits. */
+  private recording = false;
+  /** Listener told about an unexpected exit of the recording process. */
+  private errorCallback: ((err: Error) => void) | null = null;
+  private readonly options: Required<RecorderOptions>;
+  /**
+   * @param tool - Which recording tool's command line to build
+   * @param toolPath - Executable to run in place of the bare tool name
+   * @param options - Capture settings (defaults: 16000 Hz, 1 channel, 16-bit);
+   *   only `sampleRate` is forwarded to the command builder
+   * @param _spawn - Process spawner, injectable (defaults to Bun.spawn)
+   */
+  constructor(
+    private readonly tool: RecordingTool,
+    private readonly toolPath: string,
+    options?: RecorderOptions,
+    private readonly _spawn: SpawnFn = Bun.spawn,
+  ) {
+    this.options = {
+      sampleRate: options?.sampleRate ?? 16000,
+      channels: options?.channels ?? 1,
+      bitDepth: options?.bitDepth ?? 16,
+    };
+  }
+  /**
+   * Spawn the recording tool and return its stdout stream.
+   *
+   * @returns The recording process's stdout
+   * @throws RecorderError if a recording is already running or starting, if
+   *   stop() is called before startup completes, if the tool exits within the
+   *   startup wait, or if the process has no stdout stream
+   */
+  async start(): Promise<ReadableStream<Uint8Array>> {
+    // `proc` is already set while a previous start() is still waiting below,
+    // so checking it too prevents a second call from spawning (and orphaning)
+    // another process during that window.
+    if (this.recording || this.proc) {
+      throw new RecorderError("Already recording");
+    }
+    const cmd = buildRecordCommand(this.tool, this.options.sampleRate);
+    const fullCmd = [this.toolPath, ...cmd.slice(1)];
+    const proc = this._spawn(fullCmd, {
+      stdout: "pipe",
+      // Nothing in this class reads the tool's diagnostics, so do not pipe them.
+      stderr: "ignore",
+    });
+    this.proc = proc;
+    // Give the tool a moment to fail fast before declaring the recording live.
+    await Bun.sleep(300);
+    if (this.proc !== proc) {
+      // stop() ran during the startup wait and has already terminated the process.
+      throw new RecorderError("Recording was stopped before it started");
+    }
+    if (proc.exitCode !== null) {
+      this.proc = null;
+      throw new RecorderError(
+        "Recording tool exited unexpectedly. Check that your microphone is connected.",
+      );
+    }
+    if (!proc.stdout) {
+      this.proc = null;
+      // The process is alive but unusable: terminate it instead of leaking it.
+      try {
+        proc.kill("SIGKILL");
+      } catch {
+        // ignore kill failures; the process may already be gone
+      }
+      throw new RecorderError(
+        "Failed to open stdout stream from recording process",
+      );
+    }
+    this.recording = true;
+    void proc.exited.then((exitCode) => {
+      // Exits caused by stop(), or belonging to an older process, are expected.
+      if (!this.recording || this.proc !== proc) {
+        return;
+      }
+      this.recording = false;
+      this.proc = null;
+      this.errorCallback?.(
+        new RecorderError(
+          `Recording process exited unexpectedly with code ${exitCode}`,
+        ),
+      );
+    });
+    return proc.stdout as ReadableStream<Uint8Array>;
+  }
+  /**
+   * Terminate the recording process, if any, and wait for it to exit.
+   * Also effective while start() is still in its startup wait. Sends SIGTERM
+   * first and escalates to SIGKILL if the process is still alive after 5 s.
+   */
+  async stop(): Promise<void> {
+    const proc = this.proc;
+    if (!proc) {
+      return;
+    }
+    // Clear state first so the exit handler installed by start() stays silent.
+    this.recording = false;
+    this.proc = null;
+    try {
+      proc.kill("SIGTERM");
+    } catch {
+      try {
+        proc.kill();
+      } catch {
+        // ignore kill failures during shutdown
+      }
+    }
+    // Cancellable timer so no timeout stays pending once the process has exited.
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const timeout = new Promise<void>((resolve) => {
+      timer = setTimeout(resolve, 5000);
+    });
+    await Promise.race([proc.exited.then(() => undefined), timeout]);
+    clearTimeout(timer);
+    if (proc.exitCode === null) {
+      try {
+        // The graceful signal was not honoured in time: force termination.
+        proc.kill("SIGKILL");
+      } catch {
+        // ignore force-kill failures during shutdown
+      }
+    }
+  }
+  /** Whether a validated recording process is currently running. */
+  isRecording(): boolean {
+    return this.recording;
+  }
+  /**
+   * Register the listener invoked when the recording process exits on its own.
+   * A later registration replaces the earlier one.
+   */
+  onError(callback: (err: Error) => void): void {
+    this.errorCallback = callback;
+  }
+}

package/src/config.ts ADDED Viewed

@@ -0,0 +1,72 @@
+import type { VoiceConfig, VoiceProvider } from "./types.ts";
+import { ConfigError } from "./types.ts";
+// PluginOptions is Record<string, unknown> from @opencode-ai/plugin
+type PluginOptions = Record<string, unknown>;
+const VALID_PROVIDERS: VoiceProvider[] = ["groq", "openai-whisper", "deepgram"];
+/**
+ * Load and validate voice plugin configuration.
+ * Config comes from plugin options (opencode.json tuple format) + env var overrides.
+ * API keys come from env vars ONLY (never from config file).
+ *
+ * Rules applied here:
+ * - `OPENCODE_VOICE_PROVIDER` / `OPENCODE_VOICE_LANGUAGE` take precedence over
+ *   the matching plugin option.
+ * - Blank or whitespace-only strings count as "not set", for options and env
+ *   vars alike; surrounding whitespace is trimmed.
+ * - The provider name is matched case-insensitively against the valid list.
+ * - `chunkDurationMs`, when given as a number, must be finite and greater than 0.
+ *
+ * @param options - Plugin options from TUI plugin's second argument (may be undefined)
+ * @returns The validated configuration; `language` and `chunkDurationMs` are
+ *   `undefined` when not configured
+ * @throws ConfigError when no provider is configured, the provider is not a
+ *   valid one, or `chunkDurationMs` is not a positive finite number
+ */
+export function loadVoiceConfig(
+  options: PluginOptions | undefined,
+): VoiceConfig {
+  // Normalise a raw setting: non-strings and blank strings become undefined.
+  const readText = (value: unknown): string | undefined => {
+    if (typeof value !== "string") return undefined;
+    const trimmed = value.trim();
+    return trimmed === "" ? undefined : trimmed;
+  };
+  // Env vars win over plugin options.
+  const providerName =
+    readText(process.env.OPENCODE_VOICE_PROVIDER) ?? readText(options?.provider);
+  const language =
+    readText(process.env.OPENCODE_VOICE_LANGUAGE) ?? readText(options?.language);
+  if (!providerName) {
+    throw new ConfigError(
+      "No voice provider configured. Set OPENCODE_VOICE_PROVIDER env var or add provider to plugin options in opencode.json.",
+    );
+  }
+  // Narrow through the list itself rather than asserting the type with `as`.
+  const wanted = providerName.toLowerCase();
+  const provider = VALID_PROVIDERS.find((candidate) => candidate === wanted);
+  if (!provider) {
+    throw new ConfigError(
+      `Invalid provider "${providerName}". Valid providers: ${VALID_PROVIDERS.join(", ")}`,
+    );
+  }
+  let chunkDurationMs: number | undefined;
+  const rawChunk = options?.chunkDurationMs;
+  if (typeof rawChunk === "number") {
+    // typeof also accepts NaN, Infinity, zero and negatives; none is a usable duration.
+    if (!Number.isFinite(rawChunk) || rawChunk <= 0) {
+      throw new ConfigError(
+        `Invalid chunkDurationMs "${rawChunk}". Expected a positive number of milliseconds.`,
+      );
+    }
+    chunkDurationMs = rawChunk;
+  }
+  return { provider, language, chunkDurationMs };
+}
+/**
+ * Read the API key for `provider` from the environment: GROQ_API_KEY for
+ * "groq", OPENAI_API_KEY for "openai-whisper", DEEPGRAM_API_KEY for "deepgram".
+ *
+ * @param provider - Provider whose key should be looked up
+ * @returns The variable's value, or undefined when it is not set
+ */
+export function resolveApiKey(provider: VoiceProvider): string | undefined {
+  const env = process.env;
+  if (provider === "groq") {
+    return env.GROQ_API_KEY;
+  }
+  if (provider === "openai-whisper") {
+    return env.OPENAI_API_KEY;
+  }
+  if (provider === "deepgram") {
+    return env.DEEPGRAM_API_KEY;
+  }
+  return undefined;
+}