opencode-voice 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 opencode-voice contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,142 @@
1
+ # opencode-voice
2
+
3
+ > Speech-to-text plugin for [OpenCode](https://opencode.ai) — speak into your microphone and see your words appear in the prompt field in real-time.
4
+
5
+ ## Features
6
+
7
+ - 🎙️ **Real-time transcription** — words appear as you speak
8
+ - 🔌 **3 providers** — Deepgram (streaming), Groq Whisper (fast), OpenAI Whisper
9
+ - 🖥️ **Cross-platform** — macOS, Linux, Windows
10
+ - ⌨️ **Simple toggle** — `Ctrl+Shift+V` to start/stop
11
+ - 👁️ **Live preview** — interim text shown in overlay, final text inserted in prompt
12
+ - ✏️ **Review before send** — text goes to prompt for editing, never auto-sent
13
+
14
+ ## Requirements
15
+
16
+ A recording tool must be installed on your system:
17
+
18
+ | Platform | Recommended | Install |
19
+ | -------- | ----------- | ---------------------------------------------------- |
20
+ | macOS | SoX | `brew install sox` |
21
+ | Linux | SoX | `sudo apt install sox` |
22
+ | Windows | SoX | `winget install sox` or `choco install sox.portable` |
23
+
24
+ > **Fallback**: If SoX is not installed, FFmpeg is used on macOS and Linux, and `arecord` (ALSA) is tried on Linux when neither SoX nor FFmpeg is available.
25
+
26
+ ## Installation
27
+
28
+ Add to your OpenCode config (`~/.config/opencode/opencode.json`):
29
+
30
+ ```json
31
+ {
32
+ "plugin": [["opencode-voice", { "provider": "deepgram" }]]
33
+ }
34
+ ```
35
+
36
+ Or without options (configure via environment variables only):
37
+
38
+ ```json
39
+ {
40
+ "plugin": ["opencode-voice"]
41
+ }
42
+ ```
43
+
44
+ ## Configuration
45
+
46
+ ### Plugin Options (in opencode.json)
47
+
48
+ | Option | Type | Default | Description |
49
+ | ----------------- | ---------------------------------------------- | ---------------------------- | ----------------------------------- |
50
+ | `provider` | `"deepgram"` \| `"groq"` \| `"openai-whisper"` | — | STT provider to use |
51
+ | `language` | `string` | auto-detect | Language code (e.g. `"en"`, `"fr"`) |
52
+ | `chunkDurationMs` | `number` | 5000 (Groq) / 10000 (OpenAI) | Chunk size for HTTP providers |
53
+
54
+ ### Environment Variables
55
+
56
+ | Variable | Description |
57
+ | ------------------------- | -------------------------------------------------------- |
58
+ | `OPENCODE_VOICE_PROVIDER` | Override provider (takes precedence over plugin options) |
59
+ | `OPENCODE_VOICE_LANGUAGE` | Override language |
60
+ | `DEEPGRAM_API_KEY` | Deepgram API key |
61
+ | `GROQ_API_KEY` | Groq API key |
62
+ | `OPENAI_API_KEY` | OpenAI API key |
63
+
64
+ > **Security**: API keys are read from environment variables only — never stored in config files.
65
+
66
+ ## Usage
67
+
68
+ 1. Press **`Ctrl+Shift+V`** to start recording
69
+ 2. Speak — you'll see a `● Recording...` indicator with live preview
70
+ 3. Press **`Ctrl+Shift+V`** again to stop
71
+ 4. The transcribed text appears in the prompt field
72
+ 5. Review/edit, then press **Enter** to send
73
+
74
+ ## Providers
75
+
76
+ | Provider | Protocol | Latency | Interim Results | Best For |
77
+ | ------------------ | --------- | ------- | ------------------ | ------------------- |
78
+ | **Deepgram** | WebSocket | ~100ms | ✅ Yes | Real-time streaming |
79
+ | **Groq** | HTTP | ~200ms | ❌ No (5s chunks) | Speed + cost |
80
+ | **OpenAI Whisper** | HTTP | ~500ms | ❌ No (10s chunks) | Accuracy |
81
+
82
+ ### Deepgram (Recommended)
83
+
84
+ Best real-time experience. Uses WebSocket streaming with interim results.
85
+
86
+ ```json
87
+ ["opencode-voice", { "provider": "deepgram" }]
88
+ ```
89
+
90
+ Get a free API key at [deepgram.com](https://deepgram.com).
91
+
92
+ ### Groq Whisper
93
+
94
+ Ultra-fast HTTP transcription (189x realtime). Good balance of speed and cost.
95
+
96
+ ```json
97
+ ["opencode-voice", { "provider": "groq" }]
98
+ ```
99
+
100
+ Get a free API key at [console.groq.com](https://console.groq.com).
101
+
102
+ ### OpenAI Whisper
103
+
104
+ The most widely used option. Requires an OpenAI API key in the `OPENAI_API_KEY` environment variable.
105
+
106
+ ```json
107
+ ["opencode-voice", { "provider": "openai-whisper" }]
108
+ ```
109
+
110
+ ## Troubleshooting
111
+
112
+ ### "No recording tool found"
113
+
114
+ Install SoX for your platform (see Requirements above).
115
+
116
+ ### "Invalid API key"
117
+
118
+ Check that your API key environment variable is set correctly:
119
+
120
+ ```bash
121
+ echo $DEEPGRAM_API_KEY # or GROQ_API_KEY / OPENAI_API_KEY
122
+ ```
123
+
124
+ ### No microphone input
125
+
126
+ - Check that your microphone is connected and set as the default input device
127
+ - On Linux, ensure PulseAudio is running: `pulseaudio --check`
128
+ - On macOS, grant microphone permissions to your terminal app
129
+
130
+ ### Text appears in wrong position
131
+
132
+ The transcribed text is inserted at the cursor position in the prompt. If you typed text before recording and the cursor is at the end of it, the transcription is appended after that text.
133
+
134
+ ## Roadmap (v2)
135
+
136
+ - Google Cloud Speech (gRPC streaming)
137
+ - Local Whisper (whisper.cpp, no API key needed)
138
+ - OpenAI Realtime API (WebSocket, ultra-low latency)
139
+
140
+ ## License
141
+
142
+ MIT
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "opencode-voice",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Speech-to-text plugin for OpenCode — voice input with Deepgram, Groq, and OpenAI Whisper",
6
+ "keywords": ["opencode", "plugin", "voice", "speech-to-text", "stt", "whisper", "deepgram"],
7
+ "license": "MIT",
8
+ "author": "unknoownu",
9
+ "exports": {
10
+ "./tui": "./src/index.ts"
11
+ },
12
+ "files": ["src/", "README.md", "LICENSE"],
13
+ "engines": {
14
+ "opencode": ">=1.3.0"
15
+ },
16
+ "peerDependencies": {
17
+ "@opencode-ai/plugin": ">=1.3.0",
18
+ "@opentui/core": ">=0.1.95",
19
+ "@opentui/solid": ">=0.1.95",
20
+ "solid-js": ">=1.0.0"
21
+ },
22
+ "devDependencies": {
23
+ "@opencode-ai/plugin": "1.3.13",
24
+ "@opencode-ai/sdk": "1.3.13",
25
+ "@opentui/core": "0.1.95",
26
+ "@opentui/solid": "0.1.95",
27
+ "@types/bun": "latest",
28
+ "solid-js": "latest",
29
+ "typescript": "^5.0.0"
30
+ }
31
+ }
@@ -0,0 +1,146 @@
1
+ import type { RecordingTool } from "../types.ts";
2
+
3
/** A recording tool that was located on the PATH, plus the resolved path of its executable. */
export type DetectedTool = { tool: RecordingTool; path: string };

/**
 * Locate the first usable audio-recording executable.
 *
 * Candidates are probed in a fixed priority order and the first hit wins:
 * `sox` (every platform), `ffmpeg` (everything except Windows), `arecord`
 * (Linux only), then `powershell` followed by `pwsh` (Windows only).
 *
 * @param platform - Platform identifier to decide against (defaults to `process.platform`)
 * @returns The matched tool and its executable path, or `null` when no candidate was found
 */
export async function detectRecordingTool(
  platform: string = process.platform,
): Promise<DetectedTool | null> {
  const onWindows = platform === "win32";
  const onLinux = platform === "linux";

  // Ordered by priority; `binaries` lists the executable names to look up for each tool.
  const candidates: Array<{
    tool: RecordingTool;
    eligible: boolean;
    binaries: string[];
  }> = [
    { tool: "sox", eligible: true, binaries: ["sox"] },
    // FFmpeg capture on Windows would need device enumeration, so it is skipped there.
    { tool: "ffmpeg", eligible: !onWindows, binaries: ["ffmpeg"] },
    { tool: "arecord", eligible: onLinux, binaries: ["arecord"] },
    { tool: "powershell", eligible: onWindows, binaries: ["powershell", "pwsh"] },
  ];

  for (const candidate of candidates) {
    if (!candidate.eligible) continue;

    for (const binary of candidate.binaries) {
      const resolved = Bun.which(binary);
      if (resolved) return { tool: candidate.tool, path: resolved };
    }
  }

  return null;
}
36
+
37
/**
 * Assemble the argv used to capture raw microphone audio with the given tool.
 *
 * The first element is the bare tool name and the rest are its arguments, so
 * the result can be handed to `Bun.spawn([cmd, ...args])`. The sox, ffmpeg and
 * arecord commands all request mono, 16-bit signed little-endian samples.
 *
 * @param tool - The recording tool to build a command for
 * @param sampleRate - Sample rate in Hz (default 16000)
 * @param platform - Platform identifier to decide against (defaults to `process.platform`)
 * @returns The command followed by its arguments
 * @throws Error when `tool` is not one of the known recording tools
 */
export function buildRecordCommand(
  tool: RecordingTool,
  sampleRate: number = 16000,
  platform: string = process.platform,
): string[] {
  const rate = String(sampleRate);

  if (tool === "sox") {
    // Windows goes through the waveaudio driver; elsewhere sox reads the default device.
    const source = platform === "win32" ? ["-t", "waveaudio", "0"] : ["-d"];
    const rawPcmToStdout = [
      "-t", "raw",
      "-b", "16",
      "-e", "signed-integer",
      "-c", "1",
      "-r", rate,
      "-",
    ];
    return ["sox", ...source, ...rawPcmToStdout];
  }

  if (tool === "ffmpeg") {
    // macOS captures through AVFoundation; every other platform is treated as PulseAudio.
    const capture =
      platform === "darwin"
        ? { format: "avfoundation", device: ":default" }
        : { format: "pulse", device: "default" };
    return [
      "ffmpeg",
      "-f", capture.format,
      "-i", capture.device,
      "-ar", rate,
      "-ac", "1",
      "-f", "s16le",
      "-",
    ];
  }

  if (tool === "arecord") {
    return ["arecord", "-f", "S16_LE", "-r", rate, "-c", "1", "-t", "raw"];
  }

  if (tool === "powershell") {
    // Windows fallback: the command body is only a PowerShell comment, so it captures no audio.
    return [
      "powershell",
      "-NoProfile",
      "-Command",
      "# PowerShell audio capture — install sox for better results",
    ];
  }

  throw new Error(`Unknown recording tool: ${tool}`);
}
131
+
132
/**
 * Look up the shell command(s) for installing SoX on the given platform.
 *
 * @param platform - Platform identifier such as `"darwin"`, `"linux"` or `"win32"`
 * @returns Install instructions for that platform, or a pointer to the SoX
 *          website when the platform is not one of the three known ones
 */
export function getInstallInstructions(platform: string): string {
  // A Map (rather than a plain object) keeps names such as "toString" from matching.
  const instructionsByPlatform = new Map<string, string>([
    ["darwin", "brew install sox"],
    [
      "linux",
      "sudo apt install sox (Debian/Ubuntu)\nsudo dnf install sox (Fedora/RHEL)",
    ],
    ["win32", "winget install sox OR choco install sox.portable"],
  ]);

  return (
    instructionsByPlatform.get(platform) ??
    "Install sox from https://sox.sourceforge.net"
  );
}
@@ -0,0 +1,118 @@
1
+ import {
2
+ RecorderError,
3
+ type RecorderOptions,
4
+ type RecordingTool,
5
+ } from "../types";
6
+ import { buildRecordCommand } from "./detector";
7
+
8
type SpawnedProcess = ReturnType<typeof Bun.spawn>;
type SpawnFn = typeof Bun.spawn;

/**
 * Runs an external recording tool as a child process and exposes its stdout
 * as a stream of audio bytes.
 *
 * Lifecycle: `start()` spawns the tool and resolves with its stdout stream,
 * `stop()` terminates it. If the process exits on its own while recording,
 * the callback registered through `onError()` is invoked.
 *
 * NOTE(review): `channels` and `bitDepth` are stored in `options` but only
 * `sampleRate` is passed to `buildRecordCommand`, so they currently have no
 * effect on the spawned command.
 */
export class Recorder {
  /** How long to wait after spawning before deciding the tool started successfully. */
  private static readonly STARTUP_GRACE_MS = 300;
  /** How long `stop()` waits for a graceful exit before force-killing. */
  private static readonly STOP_TIMEOUT_MS = 5000;

  private proc: SpawnedProcess | null = null;
  private recording = false;
  private errorCallback: ((err: Error) => void) | null = null;
  private readonly options: Required<RecorderOptions>;

  /**
   * @param tool - Which recording tool's command line to build
   * @param toolPath - Executable path used as argv[0] when spawning
   * @param options - Optional audio settings; defaults are 16000 Hz, 1 channel, 16 bit
   * @param _spawn - Spawn implementation (defaults to `Bun.spawn`); injectable for tests
   */
  constructor(
    private readonly tool: RecordingTool,
    private readonly toolPath: string,
    options?: RecorderOptions,
    private readonly _spawn: SpawnFn = Bun.spawn,
  ) {
    this.options = {
      sampleRate: options?.sampleRate ?? 16000,
      channels: options?.channels ?? 1,
      bitDepth: options?.bitDepth ?? 16,
    };
  }

  /**
   * Spawn the recording tool and return its stdout stream.
   *
   * @returns The child process's stdout as a byte stream
   * @throws RecorderError if a recording is already running or starting, if the
   *         tool exits during the startup grace period, or if no stdout stream
   *         is available
   */
  async start(): Promise<ReadableStream<Uint8Array>> {
    // `recording` only flips to true after the grace period below, so `proc`
    // is checked as well to reject a second start() issued while the first
    // one is still waiting (which would otherwise spawn a second process).
    if (this.recording || this.proc) {
      throw new RecorderError("Already recording");
    }

    const cmd = buildRecordCommand(this.tool, this.options.sampleRate);
    // Swap the bare tool name for the resolved executable path.
    const fullCmd = [this.toolPath, ...cmd.slice(1)];

    // NOTE(review): stderr is piped but never read by this class; a tool that
    // writes a lot to stderr might eventually block on a full pipe — confirm.
    const proc = this._spawn(fullCmd, {
      stdout: "pipe",
      stderr: "pipe",
    });

    this.proc = proc;

    // Give the tool a moment to fail fast (e.g. no input device available).
    await Bun.sleep(Recorder.STARTUP_GRACE_MS);

    if (proc.exitCode !== null) {
      this.proc = null;
      throw new RecorderError(
        "Recording tool exited unexpectedly. Check that your microphone is connected.",
      );
    }

    if (!proc.stdout) {
      this.proc = null;
      // The process is still alive at this point; do not leave it running.
      Recorder.killQuietly(proc, "SIGTERM");
      throw new RecorderError(
        "Failed to open stdout stream from recording process",
      );
    }

    this.recording = true;

    void proc.exited.then((exitCode) => {
      // Ignore exits caused by stop() or belonging to a superseded process.
      if (!this.recording || this.proc !== proc) {
        return;
      }

      this.recording = false;
      this.proc = null;
      this.errorCallback?.(
        new RecorderError(
          `Recording process exited unexpectedly with code ${exitCode}`,
        ),
      );
    });

    return proc.stdout as ReadableStream<Uint8Array>;
  }

  /**
   * Stop the current recording, if any.
   *
   * Sends SIGTERM, waits up to five seconds for the process to exit, and
   * sends SIGKILL if it is still running after that. Does nothing when no
   * recording is active.
   */
  async stop(): Promise<void> {
    if (!this.proc || !this.recording) {
      return;
    }

    const proc = this.proc;
    // Clear state first so the exit handler installed by start() stays silent.
    this.recording = false;
    this.proc = null;

    try {
      proc.kill("SIGTERM");
    } catch {
      // Fall back to the runtime's default signal if SIGTERM was rejected.
      Recorder.killQuietly(proc);
    }

    let timer: ReturnType<typeof setTimeout> | undefined;
    const timedOut = new Promise<"timeout">((resolve) => {
      timer = setTimeout(() => resolve("timeout"), Recorder.STOP_TIMEOUT_MS);
    });

    let outcome: "exited" | "timeout";
    try {
      outcome = await Promise.race([
        proc.exited.then(() => "exited" as const),
        timedOut,
      ]);
    } finally {
      // Do not leave the timer pending once the process has already exited.
      clearTimeout(timer);
    }

    if (outcome === "timeout" && proc.exitCode === null) {
      // The process ignored SIGTERM: escalate to a signal it cannot ignore.
      Recorder.killQuietly(proc, "SIGKILL");
    }
  }

  /** @returns `true` while a recording process is considered active */
  isRecording(): boolean {
    return this.recording;
  }

  /**
   * Register the callback invoked when the recording process exits on its own.
   * A later registration replaces the earlier one.
   */
  onError(callback: (err: Error) => void): void {
    this.errorCallback = callback;
  }

  /** Best-effort kill: failures are ignored because this only runs during shutdown. */
  private static killQuietly(
    proc: SpawnedProcess,
    signal?: "SIGTERM" | "SIGKILL",
  ): void {
    try {
      if (signal === undefined) {
        proc.kill();
      } else {
        proc.kill(signal);
      }
    } catch {
      // ignore kill failures during shutdown
    }
  }
}
package/src/config.ts ADDED
@@ -0,0 +1,72 @@
1
+ import type { VoiceConfig, VoiceProvider } from "./types.ts";
2
+ import { ConfigError } from "./types.ts";
3
+
4
// PluginOptions is Record<string, unknown> from @opencode-ai/plugin
type PluginOptions = Record<string, unknown>;

/** Provider identifiers accepted by loadVoiceConfig. */
const VALID_PROVIDERS: readonly VoiceProvider[] = [
  "groq",
  "openai-whisper",
  "deepgram",
];

/** Type guard: true when `value` is one of the entries in VALID_PROVIDERS. */
function isVoiceProvider(value: string): value is VoiceProvider {
  return (VALID_PROVIDERS as readonly string[]).includes(value);
}

/**
 * Load and validate voice plugin configuration.
 *
 * Values come from the plugin options (opencode.json tuple format) and are
 * then overridden by the `OPENCODE_VOICE_PROVIDER` / `OPENCODE_VOICE_LANGUAGE`
 * environment variables. API keys are not read here.
 *
 * Normalization applied:
 * - The provider is trimmed and lower-cased before validation.
 * - Environment values are trimmed; a blank value counts as unset.
 * - `chunkDurationMs` is used only when it is a finite number greater than
 *   zero; any other value is ignored, leaving the field undefined.
 * - Options of the wrong type are ignored.
 *
 * @param options - Plugin options from the TUI plugin's second argument (may be undefined)
 * @returns The validated configuration
 * @throws ConfigError when no provider is configured or the provider is not recognized
 */
export function loadVoiceConfig(
  options: PluginOptions | undefined,
): VoiceConfig {
  let requestedProvider: string | undefined;
  let language: string | undefined;
  let chunkDurationMs: number | undefined;

  // 1. Read from plugin options
  if (options) {
    if (typeof options.provider === "string") {
      requestedProvider = options.provider;
    }
    if (typeof options.language === "string") {
      language = options.language;
    }
    const chunk = options.chunkDurationMs;
    // typeof NaN / Infinity is "number" too, so check the value as well as the type.
    if (typeof chunk === "number" && Number.isFinite(chunk) && chunk > 0) {
      chunkDurationMs = chunk;
    }
  }

  // 2. Env var overrides (take precedence over plugin options)
  const envProvider = process.env.OPENCODE_VOICE_PROVIDER?.trim();
  if (envProvider) requestedProvider = envProvider;

  const envLanguage = process.env.OPENCODE_VOICE_LANGUAGE?.trim();
  if (envLanguage) language = envLanguage;

  // 3. Validate provider
  const provider = requestedProvider?.trim().toLowerCase();
  if (!provider) {
    throw new ConfigError(
      "No voice provider configured. Set OPENCODE_VOICE_PROVIDER env var or add provider to plugin options in opencode.json.",
    );
  }
  if (!isVoiceProvider(provider)) {
    throw new ConfigError(
      `Invalid provider "${requestedProvider}". Valid providers: ${VALID_PROVIDERS.join(", ")}`,
    );
  }

  return { provider, language, chunkDurationMs };
}
58
+
59
/**
 * Read the API key for a provider from its environment variable.
 *
 * Each provider has its own variable: `GROQ_API_KEY`, `OPENAI_API_KEY` or
 * `DEEPGRAM_API_KEY`.
 *
 * @param provider - The provider whose key should be looked up
 * @returns The variable's value, or `undefined` when it is not set
 */
export function resolveApiKey(provider: VoiceProvider): string | undefined {
  const envVarByProvider = new Map<VoiceProvider, string>([
    ["groq", "GROQ_API_KEY"],
    ["openai-whisper", "OPENAI_API_KEY"],
    ["deepgram", "DEEPGRAM_API_KEY"],
  ]);

  const envVar = envVarByProvider.get(provider);
  return envVar === undefined ? undefined : process.env[envVar];
}
+ }