speech-opencode 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,112 @@
+ # speech-opencode
+
+ Voice input plugin for [OpenCode](https://opencode.ai) using OpenAI Whisper.
+
+ Record audio from your microphone and transcribe it to text using OpenAI's Whisper API.
+
+ ## Installation
+
+ Add the plugin to your `opencode.json`:
+
+ ```json
+ {
+   "plugin": ["speech-opencode"]
+ }
+ ```
+
+ ## Requirements
+
+ ### API Key
+
+ Set your OpenAI API key as an environment variable:
+
+ ```bash
+ export OPENAI_API_KEY=your-api-key
+ ```
+
+ ### Audio Recording Tools
+
+ **Linux (PulseAudio/PipeWire):**
+ ```bash
+ # Ubuntu/Debian
+ sudo apt install pulseaudio-utils
+
+ # Fedora
+ sudo dnf install pulseaudio-utils
+
+ # Arch
+ sudo pacman -S pulseaudio-utils
+ ```
+
+ **macOS:**
+ ```bash
+ brew install sox
+ ```
+
+ ## Usage
+
+ Once installed, OpenCode will have access to a `voice` tool. You can ask OpenCode to use it:
+
+ - "Listen to my voice"
+ - "Record what I say"
+ - "Use voice input"
+ - "Transcribe my speech for 10 seconds"
+
+ The tool accepts an optional `duration` parameter (default: 5 seconds, max: 60 seconds).
+
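+ For reference, this is roughly the shape of the tool the plugin registers with OpenCode. It is a simplified sketch condensed from the bundled `dist/index.js`; the stubbed `execute` body stands in for the real recording and transcription steps described under "How It Works" below:
+
+ ```typescript
+ import { tool, type Plugin } from "@opencode-ai/plugin"
+
+ // Simplified sketch of the `voice` tool registration (see dist/index.js for the full version).
+ const plugin: Plugin = async () => ({
+   tool: {
+     voice: tool({
+       description:
+         "Records audio from the user's microphone and transcribes it using OpenAI Whisper.",
+       args: {
+         // Optional recording length in seconds; the real plugin clamps it to maxDuration.
+         duration: tool.schema.number().optional().describe("Recording duration in seconds."),
+       },
+       async execute(args) {
+         const duration = Math.min(args.duration ?? 5, 60)
+         // The real implementation records the microphone here and sends the audio to Whisper.
+         return `Would record for ${duration} seconds and return the transcribed text.`
+       },
+     }),
+   },
+ })
+
+ export default plugin
+ ```
+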
+ ## Configuration
+
+ For advanced configuration, create a local plugin file:
+
+ **.opencode/plugin/voice.ts:**
+ ```typescript
+ import { VoicePlugin } from "speech-opencode"
+
+ export default VoicePlugin({
+   // Optional: specify language (auto-detects if not set)
+   language: "en",
+
+   // Optional: default recording duration in seconds
+   defaultDuration: 5,
+
+   // Optional: maximum recording duration in seconds
+   maxDuration: 60,
+
+   // Optional: override API key (defaults to OPENAI_API_KEY env var)
+   apiKey: process.env.MY_OPENAI_KEY,
+ })
+ ```
+
+ ## Supported Languages
+
+ Whisper supports many languages including:
+ - English (`en`)
+ - Spanish (`es`)
+ - French (`fr`)
+ - German (`de`)
+ - Japanese (`ja`)
+ - Chinese (`zh`)
+ - And many more...
+
+ Leave `language` unset for automatic detection.
+
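+ For example, to force transcription in a specific language rather than relying on auto-detection, pass its code to the plugin (a minimal sketch using the `language` option shown above; `"ja"` is just an illustrative choice):
+
+ ```typescript
+ import { VoicePlugin } from "speech-opencode"
+
+ // Always transcribe as Japanese instead of auto-detecting the language.
+ export default VoicePlugin({ language: "ja" })
+ ```
+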
+ ## How It Works
+
+ 1. Records audio from your default microphone using system tools
+ 2. Sends the audio to OpenAI's Whisper API for transcription
+ 3. Returns the transcribed text to OpenCode
+
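+ The transcription step of that pipeline, condensed from the bundled `dist/index.js` (recorder selection, error handling, and temp-file cleanup are omitted here):
+
+ ```typescript
+ import OpenAI from "openai"
+ import { readFileSync } from "fs"
+
+ // Send a recorded WAV file to OpenAI's Whisper API, as the plugin does internally.
+ async function transcribe(audioFilePath: string, apiKey: string, language?: string): Promise<string> {
+   const openai = new OpenAI({ apiKey })
+   const file = new File([readFileSync(audioFilePath)], "audio.wav", { type: "audio/wav" })
+   const transcription = await openai.audio.transcriptions.create({
+     file,
+     model: "whisper-1",
+     ...(language ? { language } : {}), // only pass a language code when one is configured
+   })
+   return transcription.text
+ }
+ ```
+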
+ ## Troubleshooting
+
+ ### No audio detected
+ - Check that your microphone is not muted
+ - Verify the correct input device is selected in your system settings
+ - On Linux, use `pavucontrol` to check input sources
+
+ ### Recording fails
+ - Ensure you have the required audio tools installed
+ - Check that your microphone permissions are granted
+
+ ## License
+
+ MIT
package/dist/index.d.ts ADDED
@@ -0,0 +1,36 @@
+ import { type Plugin } from "@opencode-ai/plugin";
+ export interface VoicePluginOptions {
+     /** OpenAI API key. Defaults to OPENAI_API_KEY env var */
+     apiKey?: string;
+     /** Language code for transcription (e.g., "en", "es", "fr"). Auto-detects if not specified */
+     language?: string;
+     /** Default recording duration in seconds */
+     defaultDuration?: number;
+     /** Maximum allowed recording duration in seconds */
+     maxDuration?: number;
+ }
+ /**
+  * OpenCode Voice Plugin
+  *
+  * Adds a 'voice' tool that records audio from the microphone and transcribes it
+  * using OpenAI's Whisper API.
+  *
+  * @example
+  * ```ts
+  * // In opencode.json
+  * {
+  *   "plugin": ["speech-opencode"]
+  * }
+  * ```
+  *
+  * @example
+  * ```ts
+  * // With options in .opencode/plugin/voice.ts
+  * import { VoicePlugin } from "speech-opencode"
+  * export default VoicePlugin({ language: "en", defaultDuration: 10 })
+  * ```
+  */
+ export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
+ declare const _default: Plugin;
+ export default _default;
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA6LvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,4CAA4C;IAC5C,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,oDAAoD;IACpD,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAoEnC,CAAA;;AAGH,wBAA4B"}
package/dist/index.js ADDED
@@ -0,0 +1,224 @@
+ import { tool } from "@opencode-ai/plugin";
+ import OpenAI from "openai";
+ import { spawn } from "child_process";
+ import { unlinkSync, readFileSync } from "fs";
+ import { tmpdir } from "os";
+ import { join } from "path";
+ /**
+  * Gets the first available non-monitor, non-bluetooth audio input source.
+  * Works with PulseAudio and PipeWire on Linux.
+  */
+ async function getDefaultInputDevice() {
+     return new Promise((resolve) => {
+         const pactl = spawn("pactl", ["list", "sources", "short"]);
+         let output = "";
+         pactl.stdout.on("data", (data) => {
+             output += data.toString();
+         });
+         pactl.on("close", () => {
+             const lines = output.trim().split("\n");
+             for (const line of lines) {
+                 const parts = line.split("\t");
+                 if (parts.length >= 2) {
+                     const name = parts[1];
+                     // Skip monitor sources and bluetooth (prefer hardware input)
+                     if (!name.includes(".monitor") && !name.includes("bluez")) {
+                         resolve(name);
+                         return;
+                     }
+                 }
+             }
+             resolve(null);
+         });
+         pactl.on("error", () => resolve(null));
+     });
+ }
+ /**
+  * Records audio from the microphone.
+  * - Linux: uses parecord (PulseAudio/PipeWire) or arecord (ALSA)
+  * - macOS: uses sox (rec command)
+  */
+ async function recordAudio(durationSeconds = 5) {
+     const tempFile = join(tmpdir(), `opencode-voice-${Date.now()}.wav`);
+     const platform = process.platform;
+     if (platform === "darwin") {
+         // macOS: use sox
+         return recordWithSox(tempFile, durationSeconds);
+     } else {
+         // Linux: use parecord or arecord
+         return recordWithPulseAudio(tempFile, durationSeconds);
+     }
+ }
+ async function recordWithSox(tempFile, durationSeconds) {
+     return new Promise((resolve, reject) => {
+         // rec (sox): 16 kHz, mono, 16-bit WAV, stopping after durationSeconds
+         const recorder = spawn("rec", [
+             "-q",
+             "-r", "16000",
+             "-c", "1",
+             "-b", "16",
+             tempFile,
+             "trim", "0", durationSeconds.toString(),
+         ]);
+         let errorOutput = "";
+         recorder.stderr.on("data", (data) => {
+             errorOutput += data.toString();
+         });
+         recorder.on("error", () => {
+             reject(new Error("sox not found. Please install it:\n" + " - macOS: brew install sox"));
+         });
+         recorder.on("close", (code) => {
+             if (code === 0) {
+                 resolve(tempFile);
+             } else {
+                 reject(new Error(`Recording failed: ${errorOutput}`));
+             }
+         });
+     });
+ }
+ async function recordWithPulseAudio(tempFile, durationSeconds) {
+     const inputDevice = await getDefaultInputDevice();
+     return new Promise((resolve, reject) => {
+         // Run parecord under `timeout` so recording stops after the requested duration (+1s margin)
+         const args = [(durationSeconds + 1).toString(), "parecord"];
+         if (inputDevice) {
+             args.push(`--device=${inputDevice}`);
+         }
+         args.push("--file-format=wav", tempFile);
+         const recorder = spawn("timeout", args);
+         let errorOutput = "";
+         recorder.stderr.on("data", (data) => {
+             errorOutput += data.toString();
+         });
+         recorder.on("error", () => {
+             // Fallback to arecord (ALSA)
+             const arecord = spawn("arecord", [
+                 "-q",
+                 "-f", "S16_LE",
+                 "-r", "16000",
+                 "-c", "1",
+                 "-d", durationSeconds.toString(),
+                 tempFile,
+             ]);
+             arecord.on("error", () => {
+                 reject(new Error("No audio recorder found. Please install:\n" +
+                     " - Ubuntu/Debian: sudo apt install pulseaudio-utils\n" +
+                     " - Fedora: sudo dnf install pulseaudio-utils\n" +
+                     " - Arch: sudo pacman -S pulseaudio-utils"));
+             });
+             arecord.on("close", (code) => {
+                 if (code === 0) {
+                     resolve(tempFile);
+                 } else {
+                     reject(new Error(`arecord failed with code ${code}`));
+                 }
+             });
+         });
+         recorder.on("close", (code) => {
+             // timeout returns 124 when it kills the process, which is expected
+             if (code === 0 || code === 124) {
+                 resolve(tempFile);
+             } else {
+                 reject(new Error(`Recording failed (code ${code}): ${errorOutput}`));
+             }
+         });
+     });
+ }
+ /**
+  * Transcribes audio using OpenAI's Whisper API
+  */
+ async function transcribeAudio(audioFilePath, apiKey, language) {
+     const openai = new OpenAI({ apiKey });
+     const audioFile = readFileSync(audioFilePath);
+     const file = new File([audioFile], "audio.wav", { type: "audio/wav" });
+     const transcription = await openai.audio.transcriptions.create({
+         file: file,
+         model: "whisper-1",
+         ...(language && { language }),
+     });
+     return transcription.text;
+ }
+ /**
+  * OpenCode Voice Plugin
+  *
+  * Adds a 'voice' tool that records audio from the microphone and transcribes it
+  * using OpenAI's Whisper API.
+  *
+  * @example
+  * ```ts
+  * // In opencode.json
+  * {
+  *   "plugin": ["speech-opencode"]
+  * }
+  * ```
+  *
+  * @example
+  * ```ts
+  * // With options in .opencode/plugin/voice.ts
+  * import { VoicePlugin } from "speech-opencode"
+  * export default VoicePlugin({ language: "en", defaultDuration: 10 })
+  * ```
+  */
+ export const VoicePlugin = (options = {}) => async (ctx) => {
+     const { apiKey = process.env.OPENAI_API_KEY, language, defaultDuration = 5, maxDuration = 60, } = options;
+     if (!apiKey) {
+         console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
+     }
+     return {
+         tool: {
+             voice: tool({
+                 description: "Records audio from the user's microphone and transcribes it using OpenAI Whisper. " +
+                     "Use this tool when the user wants to provide input via voice or speech. " +
+                     `The tool will record for the specified duration (default ${defaultDuration} seconds) and return the transcribed text.`,
+                 args: {
+                     duration: tool.schema
+                         .number()
+                         .optional()
+                         .describe(`Recording duration in seconds. Default is ${defaultDuration} seconds. Max is ${maxDuration} seconds.`),
+                 },
+                 async execute(args) {
+                     if (!apiKey) {
+                         return "Error: OPENAI_API_KEY environment variable is not set. Please set it to use voice transcription.";
+                     }
+                     // Clamp the requested duration to the configured maximum
+                     const duration = Math.min(args.duration || defaultDuration, maxDuration);
+                     let audioFile = null;
+                     try {
+                         audioFile = await recordAudio(duration);
+                         const transcription = await transcribeAudio(audioFile, apiKey, language);
+                         if (!transcription || transcription.trim() === "") {
+                             return "No speech detected. Please try again and speak clearly into your microphone.";
+                         }
+                         return `Transcribed speech: "${transcription}"`;
+                     } catch (error) {
+                         const errorMessage = error instanceof Error ? error.message : String(error);
+                         return `Voice recording/transcription failed: ${errorMessage}`;
+                     } finally {
+                         // Always delete the temporary recording
+                         if (audioFile) {
+                             try {
+                                 unlinkSync(audioFile);
+                             } catch {
+                                 // Ignore cleanup errors
+                             }
+                         }
+                     }
+                 },
+             }),
+         },
+     };
+ };
+ // Default export for simple usage
+ export default VoicePlugin();
package/package.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "name": "speech-opencode",
+   "version": "1.0.0",
+   "description": "Voice input plugin for OpenCode using OpenAI Whisper",
+   "keywords": [
+     "opencode",
+     "opencode-plugin",
+     "voice",
+     "speech-to-text",
+     "whisper",
+     "openai",
+     "transcription"
+   ],
+   "author": {
+     "name": "Amitav Krishna",
+     "email": "amitavkrishna@proton.me",
+     "url": "https://amitav.net"
+   },
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "https://github.com/amitav-krishna/speech-opencode"
+   },
+   "type": "module",
+   "main": "./dist/index.js",
+   "module": "./dist/index.js",
+   "types": "./dist/index.d.ts",
+   "exports": {
+     ".": {
+       "import": "./dist/index.js",
+       "types": "./dist/index.d.ts"
+     }
+   },
+   "files": [
+     "dist",
+     "README.md"
+   ],
+   "scripts": {
+     "build": "tsc",
+     "prepublishOnly": "npm run build"
+   },
+   "dependencies": {
+     "openai": "^4.77.0"
+   },
+   "devDependencies": {
+     "@opencode-ai/plugin": "latest",
+     "@types/node": "^20.0.0",
+     "typescript": "^5.0.0"
+   },
+   "peerDependencies": {
+     "@opencode-ai/plugin": ">=0.1.0"
+   },
+   "engines": {
+     "node": ">=18.0.0"
+   }
+ }