speech-opencode 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -0
- package/dist/index.d.ts +36 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +224 -0
- package/package.json +56 -0
package/README.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# speech-opencode
|
|
2
|
+
|
|
3
|
+
Voice input plugin for [OpenCode](https://opencode.ai) using OpenAI Whisper.
|
|
4
|
+
|
|
5
|
+
Record audio from your microphone and transcribe it to text using OpenAI's Whisper API.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
Add the plugin to your `opencode.json`:
|
|
10
|
+
|
|
11
|
+
```json
|
|
12
|
+
{
|
|
13
|
+
"plugin": ["speech-opencode"]
|
|
14
|
+
}
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Requirements
|
|
18
|
+
|
|
19
|
+
### API Key
|
|
20
|
+
|
|
21
|
+
Set your OpenAI API key as an environment variable:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
export OPENAI_API_KEY=your-api-key
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Audio Recording Tools
|
|
28
|
+
|
|
29
|
+
**Linux (PulseAudio/PipeWire):**
|
|
30
|
+
```bash
|
|
31
|
+
# Ubuntu/Debian
|
|
32
|
+
sudo apt install pulseaudio-utils
|
|
33
|
+
|
|
34
|
+
# Fedora
|
|
35
|
+
sudo dnf install pulseaudio-utils
|
|
36
|
+
|
|
37
|
+
# Arch
|
|
38
|
+
sudo pacman -S libpulse
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**macOS:**
|
|
42
|
+
```bash
|
|
43
|
+
brew install sox
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Usage
|
|
47
|
+
|
|
48
|
+
Once installed, OpenCode will have access to a `voice` tool. You can ask OpenCode to use it:
|
|
49
|
+
|
|
50
|
+
- "Listen to my voice"
|
|
51
|
+
- "Record what I say"
|
|
52
|
+
- "Use voice input"
|
|
53
|
+
- "Transcribe my speech for 10 seconds"
|
|
54
|
+
|
|
55
|
+
The tool accepts an optional `duration` parameter (default: 5 seconds, max: 60 seconds).
|
|
56
|
+
|
|
57
|
+
## Configuration
|
|
58
|
+
|
|
59
|
+
For advanced configuration, create a local plugin file:
|
|
60
|
+
|
|
61
|
+
**.opencode/plugin/voice.ts:**
|
|
62
|
+
```typescript
|
|
63
|
+
import { VoicePlugin } from "speech-opencode"
|
|
64
|
+
|
|
65
|
+
export default VoicePlugin({
|
|
66
|
+
// Optional: specify language (auto-detects if not set)
|
|
67
|
+
language: "en",
|
|
68
|
+
|
|
69
|
+
// Optional: default recording duration in seconds
|
|
70
|
+
defaultDuration: 5,
|
|
71
|
+
|
|
72
|
+
// Optional: maximum recording duration in seconds
|
|
73
|
+
maxDuration: 60,
|
|
74
|
+
|
|
75
|
+
// Optional: override API key (defaults to OPENAI_API_KEY env var)
|
|
76
|
+
apiKey: process.env.MY_OPENAI_KEY,
|
|
77
|
+
})
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Supported Languages
|
|
81
|
+
|
|
82
|
+
Whisper supports many languages including:
|
|
83
|
+
- English (`en`)
|
|
84
|
+
- Spanish (`es`)
|
|
85
|
+
- French (`fr`)
|
|
86
|
+
- German (`de`)
|
|
87
|
+
- Japanese (`ja`)
|
|
88
|
+
- Chinese (`zh`)
|
|
89
|
+
- And many more...
|
|
90
|
+
|
|
91
|
+
Leave `language` unset for automatic detection.
|
|
92
|
+
|
|
93
|
+
## How It Works
|
|
94
|
+
|
|
95
|
+
1. Records audio from your default microphone using system tools
|
|
96
|
+
2. Sends the audio to OpenAI's Whisper API for transcription
|
|
97
|
+
3. Returns the transcribed text to OpenCode
|
|
98
|
+
|
|
99
|
+
## Troubleshooting
|
|
100
|
+
|
|
101
|
+
### No audio detected
|
|
102
|
+
- Check that your microphone is not muted
|
|
103
|
+
- Verify the correct input device is selected in your system settings
|
|
104
|
+
- On Linux, use `pavucontrol` to check input sources
|
|
105
|
+
|
|
106
|
+
### Recording fails
|
|
107
|
+
- Ensure you have the required audio tools installed
|
|
108
|
+
- Check that your microphone permissions are granted
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { type Plugin } from "@opencode-ai/plugin";
|
|
2
|
+
/** Options accepted by the {@link VoicePlugin} factory. All fields are optional. */
export interface VoicePluginOptions {
    /** OpenAI API key. Defaults to OPENAI_API_KEY env var */
    apiKey?: string;
    /** Language code for transcription (e.g., "en", "es", "fr"). Auto-detects if not specified */
    language?: string;
    /** Default recording duration in seconds (implementation default: 5) */
    defaultDuration?: number;
    /** Maximum allowed recording duration in seconds (implementation default: 60) */
    maxDuration?: number;
}
|
|
12
|
+
/**
 * OpenCode Voice Plugin
 *
 * Adds a 'voice' tool that records audio from the microphone and transcribes it
 * using OpenAI's Whisper API.
 *
 * @example
 * ```ts
 * // In opencode.json
 * {
 *   "plugin": ["speech-opencode"]
 * }
 * ```
 *
 * @example
 * ```ts
 * // With options in .opencode/plugin/voice.ts
 * import { VoicePlugin } from "speech-opencode"
 * export default VoicePlugin({ language: "en", defaultDuration: 10 })
 * ```
 */
export declare const VoicePlugin: (options?: VoicePluginOptions) => Plugin;
/** Default export: `VoicePlugin()` built with all default options. */
declare const _default: Plugin;
export default _default;
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,MAAM,EAAQ,MAAM,qBAAqB,CAAA;AA6LvD,MAAM,WAAW,kBAAkB;IACjC,yDAAyD;IACzD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8FAA8F;IAC9F,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,4CAA4C;IAC5C,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,oDAAoD;IACpD,WAAW,CAAC,EAAE,MAAM,CAAA;CACrB;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,eAAO,MAAM,WAAW,GACrB,UAAS,kBAAuB,KAAG,MAoEnC,CAAA;;AAGH,wBAA4B"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import { tool } from "@opencode-ai/plugin";
|
|
2
|
+
import OpenAI from "openai";
|
|
3
|
+
import { spawn } from "child_process";
|
|
4
|
+
import { unlinkSync, readFileSync } from "fs";
|
|
5
|
+
import { tmpdir } from "os";
|
|
6
|
+
import { join } from "path";
|
|
7
|
+
/**
 * Returns the name of the first usable hardware audio input source reported
 * by `pactl list sources short`, skipping monitor sources and bluetooth
 * ("bluez") devices. Resolves to null when pactl is unavailable or when no
 * matching source exists. Works with PulseAudio and PipeWire on Linux.
 */
async function getDefaultInputDevice() {
    return new Promise((resolve) => {
        const proc = spawn("pactl", ["list", "sources", "short"]);
        let captured = "";
        proc.stdout.on("data", (chunk) => {
            captured += chunk.toString();
        });
        // pactl missing entirely — treat as "no device found".
        proc.on("error", () => resolve(null));
        proc.on("close", () => {
            // Each row looks like "<index>\t<name>\t..."; take the first name
            // that is neither a monitor source nor a bluetooth device.
            const found = captured
                .trim()
                .split("\n")
                .map((row) => row.split("\t")[1])
                .find((name) => name !== undefined &&
                    !name.includes(".monitor") &&
                    !name.includes("bluez"));
            resolve(found ?? null);
        });
    });
}
|
|
36
|
+
/**
 * Records audio from the default microphone into a fresh temp WAV file and
 * resolves with the file path.
 * - macOS (`darwin`): sox's `rec` command
 * - anything else (assumed Linux): parecord (PulseAudio/PipeWire) with an
 *   arecord (ALSA) fallback
 */
async function recordAudio(durationSeconds = 5) {
    const target = join(tmpdir(), `opencode-voice-${Date.now()}.wav`);
    return process.platform === "darwin"
        ? recordWithSox(target, durationSeconds)
        : recordWithPulseAudio(target, durationSeconds);
}
|
|
53
|
+
/**
 * Records `durationSeconds` of 16 kHz mono 16-bit audio into `tempFile`
 * using sox's `rec` command (macOS path). Resolves with the file path;
 * rejects when sox is not installed or the recorder exits non-zero.
 */
async function recordWithSox(tempFile, durationSeconds) {
    return new Promise((resolve, reject) => {
        // -q quiet, 16 kHz / 1 channel / 16-bit; "trim 0 N" stops after N seconds.
        const soxArgs = [
            "-q",
            "-r", "16000",
            "-c", "1",
            "-b", "16",
            tempFile,
            "trim", "0", String(durationSeconds),
        ];
        const proc = spawn("rec", soxArgs);
        let stderrText = "";
        proc.stderr.on("data", (chunk) => {
            stderrText += chunk.toString();
        });
        proc.on("error", () => {
            reject(new Error("sox not found. Please install it:\n" + " - macOS: brew install sox"));
        });
        proc.on("close", (exitCode) => {
            if (exitCode === 0) {
                resolve(tempFile);
            }
            else {
                reject(new Error(`Recording failed: ${stderrText}`));
            }
        });
    });
}
|
|
85
|
+
/**
 * Records `durationSeconds` of audio into `tempFile` on Linux.
 *
 * Primary path: `timeout <secs> parecord ...` — `timeout` is used because
 * parecord has no built-in duration limit, so exit code 124 (killed by
 * timeout) is expected and treated as success. Falls back to `arecord`
 * (ALSA, fixed 16 kHz mono 16-bit) when `timeout`/`parecord` cannot spawn.
 * Resolves with the file path; rejects when no recorder is available or
 * recording fails.
 */
async function recordWithPulseAudio(tempFile, durationSeconds) {
    const inputDevice = await getDefaultInputDevice();
    return new Promise((resolve, reject) => {
        // One extra second so the requested duration is fully captured.
        const args = [(durationSeconds + 1).toString(), "parecord"];
        if (inputDevice) {
            args.push(`--device=${inputDevice}`);
        }
        args.push("--file-format=wav", tempFile);
        const recorder = spawn("timeout", args);
        let errorOutput = "";
        // BUGFIX: Node emits "close" on the primary process even after a spawn
        // "error". Without this flag, that late "close" (non-zero/null code)
        // raced the arecord fallback and could reject the promise even when
        // the fallback recording succeeded.
        let usingFallback = false;
        recorder.stderr.on("data", (data) => {
            errorOutput += data.toString();
        });
        recorder.on("error", () => {
            usingFallback = true;
            // Fallback to arecord
            const arecord = spawn("arecord", [
                "-q",
                "-f", "S16_LE",
                "-r", "16000",
                "-c", "1",
                "-d", durationSeconds.toString(),
                tempFile,
            ]);
            arecord.on("error", () => {
                reject(new Error("No audio recorder found. Please install:\n" +
                    " - Ubuntu/Debian: sudo apt install pulseaudio-utils\n" +
                    " - Fedora: sudo dnf install pulseaudio-utils\n" +
                    " - Arch: sudo pacman -S pulseaudio-utils"));
            });
            arecord.on("close", (code) => {
                if (code === 0) {
                    resolve(tempFile);
                }
                else {
                    reject(new Error(`arecord failed with code ${code}`));
                }
            });
        });
        recorder.on("close", (code) => {
            // The fallback path owns the promise now; ignore this late event.
            if (usingFallback) {
                return;
            }
            // timeout returns 124 when it kills the process, which is expected
            if (code === 0 || code === 124) {
                resolve(tempFile);
            }
            else {
                reject(new Error(`Recording failed (code ${code}): ${errorOutput}`));
            }
        });
    });
}
|
|
138
|
+
/**
 * Sends the WAV file at `audioFilePath` to OpenAI's Whisper API and returns
 * the transcribed text. When `language` is falsy, Whisper auto-detects the
 * spoken language.
 *
 * NOTE(review): relies on the global `File` constructor, which is only
 * available as a global in Node 20+, while package.json "engines" permits
 * >=18 — confirm the supported Node range.
 */
async function transcribeAudio(audioFilePath, apiKey, language) {
    const client = new OpenAI({ apiKey });
    const wavBytes = readFileSync(audioFilePath);
    const upload = new File([wavBytes], "audio.wav", { type: "audio/wav" });
    const request = { file: upload, model: "whisper-1" };
    if (language) {
        request.language = language;
    }
    const result = await client.audio.transcriptions.create(request);
    return result.text;
}
|
|
152
|
+
/**
 * OpenCode Voice Plugin factory.
 *
 * Adds a 'voice' tool that records audio from the microphone and transcribes it
 * using OpenAI's Whisper API.
 *
 * @param options Optional configuration:
 *   - apiKey: OpenAI API key; defaults to the OPENAI_API_KEY env var.
 *   - language: language code (e.g. "en"); Whisper auto-detects when unset.
 *   - defaultDuration: recording length in seconds when the tool call omits
 *     one (default 5).
 *   - maxDuration: upper bound for any requested duration (default 60).
 * @returns An OpenCode plugin exposing the `voice` tool.
 *
 * @example
 * ```ts
 * // In opencode.json
 * {
 *   "plugin": ["speech-opencode"]
 * }
 * ```
 *
 * @example
 * ```ts
 * // With options in .opencode/plugin/voice.ts
 * import { VoicePlugin } from "speech-opencode"
 * export default VoicePlugin({ language: "en", defaultDuration: 10 })
 * ```
 */
export const VoicePlugin = (options = {}) => async (ctx) => {
    const { apiKey = process.env.OPENAI_API_KEY, language, defaultDuration = 5, maxDuration = 60, } = options;
    if (!apiKey) {
        // Warn at load time so the misconfiguration surfaces before first use.
        console.warn("[Voice Plugin] Warning: OPENAI_API_KEY not set. Voice transcription will fail.");
    }
    return {
        tool: {
            voice: tool({
                description: "Records audio from the user's microphone and transcribes it using OpenAI Whisper. " +
                    "Use this tool when the user wants to provide input via voice or speech. " +
                    `The tool will record for the specified duration (default ${defaultDuration} seconds) and return the transcribed text.`,
                args: {
                    duration: tool.schema
                        .number()
                        .optional()
                        .describe(`Recording duration in seconds. Default is ${defaultDuration} seconds. Max is ${maxDuration} seconds.`),
                },
                async execute(args) {
                    if (!apiKey) {
                        return "Error: OPENAI_API_KEY environment variable is not set. Please set it to use voice transcription.";
                    }
                    // BUGFIX: use `??` instead of `||` so an explicit duration is
                    // not conflated with "omitted", and clamp to [1, maxDuration]
                    // so zero/negative values can never reach the recorder
                    // commands (`timeout`/`sox trim` would fail on them).
                    const requested = args.duration ?? defaultDuration;
                    const duration = Math.min(Math.max(requested, 1), maxDuration);
                    let audioFile = null;
                    try {
                        audioFile = await recordAudio(duration);
                        const transcription = await transcribeAudio(audioFile, apiKey, language);
                        if (!transcription || transcription.trim() === "") {
                            return "No speech detected. Please try again and speak clearly into your microphone.";
                        }
                        return `Transcribed speech: "${transcription}"`;
                    }
                    catch (error) {
                        // Report failures as tool output instead of throwing, so the
                        // agent can relay the problem to the user.
                        const errorMessage = error instanceof Error ? error.message : String(error);
                        return `Voice recording/transcription failed: ${errorMessage}`;
                    }
                    finally {
                        // Best-effort temp-file cleanup.
                        if (audioFile) {
                            try {
                                unlinkSync(audioFile);
                            }
                            catch {
                                // Ignore cleanup errors
                            }
                        }
                    }
                },
            }),
        },
    };
};
|
|
223
|
+
// Default export for simple usage: a plugin instance built with all defaults
// (API key from OPENAI_API_KEY, 5 s default / 60 s max recording), so
// `"plugin": ["speech-opencode"]` in opencode.json works with no config file.
export default VoicePlugin();
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "speech-opencode",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Voice input plugin for OpenCode using OpenAI Whisper",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"opencode",
|
|
7
|
+
"opencode-plugin",
|
|
8
|
+
"voice",
|
|
9
|
+
"speech-to-text",
|
|
10
|
+
"whisper",
|
|
11
|
+
"openai",
|
|
12
|
+
"transcription"
|
|
13
|
+
],
|
|
14
|
+
"author": {
|
|
15
|
+
"name": "Amitav Krishna",
|
|
16
|
+
"email": "amitavkrishna@proton.me",
|
|
17
|
+
"url": "https://amitav.net"
|
|
18
|
+
},
|
|
19
|
+
"license": "MIT",
|
|
20
|
+
"repository": {
|
|
21
|
+
"type": "git",
|
|
22
|
+
"url": "https://github.com/amitav-krishna/speech-opencode"
|
|
23
|
+
},
|
|
24
|
+
"type": "module",
|
|
25
|
+
"main": "./dist/index.js",
|
|
26
|
+
"module": "./dist/index.js",
|
|
27
|
+
"types": "./dist/index.d.ts",
|
|
28
|
+
"exports": {
|
|
29
|
+
".": {
|
|
30
|
+
"import": "./dist/index.js",
|
|
31
|
+
"types": "./dist/index.d.ts"
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"files": [
|
|
35
|
+
"dist",
|
|
36
|
+
"README.md"
|
|
37
|
+
],
|
|
38
|
+
"scripts": {
|
|
39
|
+
"build": "tsc",
|
|
40
|
+
"prepublishOnly": "npm run build"
|
|
41
|
+
},
|
|
42
|
+
"dependencies": {
|
|
43
|
+
"openai": "^4.77.0"
|
|
44
|
+
},
|
|
45
|
+
"devDependencies": {
|
|
46
|
+
"@opencode-ai/plugin": "latest",
|
|
47
|
+
"@types/node": "^20.0.0",
|
|
48
|
+
"typescript": "^5.0.0"
|
|
49
|
+
},
|
|
50
|
+
"peerDependencies": {
|
|
51
|
+
"@opencode-ai/plugin": ">=0.1.0"
|
|
52
|
+
},
|
|
53
|
+
"engines": {
|
|
54
|
+
"node": ">=18.0.0"
|
|
55
|
+
}
|
|
56
|
+
}
|