claude-voice 1.5.0 → 1.5.1
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +49 -380
- package/package.json +1 -1
- package/plugin/plugin.json +1 -1
- package/plugin/skills/listen/SKILL.md +23 -0
- package/plugin/skills/voice-control/SKILL.md +29 -156
- package/scripts/install-plugin.js +15 -8
package/README.md
CHANGED
|
@@ -2,453 +2,122 @@
|
|
|
2
2
|
|
|
3
3
|
[npm package](https://www.npmjs.com/package/claude-voice)
|
|
4
4
|
[License](LICENSE)
|
|
5
|
-
[](https://nodejs.org)
|
|
6
5
|
[]()
|
|
7
6
|
|
|
8
|
-
Voice interface for Claude Code
|
|
9
|
-
|
|
10
|
-
<!-- Demo GIF placeholder - replace with actual recording -->
|
|
11
|
-
<!--  -->
|
|
12
|
-
|
|
13
|
-
**Features:**
|
|
14
|
-
|
|
15
|
-
- Speaks Claude's responses aloud (Text-to-Speech)
|
|
16
|
-
- Transcribes your voice commands (Speech-to-Text)
|
|
17
|
-
- Hands-free with wake word detection ("Hey Jarvis")
|
|
18
|
-
- Works offline with local providers - no API keys required
|
|
19
|
-
- Deep integration with Claude Code via hooks system
|
|
20
|
-
|
|
21
|
-
## Quick Start
|
|
7
|
+
Voice interface for Claude Code. Speak commands, hear responses.
|
|
22
8
|
|
|
23
9
|
```bash
|
|
24
10
|
npm install -g claude-voice
|
|
25
11
|
```
|
|
26
12
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
**Upgrade voice quality:**
|
|
30
|
-
- Better TTS: `claude-voice local --download` (Piper neural voices)
|
|
31
|
-
- Best quality: `claude-voice openai` (requires API key)
|
|
32
|
-
- Customize: `claude-voice setup`
|
|
13
|
+
Say **"Hey Jarvis"** followed by your command. The extension auto-starts with Claude Code.
|
|
33
14
|
|
|
34
15
|
## How It Works
|
|
35
16
|
|
|
36
17
|
```
|
|
37
|
-
You speak
|
|
38
|
-
|
|
39
|
-
|--- "Jarvis..." -----> | |
|
|
40
|
-
| (wake word) |--- transcribe ------> |
|
|
41
|
-
| | (STT) |
|
|
42
|
-
| | |
|
|
43
|
-
| | <---- response ------ |
|
|
44
|
-
| <-- speaks aloud ---- | |
|
|
45
|
-
| (TTS) | |
|
|
18
|
+
You speak → "Hey Jarvis..." → Wake word detected → STT transcribes → Claude Code receives
|
|
19
|
+
Claude responds → Hook captures → TTS speaks aloud → You hear the response
|
|
46
20
|
```
|
|
47
21
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
| Hook | Purpose |
|
|
51
|
-
|------|---------|
|
|
52
|
-
| `session-start` | Auto-starts daemon when Claude Code launches |
|
|
53
|
-
| `stop` | Speaks responses when Claude finishes |
|
|
54
|
-
| `post-tool-use` | Announces tool completions (file reads, bash commands) |
|
|
55
|
-
| `notification` | Voice alerts for permission prompts |
|
|
22
|
+
The extension integrates via Claude Code hooks: auto-start on session, speak responses, announce tool completions, and voice alerts for permission prompts.
|
|
56
23
|
|
|
57
24
|
## Providers
|
|
58
25
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
|
62
|
-
|
|
63
|
-
|
|
|
64
|
-
| Speech-to-Text | Sherpa-ONNX | OpenAI Whisper |
|
|
65
|
-
| Wake Word | openWakeWord, Sherpa-ONNX | Picovoice Porcupine |
|
|
66
|
-
|
|
67
|
-
<details>
|
|
68
|
-
<summary><strong>TTS Providers</strong></summary>
|
|
69
|
-
|
|
70
|
-
### Piper (Default)
|
|
71
|
-
|
|
72
|
-
Local neural TTS with high-quality voices. No API key required.
|
|
73
|
-
|
|
74
|
-
```bash
|
|
75
|
-
claude-voice voice list # See available voices
|
|
76
|
-
claude-voice voice download en_US-amy-medium
|
|
77
|
-
claude-voice config set tts.provider=piper
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
### macOS Say
|
|
81
|
-
|
|
82
|
-
Built-in macOS speech synthesis.
|
|
83
|
-
|
|
84
|
-
```bash
|
|
85
|
-
claude-voice voices # List available voices
|
|
86
|
-
claude-voice config set tts.provider=macos-say
|
|
87
|
-
claude-voice config set tts.macos.voice=Samantha
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
### OpenAI TTS
|
|
91
|
-
|
|
92
|
-
High-quality neural voices. Requires `OPENAI_API_KEY`.
|
|
93
|
-
|
|
94
|
-
```bash
|
|
95
|
-
echo "OPENAI_API_KEY=sk-..." >> ~/.claude-voice/.env
|
|
96
|
-
claude-voice config set tts.provider=openai
|
|
97
|
-
claude-voice config set tts.openai.voice=nova
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
### ElevenLabs
|
|
101
|
-
|
|
102
|
-
Premium voice synthesis. Requires `ELEVENLABS_API_KEY`.
|
|
103
|
-
|
|
104
|
-
```bash
|
|
105
|
-
echo "ELEVENLABS_API_KEY=..." >> ~/.claude-voice/.env
|
|
106
|
-
claude-voice config set tts.provider=elevenlabs
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
</details>
|
|
110
|
-
|
|
111
|
-
<details>
|
|
112
|
-
<summary><strong>STT Providers</strong></summary>
|
|
113
|
-
|
|
114
|
-
### Sherpa-ONNX (Default)
|
|
115
|
-
|
|
116
|
-
Local Whisper models. No API key required. Supports 100+ languages.
|
|
117
|
-
|
|
118
|
-
```bash
|
|
119
|
-
claude-voice model list # Available models
|
|
120
|
-
claude-voice model download whisper-small # Best accuracy (488MB)
|
|
121
|
-
claude-voice config set stt.provider=sherpa-onnx
|
|
122
|
-
claude-voice config set stt.language=en # or: tr, de, fr, es, etc.
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
| Model | Size | Speed | Accuracy |
|
|
126
|
-
|-------|------|-------|----------|
|
|
127
|
-
| whisper-tiny | 75 MB | Fast | Good |
|
|
128
|
-
| whisper-base | 142 MB | Medium | Better |
|
|
129
|
-
| whisper-small | 488 MB | Slower | Best |
|
|
130
|
-
|
|
131
|
-
### OpenAI Whisper
|
|
26
|
+
| | Local (Free) | Cloud |
|
|
27
|
+
|---|---|---|
|
|
28
|
+
| **TTS** | macOS Say, Piper, espeak | OpenAI, ElevenLabs |
|
|
29
|
+
| **STT** | Sherpa-ONNX Whisper | OpenAI Whisper |
|
|
30
|
+
| **Wake Word** | openWakeWord, Sherpa-ONNX | Picovoice |
|
|
132
31
|
|
|
133
|
-
|
|
32
|
+
**Quick presets:**
|
|
134
33
|
|
|
135
34
|
```bash
|
|
136
|
-
claude-voice
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
</details>
|
|
140
|
-
|
|
141
|
-
<details>
|
|
142
|
-
<summary><strong>Wake Word Providers</strong></summary>
|
|
143
|
-
|
|
144
|
-
### openWakeWord (Default)
|
|
145
|
-
|
|
146
|
-
Purpose-trained wake word detection with high accuracy. Requires Python 3. Installed automatically during setup.
|
|
147
|
-
|
|
148
|
-
```bash
|
|
149
|
-
claude-voice openwakeword --install # Install/configure openWakeWord
|
|
150
|
-
claude-voice config set wakeWord.provider=openwakeword
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
Available models: `hey_jarvis` (default), `alexa`, `hey_mycroft`, `hey_rhasspy`
|
|
154
|
-
|
|
155
|
-
### Sherpa-ONNX KWS (Fallback)
|
|
156
|
-
|
|
157
|
-
Local keyword spotting, no Python required. Used as fallback when Python is not available.
|
|
158
|
-
|
|
159
|
-
```bash
|
|
160
|
-
claude-voice config set wakeWord.provider=sherpa-onnx
|
|
161
|
-
claude-voice config set wakeWord.keyword=jarvis # or: claude
|
|
162
|
-
```
|
|
163
|
-
|
|
164
|
-
### Picovoice Porcupine
|
|
165
|
-
|
|
166
|
-
High-accuracy wake word detection. Requires `PICOVOICE_ACCESS_KEY`.
|
|
167
|
-
|
|
168
|
-
1. Get a free access key at [Picovoice Console](https://console.picovoice.ai/)
|
|
169
|
-
2. Configure:
|
|
170
|
-
|
|
171
|
-
```bash
|
|
172
|
-
echo "PICOVOICE_ACCESS_KEY=..." >> ~/.claude-voice/.env
|
|
173
|
-
claude-voice config set wakeWord.provider=picovoice
|
|
174
|
-
claude-voice config set wakeWord.keyword=jarvis # jarvis, computer, alexa, etc.
|
|
35
|
+
claude-voice setup # Interactive setup wizard
|
|
36
|
+
claude-voice openai # Cloud TTS + STT (requires API key)
|
|
37
|
+
claude-voice local --download # Piper TTS + larger Whisper model (offline)
|
|
175
38
|
```
|
|
176
39
|
|
|
177
|
-
Built-in keywords: jarvis, computer, alexa, americano, blueberry, bumblebee, grapefruit, grasshopper, hey google, hey siri, ok google, picovoice, porcupine, terminator
|
|
178
|
-
|
|
179
|
-
</details>
|
|
180
|
-
|
|
181
40
|
## Configuration
|
|
182
41
|
|
|
183
|
-
Config file: `~/.claude-voice/config.json`
|
|
184
|
-
|
|
185
42
|
```bash
|
|
186
|
-
claude-voice config
|
|
187
|
-
claude-voice config get tts.provider # Get specific value
|
|
43
|
+
claude-voice config # View all
|
|
188
44
|
claude-voice config set tts.provider=openai # Set value
|
|
189
|
-
claude-voice config
|
|
190
|
-
claude-voice config
|
|
45
|
+
claude-voice config set stt.language=tr # Change language
|
|
46
|
+
claude-voice config edit # Open in editor
|
|
191
47
|
```
|
|
192
48
|
|
|
193
|
-
|
|
194
|
-
<summary><strong>TTS Options</strong></summary>
|
|
195
|
-
|
|
196
|
-
| Option | Default | Description |
|
|
197
|
-
|--------|---------|-------------|
|
|
198
|
-
| `tts.provider` | `piper` | piper, macos-say, openai, elevenlabs, espeak, disabled |
|
|
199
|
-
| `tts.autoSpeak` | `true` | Automatically speak Claude's responses |
|
|
200
|
-
| `tts.maxSpeechLength` | `5000` | Maximum characters to speak |
|
|
201
|
-
| `tts.skipCodeBlocks` | `true` | Skip code blocks when speaking |
|
|
202
|
-
|
|
203
|
-
</details>
|
|
49
|
+
Config file: `~/.claude-voice/config.json`
|
|
204
50
|
|
|
205
51
|
<details>
|
|
206
|
-
<summary><strong>
|
|
52
|
+
<summary><strong>All options</strong></summary>
|
|
207
53
|
|
|
208
54
|
| Option | Default | Description |
|
|
209
55
|
|--------|---------|-------------|
|
|
56
|
+
| `tts.provider` | `macos-say` | macos-say, piper, openai, elevenlabs, espeak, disabled |
|
|
57
|
+
| `tts.autoSpeak` | `true` | Auto-speak Claude responses |
|
|
58
|
+
| `tts.maxSpeechLength` | `5000` | Max characters to speak |
|
|
210
59
|
| `stt.provider` | `sherpa-onnx` | sherpa-onnx, openai, whisper-local, disabled |
|
|
211
|
-
| `stt.language` | `en` | Language code (en, tr, de, fr, es, ja, zh
|
|
212
|
-
|
|
213
|
-
</details>
|
|
214
|
-
|
|
215
|
-
<details>
|
|
216
|
-
<summary><strong>Wake Word Options</strong></summary>
|
|
217
|
-
|
|
218
|
-
| Option | Default | Description |
|
|
219
|
-
|--------|---------|-------------|
|
|
60
|
+
| `stt.language` | `en` | Language code (en, tr, de, fr, es, ja, zh...) |
|
|
220
61
|
| `wakeWord.enabled` | `true` | Enable wake word detection |
|
|
221
|
-
| `wakeWord.provider` | `openwakeword` | openwakeword, sherpa-onnx,
|
|
222
|
-
| `wakeWord.keyword` | `jarvis` | Wake word: jarvis, claude, computer, etc. |
|
|
62
|
+
| `wakeWord.provider` | `openwakeword` | openwakeword, sherpa-onnx, picovoice |
|
|
223
63
|
| `wakeWord.sensitivity` | `0.5` | Detection sensitivity (0.0-1.0) |
|
|
224
|
-
| `
|
|
225
|
-
|
|
226
|
-
</details>
|
|
227
|
-
|
|
228
|
-
<details>
|
|
229
|
-
<summary><strong>Voice Output Options</strong></summary>
|
|
230
|
-
|
|
231
|
-
When enabled, Claude formats responses with a spoken abstract before technical details.
|
|
232
|
-
|
|
233
|
-
| Option | Default | Description |
|
|
234
|
-
|--------|---------|-------------|
|
|
235
|
-
| `voiceOutput.enabled` | `false` | Enable TTS-friendly formatting |
|
|
236
|
-
| `voiceOutput.abstractMarker` | `<!-- TTS -->` | Marker separating spoken/technical content |
|
|
237
|
-
| `voiceOutput.maxAbstractLength` | `200` | Max characters for spoken abstract |
|
|
238
|
-
|
|
239
|
-
```bash
|
|
240
|
-
claude-voice output enable # Enable voice-friendly formatting
|
|
241
|
-
claude-voice output status # Check current status
|
|
242
|
-
```
|
|
243
|
-
|
|
244
|
-
</details>
|
|
245
|
-
|
|
246
|
-
<details>
|
|
247
|
-
<summary><strong>Tool Announcements</strong></summary>
|
|
248
|
-
|
|
249
|
-
| Option | Default | Description |
|
|
250
|
-
|--------|---------|-------------|
|
|
64
|
+
| `voiceOutput.enabled` | `false` | TTS-friendly response formatting |
|
|
251
65
|
| `toolTTS.enabled` | `false` | Announce tool completions |
|
|
252
|
-
| `
|
|
253
|
-
| `toolTTS.announceErrors` | `true` | Announce tool errors |
|
|
254
|
-
|
|
255
|
-
</details>
|
|
256
|
-
|
|
257
|
-
<details>
|
|
258
|
-
<summary><strong>Keyboard Shortcut</strong></summary>
|
|
259
|
-
|
|
260
|
-
| Option | Default | Description |
|
|
261
|
-
|--------|---------|-------------|
|
|
262
|
-
| `shortcut.enabled` | `false` | Enable keyboard shortcut |
|
|
263
|
-
| `shortcut.key` | `CommandOrControl+Shift+Space` | Key combination |
|
|
264
|
-
|
|
265
|
-
**Modifiers:** CommandOrControl, Command, Control, Shift, Alt
|
|
266
|
-
|
|
267
|
-
</details>
|
|
268
|
-
|
|
269
|
-
<details>
|
|
270
|
-
<summary><strong>Recording Options</strong></summary>
|
|
271
|
-
|
|
272
|
-
| Option | Default | Description |
|
|
273
|
-
|--------|---------|-------------|
|
|
274
|
-
| `recording.sampleRate` | `16000` | Audio sample rate (Hz) |
|
|
275
|
-
| `recording.silenceThreshold` | `2500` | Silence duration to stop (ms) |
|
|
276
|
-
| `recording.silenceAmplitude` | `500` | Amplitude threshold |
|
|
66
|
+
| `recording.silenceThreshold` | `3500` | Silence duration to stop recording (ms) |
|
|
277
67
|
| `recording.maxDuration` | `60000` | Max recording length (ms) |
|
|
278
68
|
|
|
279
69
|
</details>
|
|
280
70
|
|
|
281
|
-
<details>
|
|
282
|
-
<summary><strong>Server Options</strong></summary>
|
|
283
|
-
|
|
284
|
-
| Option | Default | Description |
|
|
285
|
-
|--------|---------|-------------|
|
|
286
|
-
| `server.port` | `3456` | HTTP server port |
|
|
287
|
-
| `server.host` | `127.0.0.1` | Server host |
|
|
288
|
-
|
|
289
|
-
</details>
|
|
290
|
-
|
|
291
71
|
## CLI Commands
|
|
292
72
|
|
|
293
73
|
```bash
|
|
294
|
-
# Daemon
|
|
295
|
-
claude-voice start
|
|
296
|
-
claude-voice stop # Stop daemon
|
|
297
|
-
claude-voice restart # Restart daemon
|
|
298
|
-
claude-voice status # Check status
|
|
299
|
-
|
|
300
|
-
# Setup
|
|
301
|
-
claude-voice setup # Interactive setup wizard
|
|
302
|
-
claude-voice doctor # Diagnose issues
|
|
74
|
+
# Daemon
|
|
75
|
+
claude-voice start / stop / restart / status
|
|
303
76
|
|
|
304
|
-
#
|
|
305
|
-
claude-voice
|
|
306
|
-
claude-voice
|
|
307
|
-
claude-voice local # Use Piper TTS + Sherpa-ONNX STT (offline)
|
|
308
|
-
claude-voice local --download # Configure and download required models
|
|
309
|
-
claude-voice openwakeword # Switch to openWakeWord (better wake word detection)
|
|
310
|
-
claude-voice download-models # Download models for current config
|
|
77
|
+
# Setup & Diagnostics
|
|
78
|
+
claude-voice setup # Interactive wizard
|
|
79
|
+
claude-voice doctor # Diagnose issues
|
|
311
80
|
|
|
312
81
|
# Models & Voices
|
|
313
|
-
claude-voice model list
|
|
314
|
-
claude-voice
|
|
315
|
-
claude-voice voice list # List TTS voices
|
|
316
|
-
claude-voice voice download <id>
|
|
82
|
+
claude-voice model list / download <id> # STT models (whisper-tiny/base/small)
|
|
83
|
+
claude-voice voice list / download <id> # Piper TTS voices
|
|
317
84
|
|
|
318
|
-
#
|
|
319
|
-
claude-voice
|
|
320
|
-
claude-voice hooks status # Check installation
|
|
85
|
+
# Wake Word
|
|
86
|
+
claude-voice openwakeword --install # Better wake word detection
|
|
321
87
|
|
|
322
88
|
# Testing
|
|
323
|
-
claude-voice test-tts "Hello"
|
|
324
|
-
claude-voice test-stt
|
|
89
|
+
claude-voice test-tts "Hello"
|
|
90
|
+
claude-voice test-stt recording.wav
|
|
325
91
|
|
|
326
92
|
# Utilities
|
|
327
|
-
claude-voice logs
|
|
328
|
-
claude-voice
|
|
329
|
-
claude-voice devices # List audio devices
|
|
93
|
+
claude-voice logs -f # Follow daemon logs
|
|
94
|
+
claude-voice devices # List audio devices
|
|
330
95
|
```
|
|
331
96
|
|
|
332
|
-
Run `claude-voice --help` for all 50+ commands.
|
|
333
|
-
|
|
334
97
|
## Platform Support
|
|
335
98
|
|
|
336
|
-
|
|
|
337
|
-
|
|
338
|
-
| TTS |
|
|
99
|
+
| | macOS | Linux |
|
|
100
|
+
|---|---|---|
|
|
101
|
+
| TTS | Say, Piper, OpenAI, ElevenLabs | espeak, Piper, OpenAI, ElevenLabs |
|
|
339
102
|
| STT | Sherpa-ONNX, OpenAI | Sherpa-ONNX, OpenAI |
|
|
340
103
|
| Wake Word | openWakeWord, Sherpa-ONNX, Picovoice | openWakeWord, Sherpa-ONNX, Picovoice |
|
|
341
|
-
| Keyboard Shortcut | Cmd+Shift+Space | Ctrl+Shift+Space |
|
|
342
|
-
| Terminal Injection | AppleScript | xdotool (X11), dotool (Wayland) |
|
|
343
104
|
|
|
344
|
-
**
|
|
345
|
-
- Node.js 18+
|
|
346
|
-
- Microphone access
|
|
347
|
-
- Python 3 (for openWakeWord; falls back to Sherpa-ONNX if unavailable)
|
|
105
|
+
**Requires:** Node.js 18+, microphone access. Python 3 recommended (for openWakeWord).
|
|
348
106
|
|
|
349
107
|
## Troubleshooting
|
|
350
108
|
|
|
351
|
-
Run diagnostics:
|
|
352
|
-
|
|
353
|
-
```bash
|
|
354
|
-
claude-voice doctor
|
|
355
|
-
```
|
|
356
|
-
|
|
357
|
-
<details>
|
|
358
|
-
<summary><strong>Common Issues</strong></summary>
|
|
359
|
-
|
|
360
|
-
**Daemon won't start**
|
|
361
109
|
```bash
|
|
362
|
-
claude-voice
|
|
363
|
-
claude-voice
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
**No audio output**
|
|
367
|
-
```bash
|
|
368
|
-
claude-voice test-tts "Hello"
|
|
369
|
-
claude-voice config get tts.provider
|
|
110
|
+
claude-voice doctor # Auto-diagnose and fix issues
|
|
111
|
+
claude-voice logs # Check daemon logs
|
|
112
|
+
claude-voice start -f # Run in foreground for debugging
|
|
370
113
|
```
|
|
371
114
|
|
|
372
|
-
**Wake word not detecting
|
|
373
|
-
- Upgrade to openWakeWord: `claude-voice openwakeword --install`
|
|
374
|
-
- Check microphone permissions in System Preferences
|
|
375
|
-
- Run `claude-voice devices` to verify microphone
|
|
376
|
-
- Adjust sensitivity: `claude-voice config set wakeWord.sensitivity=0.7`
|
|
377
|
-
|
|
378
|
-
**Text not appearing in terminal**
|
|
379
|
-
- macOS: Allow Terminal in System Preferences > Privacy > Accessibility
|
|
380
|
-
- Run `claude-voice doctor` to check terminal injection status
|
|
381
|
-
|
|
382
|
-
</details>
|
|
383
|
-
|
|
384
|
-
## API Reference
|
|
385
|
-
|
|
386
|
-
<details>
|
|
387
|
-
<summary><strong>HTTP API (port 3456)</strong></summary>
|
|
388
|
-
|
|
389
|
-
| Endpoint | Method | Description |
|
|
390
|
-
|----------|--------|-------------|
|
|
391
|
-
| `/status` | GET | Daemon status and provider info |
|
|
392
|
-
| `/tts` | POST | Speak text `{"text": "...", "priority": false}` |
|
|
393
|
-
| `/tts/stop` | POST | Stop current playback |
|
|
394
|
-
| `/stt` | POST | Transcribe audio (multipart/form-data) |
|
|
395
|
-
| `/config` | GET | Get configuration |
|
|
396
|
-
| `/config` | POST | Update configuration |
|
|
397
|
-
|
|
398
|
-
</details>
|
|
399
|
-
|
|
400
|
-
## Changelog
|
|
401
|
-
|
|
402
|
-
### v1.5.0
|
|
403
|
-
- **openWakeWord as default** - Purpose-trained "Hey Jarvis" model (200K+ samples) replaces generic Sherpa-ONNX KWS for much better wake word accuracy
|
|
404
|
-
- **Smart provider detection** - Automatically uses openWakeWord when Python 3 is available, falls back to Sherpa-ONNX KWS otherwise
|
|
405
|
-
- **espeak TTS provider** - Linux native TTS support
|
|
406
|
-
- **Setup wizard upgrade** - All presets now prefer openWakeWord, Custom preset includes wake word provider choice
|
|
407
|
-
|
|
408
|
-
### v1.4.0
|
|
409
|
-
- **License change** - Switched to PolyForm Noncommercial (commercial use restricted)
|
|
410
|
-
|
|
411
|
-
### v1.3.19
|
|
412
|
-
- **`shh` command** - Stop TTS instantly
|
|
413
|
-
|
|
414
|
-
### v1.3.18
|
|
415
|
-
- **`listen` command** - Manual voice trigger without wake word
|
|
416
|
-
|
|
417
|
-
### v1.3.16
|
|
418
|
-
- **`stop talking` command** - Stop speech playback
|
|
419
|
-
- **dotool support** - Alternative input injection for Linux Wayland
|
|
420
|
-
- **Python detection** - Better cross-platform support
|
|
421
|
-
|
|
422
|
-
### v1.3.9
|
|
423
|
-
- **Pure Node.js model downloads** - No system dependencies (curl/wget not required)
|
|
424
|
-
|
|
425
|
-
### v1.3.3
|
|
426
|
-
- **Linux support** - Full Linux platform support added
|
|
427
|
-
|
|
428
|
-
## Contributing
|
|
429
|
-
|
|
430
|
-
Contributions are welcome.
|
|
431
|
-
|
|
432
|
-
```bash
|
|
433
|
-
git clone https://github.com/Menesahin/claude-voice-extension.git
|
|
434
|
-
cd claude-voice-extension
|
|
435
|
-
npm install
|
|
436
|
-
npm run dev
|
|
437
|
-
```
|
|
438
|
-
|
|
439
|
-
**Guidelines:**
|
|
440
|
-
- Run `npm run lint` before committing
|
|
441
|
-
- Add tests for new features
|
|
442
|
-
- Follow existing code patterns
|
|
115
|
+
**Wake word not detecting?** Run `claude-voice openwakeword --install` for better accuracy.
|
|
443
116
|
|
|
444
117
|
## License
|
|
445
118
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
Free for personal use, research, education, and non-profit organizations. Commercial use requires a separate license. See [LICENSE](LICENSE) for details.
|
|
119
|
+
[PolyForm Noncommercial 1.0.0](LICENSE) - Free for personal use, research, and education.
|
|
449
120
|
|
|
450
121
|
---
|
|
451
122
|
|
|
452
|
-
[
|
|
453
|
-
[Issues](https://github.com/Menesahin/claude-voice-extension/issues) |
|
|
454
|
-
[Releases](https://github.com/Menesahin/claude-voice-extension/releases)
|
|
123
|
+
[Issues](https://github.com/Menesahin/claude-voice-extension/issues) | [Releases](https://github.com/Menesahin/claude-voice-extension/releases)
|
package/package.json
CHANGED
package/plugin/plugin.json
CHANGED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: listen
|
|
3
|
+
description: Use when the user wants to speak a voice command. Trigger words include listen, dinle, voice, sesli komut, mikrofon, microphone, speak, konuş, söyle, ses, record, kayıt, "I want to say something", "bir şey söylemek istiyorum"
|
|
4
|
+
user_invocable: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Voice Listen - Activate Microphone
|
|
8
|
+
|
|
9
|
+
When this skill is triggered, immediately run the following command to activate voice listening:
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
claude-voice listen
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
This activates the microphone and waits for the user to speak a command. The voice extension will:
|
|
16
|
+
1. Play a "ping" sound to indicate it's listening
|
|
17
|
+
2. Record the user's speech
|
|
18
|
+
3. Transcribe it using STT (speech-to-text)
|
|
19
|
+
4. Inject the transcribed text into the terminal
|
|
20
|
+
|
|
21
|
+
After running the command, tell the user you're listening and they can speak their command now.
|
|
22
|
+
|
|
23
|
+
If the command fails with "Daemon not running", run `claude-voice start` first, then retry `claude-voice listen`.
|
|
@@ -1,174 +1,47 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: claude-voice
|
|
3
3
|
description: Use when user asks about voice commands, TTS, STT, wake word, speech, microphone, audio, jarvis, or claude-voice CLI commands
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.5.0
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# Claude Voice Extension
|
|
8
8
|
|
|
9
|
-
Voice interface for Claude Code - TTS
|
|
9
|
+
Voice interface for Claude Code - TTS, STT, and wake word detection.
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## Key Commands
|
|
12
12
|
|
|
13
13
|
```bash
|
|
14
|
-
claude-voice start
|
|
15
|
-
claude-voice
|
|
16
|
-
claude-voice
|
|
17
|
-
claude-voice
|
|
14
|
+
claude-voice start / stop / restart / status # Daemon management
|
|
15
|
+
claude-voice setup # Interactive setup wizard
|
|
16
|
+
claude-voice doctor # Diagnose issues
|
|
17
|
+
claude-voice listen # Listen for voice command (no wake word needed)
|
|
18
|
+
claude-voice openai # Use OpenAI TTS + STT (cloud)
|
|
19
|
+
claude-voice local --download # Use Piper TTS + Sherpa-ONNX STT (offline)
|
|
20
|
+
claude-voice openwakeword --install # Better wake word detection
|
|
21
|
+
claude-voice config set <key>=<value> # Change settings
|
|
22
|
+
claude-voice model list / download <id> # STT models
|
|
23
|
+
claude-voice voice list / download <id> # Piper TTS voices
|
|
24
|
+
claude-voice test-tts "Hello" # Test TTS
|
|
25
|
+
claude-voice logs -f # Follow daemon logs
|
|
18
26
|
```
|
|
19
27
|
|
|
20
|
-
##
|
|
21
|
-
|
|
22
|
-
### Daemon Management
|
|
23
|
-
```bash
|
|
24
|
-
claude-voice start [-f] # Start daemon (-f for foreground)
|
|
25
|
-
claude-voice stop # Stop daemon
|
|
26
|
-
claude-voice restart # Restart daemon
|
|
27
|
-
claude-voice status # Check daemon status and providers
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
### Setup & Diagnostics
|
|
31
|
-
```bash
|
|
32
|
-
claude-voice setup # Interactive setup wizard
|
|
33
|
-
claude-voice doctor # Full system diagnostics
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
### Configuration
|
|
37
|
-
```bash
|
|
38
|
-
claude-voice config # View full configuration
|
|
39
|
-
claude-voice config get <key> # Get value (e.g., tts.provider)
|
|
40
|
-
claude-voice config set <key>=<value> # Set value (e.g., stt.language=tr)
|
|
41
|
-
claude-voice config reset # Reset to defaults
|
|
42
|
-
claude-voice config edit # Open in $EDITOR
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
### Hooks
|
|
46
|
-
```bash
|
|
47
|
-
claude-voice hooks install # Install Claude Code hooks
|
|
48
|
-
claude-voice hooks uninstall # Remove hooks
|
|
49
|
-
claude-voice hooks status # Check hooks installation
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
### Voice Output (TTS-friendly formatting)
|
|
53
|
-
```bash
|
|
54
|
-
claude-voice output enable # Enable <!-- TTS --> abstract extraction
|
|
55
|
-
claude-voice output disable # Disable voice output formatting
|
|
56
|
-
claude-voice output status # Show current settings
|
|
57
|
-
claude-voice output config # Configure (--length, --marker)
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
### STT Models (Sherpa-ONNX)
|
|
61
|
-
```bash
|
|
62
|
-
claude-voice model list # List available/installed models
|
|
63
|
-
claude-voice model download <id> # Download model (whisper-tiny/base/small)
|
|
64
|
-
claude-voice model remove <id> # Remove installed model
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
### TTS Voices (Piper)
|
|
68
|
-
```bash
|
|
69
|
-
claude-voice voice list # List available/installed voices
|
|
70
|
-
claude-voice voice download <id> # Download voice
|
|
71
|
-
claude-voice voice remove <id> # Remove installed voice
|
|
72
|
-
claude-voice voice status # Show current voice info
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
### Testing
|
|
76
|
-
```bash
|
|
77
|
-
claude-voice test-tts [text] # Test TTS with optional text
|
|
78
|
-
claude-voice test-stt <file> # Test STT transcription
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
### Utilities
|
|
82
|
-
```bash
|
|
83
|
-
claude-voice voices # List system TTS voices (macOS say)
|
|
84
|
-
claude-voice devices # List audio input devices
|
|
85
|
-
claude-voice logs # View daemon logs
|
|
86
|
-
claude-voice logs -f # Follow logs (tail -f)
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
## Configuration Options
|
|
28
|
+
## Configuration
|
|
90
29
|
|
|
91
30
|
Config file: `~/.claude-voice/config.json`
|
|
92
31
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
tts.provider
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
### STT Settings
|
|
105
|
-
```
|
|
106
|
-
stt.provider # sherpa-onnx | openai | whisper-local | disabled
|
|
107
|
-
stt.language # en (ISO code: en, tr, de, fr, es, etc.)
|
|
108
|
-
stt.sherpaOnnx.model # whisper-tiny | whisper-base | whisper-small
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
### Wake Word Settings
|
|
112
|
-
```
|
|
113
|
-
wakeWord.enabled # true
|
|
114
|
-
wakeWord.keyword # jarvis
|
|
115
|
-
wakeWord.sensitivity # 0.5 (0.0-1.0)
|
|
116
|
-
wakeWord.playSound # true - play Ping/Pop sounds
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
### Voice Output Settings
|
|
120
|
-
```
|
|
121
|
-
voiceOutput.enabled # true - TTS-friendly abstracts
|
|
122
|
-
voiceOutput.abstractMarker # <!-- TTS -->
|
|
123
|
-
voiceOutput.maxAbstractLength # 200
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
### Tool TTS Settings
|
|
127
|
-
```
|
|
128
|
-
toolTTS.enabled # true - announce tool completions
|
|
129
|
-
toolTTS.mode # summarize | completion
|
|
130
|
-
toolTTS.tools.* # per-tool toggle (Read, Grep, Bash, Write, Edit, etc.)
|
|
131
|
-
```
|
|
32
|
+
| Setting | Default | Options |
|
|
33
|
+
|---------|---------|---------|
|
|
34
|
+
| `tts.provider` | `macos-say` | macos-say, piper, openai, elevenlabs, espeak |
|
|
35
|
+
| `stt.provider` | `sherpa-onnx` | sherpa-onnx, openai, whisper-local |
|
|
36
|
+
| `stt.language` | `en` | Any ISO code (en, tr, de, fr, es...) |
|
|
37
|
+
| `wakeWord.provider` | `openwakeword` | openwakeword, sherpa-onnx, picovoice |
|
|
38
|
+
| `wakeWord.sensitivity` | `0.5` | 0.0 - 1.0 |
|
|
39
|
+
| `voiceOutput.enabled` | `false` | true/false |
|
|
40
|
+
| `toolTTS.enabled` | `false` | true/false |
|
|
132
41
|
|
|
133
42
|
## File Locations
|
|
134
43
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
| `~/.claude-voice/models/` | STT models (Whisper ONNX) |
|
|
140
|
-
| `~/.claude-voice/voices/` | Piper TTS voices |
|
|
141
|
-
| `~/.claude-voice/daemon.log` | Daemon logs |
|
|
142
|
-
|
|
143
|
-
## Platform Support
|
|
144
|
-
|
|
145
|
-
| Feature | macOS | Linux |
|
|
146
|
-
|---------|-------|-------|
|
|
147
|
-
| TTS | say, piper, openai | espeak, piper, openai |
|
|
148
|
-
| STT | sherpa-onnx, openai | sherpa-onnx, openai |
|
|
149
|
-
| Wake word | sox (rec) | arecord |
|
|
150
|
-
| Terminal inject | AppleScript | xdotool (limited) |
|
|
151
|
-
|
|
152
|
-
## Common Tasks
|
|
153
|
-
|
|
154
|
-
**Enable voice responses:**
|
|
155
|
-
```bash
|
|
156
|
-
claude-voice config set tts.autoSpeak=true
|
|
157
|
-
claude-voice config set voiceOutput.enabled=true
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
**Change TTS provider:**
|
|
161
|
-
```bash
|
|
162
|
-
claude-voice config set tts.provider=openai
|
|
163
|
-
claude-voice config set tts.openai.voice=nova
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
**Change STT language:**
|
|
167
|
-
```bash
|
|
168
|
-
claude-voice config set stt.language=tr
|
|
169
|
-
```
|
|
170
|
-
|
|
171
|
-
**Download better STT model:**
|
|
172
|
-
```bash
|
|
173
|
-
claude-voice model download whisper-small
|
|
174
|
-
```
|
|
44
|
+
- Config: `~/.claude-voice/config.json`
|
|
45
|
+
- API keys: `~/.claude-voice/.env`
|
|
46
|
+
- Models: `~/.claude-voice/models/`
|
|
47
|
+
- Logs: `~/.claude-voice/daemon.log`
|
|
@@ -20,19 +20,23 @@ function installPlugin(sourceDir) {
|
|
|
20
20
|
const pluginsDir = path.join(os.homedir(), '.claude', 'plugins');
|
|
21
21
|
const pluginDir = path.join(pluginsDir, PLUGIN_NAME);
|
|
22
22
|
const manifestDir = path.join(pluginDir, '.claude-plugin');
|
|
23
|
-
const
|
|
23
|
+
const voiceControlSkillDir = path.join(pluginDir, 'skills', 'voice-control');
|
|
24
|
+
const listenSkillDir = path.join(pluginDir, 'skills', 'listen');
|
|
24
25
|
|
|
25
26
|
// Create directories
|
|
26
27
|
fs.mkdirSync(manifestDir, { recursive: true });
|
|
27
|
-
fs.mkdirSync(
|
|
28
|
+
fs.mkdirSync(voiceControlSkillDir, { recursive: true });
|
|
29
|
+
fs.mkdirSync(listenSkillDir, { recursive: true });
|
|
28
30
|
|
|
29
31
|
// Source paths
|
|
30
32
|
const sourcePluginJson = path.join(sourceDir, 'plugin', 'plugin.json');
|
|
31
|
-
const
|
|
33
|
+
const sourceVoiceControlSkill = path.join(sourceDir, 'plugin', 'skills', 'voice-control', 'SKILL.md');
|
|
34
|
+
const sourceListenSkill = path.join(sourceDir, 'plugin', 'skills', 'listen', 'SKILL.md');
|
|
32
35
|
|
|
33
36
|
// Destination paths
|
|
34
37
|
const destPluginJson = path.join(manifestDir, 'plugin.json');
|
|
35
|
-
const
|
|
38
|
+
const destVoiceControlSkill = path.join(voiceControlSkillDir, 'SKILL.md');
|
|
39
|
+
const destListenSkill = path.join(listenSkillDir, 'SKILL.md');
|
|
36
40
|
|
|
37
41
|
// Copy plugin.json
|
|
38
42
|
if (fs.existsSync(sourcePluginJson)) {
|
|
@@ -41,15 +45,18 @@ function installPlugin(sourceDir) {
|
|
|
41
45
|
// Create minimal manifest if source doesn't exist
|
|
42
46
|
const manifest = {
|
|
43
47
|
name: PLUGIN_NAME,
|
|
44
|
-
version: '1.
|
|
48
|
+
version: '1.5.0',
|
|
45
49
|
description: 'Voice interface for Claude Code - TTS, STT, wake word detection'
|
|
46
50
|
};
|
|
47
51
|
fs.writeFileSync(destPluginJson, JSON.stringify(manifest, null, 2));
|
|
48
52
|
}
|
|
49
53
|
|
|
50
|
-
// Copy
|
|
51
|
-
if (fs.existsSync(
|
|
52
|
-
fs.copyFileSync(
|
|
54
|
+
// Copy skills
|
|
55
|
+
if (fs.existsSync(sourceVoiceControlSkill)) {
|
|
56
|
+
fs.copyFileSync(sourceVoiceControlSkill, destVoiceControlSkill);
|
|
57
|
+
}
|
|
58
|
+
if (fs.existsSync(sourceListenSkill)) {
|
|
59
|
+
fs.copyFileSync(sourceListenSkill, destListenSkill);
|
|
53
60
|
}
|
|
54
61
|
|
|
55
62
|
return pluginDir;
|