claude-voice 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +395 -0
- package/bin/claude-voice +29 -0
- package/config/default.json +109 -0
- package/config/voice-prompt.md +27 -0
- package/dist/cli.d.ts +8 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +1103 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +140 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +179 -0
- package/dist/config.js.map +1 -0
- package/dist/env.d.ts +40 -0
- package/dist/env.d.ts.map +1 -0
- package/dist/env.js +175 -0
- package/dist/env.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +140 -0
- package/dist/index.js.map +1 -0
- package/dist/platform/index.d.ts +35 -0
- package/dist/platform/index.d.ts.map +1 -0
- package/dist/platform/index.js +170 -0
- package/dist/platform/index.js.map +1 -0
- package/dist/server.d.ts +5 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +185 -0
- package/dist/server.js.map +1 -0
- package/dist/stt/index.d.ts +15 -0
- package/dist/stt/index.d.ts.map +1 -0
- package/dist/stt/index.js +54 -0
- package/dist/stt/index.js.map +1 -0
- package/dist/stt/providers/openai.d.ts +15 -0
- package/dist/stt/providers/openai.d.ts.map +1 -0
- package/dist/stt/providers/openai.js +74 -0
- package/dist/stt/providers/openai.js.map +1 -0
- package/dist/stt/providers/sherpa-onnx.d.ts +50 -0
- package/dist/stt/providers/sherpa-onnx.d.ts.map +1 -0
- package/dist/stt/providers/sherpa-onnx.js +237 -0
- package/dist/stt/providers/sherpa-onnx.js.map +1 -0
- package/dist/stt/providers/whisper-local.d.ts +19 -0
- package/dist/stt/providers/whisper-local.d.ts.map +1 -0
- package/dist/stt/providers/whisper-local.js +141 -0
- package/dist/stt/providers/whisper-local.js.map +1 -0
- package/dist/terminal/input-injector.d.ts +55 -0
- package/dist/terminal/input-injector.d.ts.map +1 -0
- package/dist/terminal/input-injector.js +189 -0
- package/dist/terminal/input-injector.js.map +1 -0
- package/dist/tts/index.d.ts +20 -0
- package/dist/tts/index.d.ts.map +1 -0
- package/dist/tts/index.js +72 -0
- package/dist/tts/index.js.map +1 -0
- package/dist/tts/providers/elevenlabs.d.ts +23 -0
- package/dist/tts/providers/elevenlabs.d.ts.map +1 -0
- package/dist/tts/providers/elevenlabs.js +142 -0
- package/dist/tts/providers/elevenlabs.js.map +1 -0
- package/dist/tts/providers/macos-say.d.ts +17 -0
- package/dist/tts/providers/macos-say.d.ts.map +1 -0
- package/dist/tts/providers/macos-say.js +72 -0
- package/dist/tts/providers/macos-say.js.map +1 -0
- package/dist/tts/providers/openai.d.ts +19 -0
- package/dist/tts/providers/openai.d.ts.map +1 -0
- package/dist/tts/providers/openai.js +118 -0
- package/dist/tts/providers/openai.js.map +1 -0
- package/dist/tts/providers/piper.d.ts +48 -0
- package/dist/tts/providers/piper.d.ts.map +1 -0
- package/dist/tts/providers/piper.js +417 -0
- package/dist/tts/providers/piper.js.map +1 -0
- package/dist/voice-input.d.ts +9 -0
- package/dist/voice-input.d.ts.map +1 -0
- package/dist/voice-input.js +137 -0
- package/dist/voice-input.js.map +1 -0
- package/dist/wake-word/index.d.ts +19 -0
- package/dist/wake-word/index.d.ts.map +1 -0
- package/dist/wake-word/index.js +200 -0
- package/dist/wake-word/index.js.map +1 -0
- package/dist/wake-word/recorder.d.ts +19 -0
- package/dist/wake-word/recorder.d.ts.map +1 -0
- package/dist/wake-word/recorder.js +145 -0
- package/dist/wake-word/recorder.js.map +1 -0
- package/hooks/notification.js +125 -0
- package/hooks/post-tool-use.js +374 -0
- package/hooks/session-start.js +212 -0
- package/hooks/stop.js +254 -0
- package/models/.gitkeep +0 -0
- package/package.json +80 -0
- package/python/stt_service.py +59 -0
- package/python/voice_input.py +154 -0
- package/scripts/install.sh +147 -0
- package/scripts/listen.py +161 -0
- package/scripts/postinstall.js +57 -0
- package/scripts/record.sh +79 -0
- package/scripts/setup-hooks.sh +22 -0
- package/scripts/voice-input.sh +66 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
# Claude Voice Extension
|
|
2
|
+
|
|
3
|
+
Voice interface extension for Claude Code - enables speech-to-text input, text-to-speech output, and wake word detection.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Voice Input (STT)**: Speak commands to Claude using wake word or push-to-talk
|
|
8
|
+
- **Voice Output (TTS)**: Claude's responses can be spoken aloud (automatic speaking is off by default; enable it with `tts.autoSpeak`)
|
|
9
|
+
- **Voice-Friendly Formatting**: Claude structures responses with TTS-optimized abstracts
|
|
10
|
+
- **Wake Word**: Say "Jarvis" to start speaking a command
|
|
11
|
+
- **Voice Notifications**: Audio alerts for permission prompts and idle states
|
|
12
|
+
- **Multiple Providers**: Supports local and cloud-based speech services
|
|
13
|
+
|
|
14
|
+
## Quick Start
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Install globally
|
|
18
|
+
npm install -g claude-voice
|
|
19
|
+
|
|
20
|
+
# Run interactive setup
|
|
21
|
+
claude-voice setup
|
|
22
|
+
|
|
23
|
+
# Start the daemon
|
|
24
|
+
claude-voice start
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
### Prerequisites
|
|
30
|
+
|
|
31
|
+
- Node.js 18+
|
|
32
|
+
- macOS (primary) or Linux
|
|
33
|
+
- Microphone access
|
|
34
|
+
|
|
35
|
+
### Install from npm
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
npm install -g claude-voice
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Install from source
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
git clone https://github.com/anthropics/claude-voice.git
|
|
45
|
+
cd claude-voice
|
|
46
|
+
npm install
|
|
47
|
+
npm run build
|
|
48
|
+
npm link
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Configuration
|
|
52
|
+
|
|
53
|
+
Configuration is stored in `~/.claude-voice/config.json`.
|
|
54
|
+
|
|
55
|
+
### Interactive Setup
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
claude-voice setup
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Using the CLI
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# View full configuration
|
|
65
|
+
claude-voice config
|
|
66
|
+
|
|
67
|
+
# Get a specific value
|
|
68
|
+
claude-voice config get tts.provider
|
|
69
|
+
|
|
70
|
+
# Set a value
|
|
71
|
+
claude-voice config set tts.autoSpeak=false
|
|
72
|
+
|
|
73
|
+
# Reset to defaults
|
|
74
|
+
claude-voice config reset
|
|
75
|
+
|
|
76
|
+
# Edit in your editor
|
|
77
|
+
claude-voice config edit
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Configuration Options
|
|
81
|
+
|
|
82
|
+
| Option | Type | Default | Description |
|
|
83
|
+
|--------|------|---------|-------------|
|
|
84
|
+
| `tts.provider` | string | `macos-say` | TTS provider: `macos-say`, `openai`, `elevenlabs`, `piper`, `espeak`, `disabled` |
|
|
85
|
+
| `tts.autoSpeak` | boolean | `false` | Automatically speak Claude's responses |
|
|
86
|
+
| `tts.maxSpeechLength` | number | `500` | Maximum characters to speak |
|
|
87
|
+
| `tts.skipCodeBlocks` | boolean | `true` | Skip code blocks when speaking |
|
|
88
|
+
| `stt.provider` | string | `openai` | STT provider: `openai`, `whisper-local`, `sherpa-onnx`, `disabled` |
|
|
89
|
+
| `stt.language` | string | `en` | Default language for transcription |
|
|
90
|
+
| `wakeWord.enabled` | boolean | `true` | Enable wake word detection |
|
|
91
|
+
| `wakeWord.keyword` | string | `jarvis` | Wake word keyword |
|
|
92
|
+
| `wakeWord.sensitivity` | number | `0.5` | Wake word sensitivity (0.0-1.0) |
|
|
93
|
+
| `wakeWord.playSound` | boolean | `true` | Play sound when wake word detected |
|
|
94
|
+
| `notifications.enabled` | boolean | `true` | Enable voice notifications |
|
|
95
|
+
| `notifications.permissionPrompt` | boolean | `true` | Speak permission prompts |
|
|
96
|
+
| `notifications.idlePrompt` | boolean | `true` | Speak idle prompts |
|
|
97
|
+
| `notifications.errors` | boolean | `false` | Speak error notifications |
|
|
98
|
+
| `voiceOutput.enabled` | boolean | `false` | Enable TTS-friendly response formatting |
|
|
99
|
+
| `voiceOutput.abstractMarker` | string | `<!-- TTS -->` | Marker separating spoken/technical content |
|
|
100
|
+
| `voiceOutput.maxAbstractLength` | number | `200` | Max characters for spoken abstract |
|
|
101
|
+
| `voiceOutput.promptTemplate` | string | `null` | Custom prompt template path |
|
|
102
|
+
| `toolTTS.enabled` | boolean | `true` | Enable TTS announcements for tool use |
|
|
103
|
+
| `toolTTS.mode` | string | `summarize` | Tool announcement mode: `summarize` or `completion` |
|
|
104
|
+
| `toolTTS.announceErrors` | boolean | `true` | Announce tool execution errors |
|
|
105
|
+
| `toolTTS.maxSummaryLength` | number | `100` | Max characters for tool summaries |
|
|
106
|
+
| `terminal.injectionMethod` | string | `auto` | Terminal injection: `auto`, `applescript`, `xdotool` |
|
|
107
|
+
| `terminal.targetTerminal` | string | `auto` | Target terminal: `auto`, `iterm`, `terminal`, etc. |
|
|
108
|
+
| `terminal.pressEnterAfterInput` | boolean | `true` | Press enter after injecting text |
|
|
109
|
+
| `recording.sampleRate` | number | `16000` | Audio sample rate in Hz |
|
|
110
|
+
| `recording.channels` | number | `1` | Audio channels (mono=1, stereo=2) |
|
|
111
|
+
| `recording.silenceThreshold` | number | `2000` | Silence duration (ms) to stop recording |
|
|
112
|
+
| `recording.silenceAmplitude` | number | `500` | Amplitude threshold for silence detection |
|
|
113
|
+
| `recording.maxDuration` | number | `30000` | Maximum recording duration (ms) |
|
|
114
|
+
| `server.port` | number | `3456` | Daemon HTTP server port |
|
|
115
|
+
| `server.host` | string | `127.0.0.1` | Daemon HTTP server host |
|
|
116
|
+
| `debug` | boolean | `false` | Enable debug logging |
|
|
117
|
+
|
|
118
|
+
## TTS Providers
|
|
119
|
+
|
|
120
|
+
### macOS Say (Default on macOS)
|
|
121
|
+
No API key required. Uses built-in macOS speech synthesis.
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# List available voices
|
|
125
|
+
claude-voice voices
|
|
126
|
+
|
|
127
|
+
# Change voice
|
|
128
|
+
claude-voice config set tts.macos.voice=Alex
|
|
129
|
+
|
|
130
|
+
# Adjust speed (words per minute)
|
|
131
|
+
claude-voice config set tts.macos.rate=180
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### OpenAI TTS
|
|
135
|
+
Requires `OPENAI_API_KEY`. High-quality neural voices.
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
# Set API key
|
|
139
|
+
export OPENAI_API_KEY="sk-..."
|
|
140
|
+
|
|
141
|
+
# Or save to .env file
|
|
142
|
+
echo "OPENAI_API_KEY=sk-..." >> ~/.claude-voice/.env
|
|
143
|
+
|
|
144
|
+
# Switch to OpenAI
|
|
145
|
+
claude-voice config set tts.provider=openai
|
|
146
|
+
|
|
147
|
+
# Choose voice (alloy, echo, fable, onyx, nova, shimmer)
|
|
148
|
+
claude-voice config set tts.openai.voice=nova
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### ElevenLabs
|
|
152
|
+
Requires `ELEVENLABS_API_KEY`. Premium voice cloning.
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
export ELEVENLABS_API_KEY="..."
|
|
156
|
+
claude-voice config set tts.provider=elevenlabs
|
|
157
|
+
claude-voice config set tts.elevenlabs.voiceId=YOUR_VOICE_ID
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## STT Providers
|
|
161
|
+
|
|
162
|
+
### Sherpa-ONNX (FREE - Recommended)
|
|
163
|
+
Embedded, offline speech recognition. No API key required!
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# List available models
|
|
167
|
+
claude-voice model list
|
|
168
|
+
|
|
169
|
+
# Download a model (75-488MB)
|
|
170
|
+
claude-voice model download whisper-tiny
|
|
171
|
+
|
|
172
|
+
# Switch to Sherpa-ONNX
|
|
173
|
+
claude-voice config set stt.provider=sherpa-onnx
|
|
174
|
+
|
|
175
|
+
# Set language (supports 100+ languages including Turkish)
|
|
176
|
+
claude-voice config set stt.language=tr
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Available models:
|
|
180
|
+
| Model | Size | Speed | Accuracy |
|
|
181
|
+
|-------|------|-------|----------|
|
|
182
|
+
| `whisper-tiny` | 75 MB | Fast | Good |
|
|
183
|
+
| `whisper-base` | 142 MB | Medium | Better |
|
|
184
|
+
| `whisper-small` | 488 MB | Slower | Best |
|
|
185
|
+
|
|
186
|
+
### OpenAI Whisper API
|
|
187
|
+
Fast cloud transcription. Requires `OPENAI_API_KEY`.
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
claude-voice config set stt.provider=openai
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Local Whisper
|
|
194
|
+
Runs locally on your machine. Requires Python and whisper:
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
pip install openai-whisper
|
|
198
|
+
claude-voice config set stt.provider=whisper-local
|
|
199
|
+
|
|
200
|
+
# Choose model (tiny, base, small, medium, large)
|
|
201
|
+
claude-voice config set stt.whisperLocal.model=base
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## Wake Word Detection
|
|
205
|
+
|
|
206
|
+
Wake word detection uses Picovoice Porcupine. Get a free access key at [picovoice.ai](https://picovoice.ai/) and provide it as `PICOVOICE_ACCESS_KEY`.
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
# Set API key
|
|
210
|
+
export PICOVOICE_ACCESS_KEY="your-key"
|
|
211
|
+
# Or save to .env
|
|
212
|
+
echo "PICOVOICE_ACCESS_KEY=your-key" >> ~/.claude-voice/.env
|
|
213
|
+
|
|
214
|
+
# Enable wake word
|
|
215
|
+
claude-voice config set wakeWord.enabled=true
|
|
216
|
+
|
|
217
|
+
# Adjust sensitivity (0.0-1.0, higher = more sensitive)
|
|
218
|
+
claude-voice config set wakeWord.sensitivity=0.6
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## Voice Output Formatting
|
|
222
|
+
|
|
223
|
+
When enabled, Claude structures responses with a TTS-friendly abstract at the beginning. This makes voice output more natural and conversational.
|
|
224
|
+
|
|
225
|
+
### How It Works
|
|
226
|
+
|
|
227
|
+
1. Claude adds a brief conversational summary before the `<!-- TTS -->` marker
|
|
228
|
+
2. Technical details (code, file paths, etc.) go after the marker
|
|
229
|
+
3. Only the abstract portion is spoken via TTS
|
|
230
|
+
|
|
231
|
+
**Example Claude Response:**
|
|
232
|
+
```
|
|
233
|
+
I found and fixed the authentication bug. The issue was a missing null check.
|
|
234
|
+
|
|
235
|
+
<!-- TTS -->
|
|
236
|
+
|
|
237
|
+
**Technical Details:**
|
|
238
|
+
Modified `auth.ts:45` to add proper null checking...
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
The TTS will only speak: *"I found and fixed the authentication bug. The issue was a missing null check."*
|
|
242
|
+
|
|
243
|
+
### Configuration
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# Enable/disable voice output formatting
|
|
247
|
+
claude-voice output enable
|
|
248
|
+
claude-voice output disable
|
|
249
|
+
|
|
250
|
+
# Check current status
|
|
251
|
+
claude-voice output status
|
|
252
|
+
|
|
253
|
+
# Configure settings
|
|
254
|
+
claude-voice output config --length 300 # Max abstract length
|
|
255
|
+
claude-voice output config --marker "---" # Custom marker
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Custom Prompt Template
|
|
259
|
+
|
|
260
|
+
Create `~/.claude-voice/voice-prompt.md` to customize how Claude formats responses:
|
|
261
|
+
|
|
262
|
+
```bash
|
|
263
|
+
# Copy the default template
|
|
264
|
+
cp /path/to/claude-voice/config/voice-prompt.md ~/.claude-voice/
|
|
265
|
+
|
|
266
|
+
# Edit to your preferences
|
|
267
|
+
nano ~/.claude-voice/voice-prompt.md
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
Template variables:
|
|
271
|
+
- `{{MARKER}}` - Replaced with your configured marker
|
|
272
|
+
- `{{MAX_LENGTH}}` - Replaced with max abstract length
|
|
273
|
+
|
|
274
|
+
## CLI Reference
|
|
275
|
+
|
|
276
|
+
```
|
|
277
|
+
claude-voice <command>
|
|
278
|
+
|
|
279
|
+
Core Commands:
|
|
280
|
+
start Start the daemon
|
|
281
|
+
stop Stop the daemon
|
|
282
|
+
restart Restart the daemon
|
|
283
|
+
status Check status
|
|
284
|
+
|
|
285
|
+
Setup:
|
|
286
|
+
setup Interactive setup wizard
|
|
287
|
+
doctor Diagnose issues
|
|
288
|
+
|
|
289
|
+
Configuration:
|
|
290
|
+
config View configuration
|
|
291
|
+
config get <key> Get a value
|
|
292
|
+
config set <k>=<v> Set a value
|
|
293
|
+
config reset Reset to defaults
|
|
294
|
+
config edit Edit in $EDITOR
|
|
295
|
+
|
|
296
|
+
Hooks:
|
|
297
|
+
hooks install Install Claude Code hooks
|
|
298
|
+
hooks uninstall Remove hooks
|
|
299
|
+
hooks status Check hooks status
|
|
300
|
+
|
|
301
|
+
Voice Output:
|
|
302
|
+
output enable Enable TTS-friendly formatting
|
|
303
|
+
output disable Disable TTS-friendly formatting
|
|
304
|
+
output status Show voice output settings
|
|
305
|
+
output config Configure voice output options
|
|
306
|
+
|
|
307
|
+
Testing:
|
|
308
|
+
test-tts [text] Test TTS
|
|
309
|
+
test-stt <file> Test STT
|
|
310
|
+
|
|
311
|
+
Models:
|
|
312
|
+
model list List available STT models
|
|
313
|
+
model download <id> Download a model
|
|
314
|
+
model remove <id> Remove an installed model
|
|
315
|
+
|
|
316
|
+
Utilities:
|
|
317
|
+
voices List TTS voices
|
|
318
|
+
devices List audio devices
|
|
319
|
+
logs View logs
|
|
320
|
+
logs -f Follow logs
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
## Troubleshooting
|
|
324
|
+
|
|
325
|
+
### Run the doctor command
|
|
326
|
+
|
|
327
|
+
```bash
|
|
328
|
+
claude-voice doctor
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
This will check:
|
|
332
|
+
- Node.js version
|
|
333
|
+
- Platform support
|
|
334
|
+
- Native TTS availability
|
|
335
|
+
- Terminal injection support
|
|
336
|
+
- Configuration validity
|
|
337
|
+
- Hooks installation
|
|
338
|
+
- API keys
|
|
339
|
+
- Daemon status
|
|
340
|
+
|
|
341
|
+
### Common Issues
|
|
342
|
+
|
|
343
|
+
**Daemon won't start**
|
|
344
|
+
```bash
|
|
345
|
+
# Check logs
|
|
346
|
+
claude-voice logs
|
|
347
|
+
|
|
348
|
+
# Run in foreground for debugging
|
|
349
|
+
claude-voice start -f
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
**No audio output**
|
|
353
|
+
```bash
|
|
354
|
+
# Test TTS directly
|
|
355
|
+
claude-voice test-tts "Hello world"
|
|
356
|
+
|
|
357
|
+
# Check provider
|
|
358
|
+
claude-voice config get tts.provider
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
**Wake word not detecting**
|
|
362
|
+
- Ensure `PICOVOICE_ACCESS_KEY` is set
|
|
363
|
+
- On macOS: check microphone permissions in System Settings (System Preferences on older versions)
|
|
364
|
+
- Try `claude-voice devices` to see available microphones
|
|
365
|
+
|
|
366
|
+
**Text not appearing in terminal**
|
|
367
|
+
- On macOS: Allow Terminal in Accessibility settings
|
|
368
|
+
- Check `claude-voice doctor` for terminal injection status
|
|
369
|
+
|
|
370
|
+
## API Reference
|
|
371
|
+
|
|
372
|
+
The daemon exposes an HTTP API on `127.0.0.1:3456` by default (configurable via `server.host` and `server.port`):
|
|
373
|
+
|
|
374
|
+
| Endpoint | Method | Description |
|
|
375
|
+
|----------|--------|-------------|
|
|
376
|
+
| `/status` | GET | Daemon status |
|
|
377
|
+
| `/tts` | POST | Speak text `{ "text": "...", "priority": false }` |
|
|
378
|
+
| `/stt` | POST | Transcribe audio (multipart/form-data) |
|
|
379
|
+
| `/config` | GET | Get configuration |
|
|
380
|
+
| `/config` | POST | Update configuration |
|
|
381
|
+
| `/tts/stop` | POST | Stop current playback |
|
|
382
|
+
|
|
383
|
+
## Environment Variables
|
|
384
|
+
|
|
385
|
+
| Variable | Purpose |
|
|
386
|
+
|----------|---------|
|
|
387
|
+
| `OPENAI_API_KEY` | OpenAI TTS and Whisper API |
|
|
388
|
+
| `ELEVENLABS_API_KEY` | ElevenLabs TTS |
|
|
389
|
+
| `PICOVOICE_ACCESS_KEY` | Wake word detection |
|
|
390
|
+
|
|
391
|
+
Store in `~/.claude-voice/.env` or export in your shell.
|
|
392
|
+
|
|
393
|
+
## License
|
|
394
|
+
|
|
395
|
+
MIT License
|
package/bin/claude-voice
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/bin/bash
# Claude Voice Extension - CLI wrapper
# Sets up library paths for sherpa-onnx native bindings, then replaces this
# shell with the compiled Node.js CLI (dist/cli.js), forwarding all arguments.

# Resolve the real location of this script, following symlinks. A global
# `npm install -g` exposes bin scripts as symlinks in the prefix's bin/
# directory; without resolving them ROOT_DIR would point at the prefix
# instead of the package directory. `readlink` is used without `-f` so the
# loop also works where `readlink -f` is unavailable.
SCRIPT_PATH="${BASH_SOURCE[0]}"
while [ -L "$SCRIPT_PATH" ]; do
  LINK_DIR="$(cd -P "$(dirname "$SCRIPT_PATH")" && pwd)"
  SCRIPT_PATH="$(readlink "$SCRIPT_PATH")"
  # A relative link target is relative to the directory holding the link.
  case "$SCRIPT_PATH" in
    /*) ;;
    *) SCRIPT_PATH="$LINK_DIR/$SCRIPT_PATH" ;;
  esac
done

SCRIPT_DIR="$(cd -P "$(dirname "$SCRIPT_PATH")" && pwd)"
ROOT_DIR="$(dirname "$SCRIPT_DIR")"

# Detect platform and set library path.
# The `${VAR:+:$VAR}` form appends the previous value only when it is set and
# non-empty, so no trailing ':' is left behind (an empty entry in
# LD_LIBRARY_PATH is interpreted as the current working directory).
# Platforms not listed here fall through with the environment unchanged.
case "$(uname -s)-$(uname -m)" in
  Darwin-arm64)
    PLATFORM_PKG="sherpa-onnx-darwin-arm64"
    export DYLD_LIBRARY_PATH="$ROOT_DIR/node_modules/$PLATFORM_PKG${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
    ;;
  Darwin-x86_64)
    PLATFORM_PKG="sherpa-onnx-darwin-x64"
    export DYLD_LIBRARY_PATH="$ROOT_DIR/node_modules/$PLATFORM_PKG${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
    ;;
  Linux-x86_64)
    PLATFORM_PKG="sherpa-onnx-linux-x64"
    export LD_LIBRARY_PATH="$ROOT_DIR/node_modules/$PLATFORM_PKG${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    ;;
  Linux-aarch64)
    PLATFORM_PKG="sherpa-onnx-linux-arm64"
    export LD_LIBRARY_PATH="$ROOT_DIR/node_modules/$PLATFORM_PKG${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    ;;
esac

# Run the CLI
exec node "$ROOT_DIR/dist/cli.js" "$@"
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"tts": {
|
|
4
|
+
"provider": "macos-say",
|
|
5
|
+
"autoSpeak": false,
|
|
6
|
+
"maxSpeechLength": 500,
|
|
7
|
+
"skipCodeBlocks": true,
|
|
8
|
+
"macos": {
|
|
9
|
+
"voice": "Samantha",
|
|
10
|
+
"rate": 200
|
|
11
|
+
},
|
|
12
|
+
"openai": {
|
|
13
|
+
"model": "tts-1",
|
|
14
|
+
"voice": "nova",
|
|
15
|
+
"speed": 1.0
|
|
16
|
+
},
|
|
17
|
+
"elevenlabs": {
|
|
18
|
+
"voiceId": "",
|
|
19
|
+
"modelId": "eleven_monolingual_v1",
|
|
20
|
+
"stability": 0.5,
|
|
21
|
+
"similarityBoost": 0.75
|
|
22
|
+
},
|
|
23
|
+
"espeak": {
|
|
24
|
+
"voice": "en",
|
|
25
|
+
"speed": 175,
|
|
26
|
+
"pitch": 50
|
|
27
|
+
},
|
|
28
|
+
"piper": {
|
|
29
|
+
"voice": "en_US-lessac-medium",
|
|
30
|
+
"speaker": 0
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"stt": {
|
|
34
|
+
"provider": "openai",
|
|
35
|
+
"language": "en",
|
|
36
|
+
"sherpaOnnx": {
|
|
37
|
+
"model": "whisper-tiny"
|
|
38
|
+
},
|
|
39
|
+
"whisperLocal": {
|
|
40
|
+
"model": "base",
|
|
41
|
+
"device": "cpu"
|
|
42
|
+
},
|
|
43
|
+
"openai": {
|
|
44
|
+
"model": "whisper-1"
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"wakeWord": {
|
|
48
|
+
"enabled": true,
|
|
49
|
+
"keyword": "jarvis",
|
|
50
|
+
"sensitivity": 0.5,
|
|
51
|
+
"playSound": true
|
|
52
|
+
},
|
|
53
|
+
"notifications": {
|
|
54
|
+
"enabled": true,
|
|
55
|
+
"permissionPrompt": true,
|
|
56
|
+
"idlePrompt": true,
|
|
57
|
+
"errors": false,
|
|
58
|
+
"customMessages": {
|
|
59
|
+
"permissionPrompt": "Claude needs your permission.",
|
|
60
|
+
"idlePrompt": "Claude is waiting for your input."
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
"voiceOutput": {
|
|
64
|
+
"enabled": false,
|
|
65
|
+
"abstractMarker": "<!-- TTS -->",
|
|
66
|
+
"maxAbstractLength": 200,
|
|
67
|
+
"promptTemplate": null
|
|
68
|
+
},
|
|
69
|
+
"toolTTS": {
|
|
70
|
+
"enabled": true,
|
|
71
|
+
"mode": "summarize",
|
|
72
|
+
"tools": {
|
|
73
|
+
"Read": true,
|
|
74
|
+
"Grep": true,
|
|
75
|
+
"Glob": false,
|
|
76
|
+
"Bash": true,
|
|
77
|
+
"Write": true,
|
|
78
|
+
"Edit": true,
|
|
79
|
+
"MultiEdit": true,
|
|
80
|
+
"WebFetch": false,
|
|
81
|
+
"WebSearch": false,
|
|
82
|
+
"Task": false,
|
|
83
|
+
"default": false
|
|
84
|
+
},
|
|
85
|
+
"customMessages": {
|
|
86
|
+
"completion": "Done.",
|
|
87
|
+
"error": "Operation failed."
|
|
88
|
+
},
|
|
89
|
+
"announceErrors": true,
|
|
90
|
+
"maxSummaryLength": 100
|
|
91
|
+
},
|
|
92
|
+
"terminal": {
|
|
93
|
+
"injectionMethod": "auto",
|
|
94
|
+
"targetTerminal": "auto",
|
|
95
|
+
"pressEnterAfterInput": true
|
|
96
|
+
},
|
|
97
|
+
"recording": {
|
|
98
|
+
"sampleRate": 16000,
|
|
99
|
+
"channels": 1,
|
|
100
|
+
"silenceThreshold": 2000,
|
|
101
|
+
"silenceAmplitude": 500,
|
|
102
|
+
"maxDuration": 30000
|
|
103
|
+
},
|
|
104
|
+
"server": {
|
|
105
|
+
"port": 3456,
|
|
106
|
+
"host": "127.0.0.1"
|
|
107
|
+
},
|
|
108
|
+
"debug": false
|
|
109
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Voice Mode Instructions
|
|
2
|
+
|
|
3
|
+
[Voice Mode Active]
|
|
4
|
+
|
|
5
|
+
Your responses will be spoken aloud via TTS. Structure your responses as follows:
|
|
6
|
+
|
|
7
|
+
1. Start with a brief conversational summary (1-2 sentences) that sounds natural when spoken
|
|
8
|
+
2. Use a `{{MARKER}}` marker after the spoken portion
|
|
9
|
+
3. Then provide detailed technical content
|
|
10
|
+
|
|
11
|
+
## Example
|
|
12
|
+
|
|
13
|
+
"I found and fixed the authentication bug. The issue was a missing null check in the login handler."
|
|
14
|
+
|
|
15
|
+
{{MARKER}}
|
|
16
|
+
|
|
17
|
+
**Details:**
|
|
18
|
+
The bug was in `auth.ts:45` where...
|
|
19
|
+
|
|
20
|
+
## Guidelines for the Spoken Portion
|
|
21
|
+
|
|
22
|
+
- Use natural, conversational language
|
|
23
|
+
- Avoid technical jargon, file paths, and code references
|
|
24
|
+
- Keep it under {{MAX_LENGTH}} characters
|
|
25
|
+
- Speak in first person ("I did..." not "The system...")
|
|
26
|
+
- Be concise but informative
|
|
27
|
+
- Sound like you're talking to a colleague
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;;;GAIG"}
|