claude-voice 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +395 -0
- package/bin/claude-voice +29 -0
- package/config/default.json +109 -0
- package/config/voice-prompt.md +27 -0
- package/dist/cli.d.ts +8 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +1103 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +140 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +179 -0
- package/dist/config.js.map +1 -0
- package/dist/env.d.ts +40 -0
- package/dist/env.d.ts.map +1 -0
- package/dist/env.js +175 -0
- package/dist/env.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +140 -0
- package/dist/index.js.map +1 -0
- package/dist/platform/index.d.ts +35 -0
- package/dist/platform/index.d.ts.map +1 -0
- package/dist/platform/index.js +170 -0
- package/dist/platform/index.js.map +1 -0
- package/dist/server.d.ts +5 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +185 -0
- package/dist/server.js.map +1 -0
- package/dist/stt/index.d.ts +15 -0
- package/dist/stt/index.d.ts.map +1 -0
- package/dist/stt/index.js +54 -0
- package/dist/stt/index.js.map +1 -0
- package/dist/stt/providers/openai.d.ts +15 -0
- package/dist/stt/providers/openai.d.ts.map +1 -0
- package/dist/stt/providers/openai.js +74 -0
- package/dist/stt/providers/openai.js.map +1 -0
- package/dist/stt/providers/sherpa-onnx.d.ts +50 -0
- package/dist/stt/providers/sherpa-onnx.d.ts.map +1 -0
- package/dist/stt/providers/sherpa-onnx.js +237 -0
- package/dist/stt/providers/sherpa-onnx.js.map +1 -0
- package/dist/stt/providers/whisper-local.d.ts +19 -0
- package/dist/stt/providers/whisper-local.d.ts.map +1 -0
- package/dist/stt/providers/whisper-local.js +141 -0
- package/dist/stt/providers/whisper-local.js.map +1 -0
- package/dist/terminal/input-injector.d.ts +55 -0
- package/dist/terminal/input-injector.d.ts.map +1 -0
- package/dist/terminal/input-injector.js +189 -0
- package/dist/terminal/input-injector.js.map +1 -0
- package/dist/tts/index.d.ts +20 -0
- package/dist/tts/index.d.ts.map +1 -0
- package/dist/tts/index.js +72 -0
- package/dist/tts/index.js.map +1 -0
- package/dist/tts/providers/elevenlabs.d.ts +23 -0
- package/dist/tts/providers/elevenlabs.d.ts.map +1 -0
- package/dist/tts/providers/elevenlabs.js +142 -0
- package/dist/tts/providers/elevenlabs.js.map +1 -0
- package/dist/tts/providers/macos-say.d.ts +17 -0
- package/dist/tts/providers/macos-say.d.ts.map +1 -0
- package/dist/tts/providers/macos-say.js +72 -0
- package/dist/tts/providers/macos-say.js.map +1 -0
- package/dist/tts/providers/openai.d.ts +19 -0
- package/dist/tts/providers/openai.d.ts.map +1 -0
- package/dist/tts/providers/openai.js +118 -0
- package/dist/tts/providers/openai.js.map +1 -0
- package/dist/tts/providers/piper.d.ts +48 -0
- package/dist/tts/providers/piper.d.ts.map +1 -0
- package/dist/tts/providers/piper.js +417 -0
- package/dist/tts/providers/piper.js.map +1 -0
- package/dist/voice-input.d.ts +9 -0
- package/dist/voice-input.d.ts.map +1 -0
- package/dist/voice-input.js +137 -0
- package/dist/voice-input.js.map +1 -0
- package/dist/wake-word/index.d.ts +19 -0
- package/dist/wake-word/index.d.ts.map +1 -0
- package/dist/wake-word/index.js +200 -0
- package/dist/wake-word/index.js.map +1 -0
- package/dist/wake-word/recorder.d.ts +19 -0
- package/dist/wake-word/recorder.d.ts.map +1 -0
- package/dist/wake-word/recorder.js +145 -0
- package/dist/wake-word/recorder.js.map +1 -0
- package/hooks/notification.js +125 -0
- package/hooks/post-tool-use.js +374 -0
- package/hooks/session-start.js +212 -0
- package/hooks/stop.js +254 -0
- package/models/.gitkeep +0 -0
- package/package.json +80 -0
- package/python/stt_service.py +59 -0
- package/python/voice_input.py +154 -0
- package/scripts/install.sh +147 -0
- package/scripts/listen.py +161 -0
- package/scripts/postinstall.js +57 -0
- package/scripts/record.sh +79 -0
- package/scripts/setup-hooks.sh +22 -0
- package/scripts/voice-input.sh +66 -0
package/hooks/stop.js
ADDED
@@ -0,0 +1,254 @@
#!/usr/bin/env node
/**
 * Claude Code Hook: Stop
 *
 * This hook runs when Claude finishes responding.
 * It extracts Claude's response and sends it to the TTS service.
 * Respects user configuration for auto-speak.
 */

const http = require('http');
const fs = require('fs');
const path = require('path');
const os = require('os');
const readline = require('readline');

const API_URL = 'http://127.0.0.1:3456';
const CONFIG_FILE = path.join(os.homedir(), '.claude-voice', 'config.json');

// Load configuration
function loadConfig() {
  try {
    if (fs.existsSync(CONFIG_FILE)) {
      return JSON.parse(fs.readFileSync(CONFIG_FILE, 'utf-8'));
    }
  } catch {
    // Use defaults
  }
  return {
    tts: {
      autoSpeak: true,
      maxSpeechLength: 500,
      skipCodeBlocks: true
    }
  };
}

async function sendToTTS(text, priority = false) {
  return new Promise((resolve, reject) => {
    const data = JSON.stringify({ text, priority });

    const req = http.request(`${API_URL}/tts`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Content-Length': Buffer.byteLength(data, 'utf8')
      },
      timeout: 5000
    }, (res) => {
      let body = '';
      res.on('data', chunk => body += chunk);
      res.on('end', () => {
        try {
          const response = JSON.parse(body);
          resolve(response);
        } catch {
          resolve({ success: false });
        }
      });
    });

    req.on('error', () => resolve({ success: false }));
    req.on('timeout', () => {
      req.destroy();
      resolve({ success: false });
    });

    req.write(data);
    req.end();
  });
}

async function extractLastResponse(transcriptPath) {
  if (!fs.existsSync(transcriptPath)) {
    return null;
  }

  const fileStream = fs.createReadStream(transcriptPath);
  const rl = readline.createInterface({
    input: fileStream,
    crlfDelay: Infinity
  });

  let lastAssistantMessage = null;

  for await (const line of rl) {
    try {
      const entry = JSON.parse(line);

      // Look for assistant messages
      if (entry.type === 'assistant' && entry.message) {
        // Extract text content from the message
        if (entry.message.content) {
          const textContent = entry.message.content
            .filter(c => c.type === 'text')
            .map(c => c.text)
            .join('\n');

          if (textContent) {
            lastAssistantMessage = textContent;
          }
        }
      }
    } catch {
      // Skip malformed lines
    }
  }

  return lastAssistantMessage;
}

// Extract abstract portion if marker is present
function extractAbstract(text, config) {
  const marker = config.voiceOutput?.abstractMarker || '<!-- TTS -->';

  if (text.includes(marker)) {
    // Extract text before the marker (the spoken abstract)
    const abstract = text.split(marker)[0].trim();
    return abstract || null;
  }

  return null;
}

// Clean text for TTS (remove markdown, code, etc.)
function cleanForTTS(text) {
  let cleaned = text;

  // Remove code blocks
  cleaned = cleaned.replace(/```[\s\S]*?```/g, '');

  // Remove inline code
  cleaned = cleaned.replace(/`[^`]+`/g, '');

  // Remove markdown formatting
  cleaned = cleaned.replace(/[*_~]/g, '');

  // Remove URLs
  cleaned = cleaned.replace(/https?:\/\/\S+/g, '');

  // Remove file paths
  cleaned = cleaned.replace(/\/[a-zA-Z0-9_\-./]+/g, '');

  // Collapse multiple newlines
  cleaned = cleaned.replace(/\n{2,}/g, '. ');
  cleaned = cleaned.replace(/\n/g, ' ');

  // Trim whitespace
  cleaned = cleaned.trim();

  return cleaned;
}

function summarizeForSpeech(text, config) {
  if (!text) return null;

  // First, try to extract abstract if voiceOutput is enabled
  if (config.voiceOutput?.enabled !== false) {
    const abstract = extractAbstract(text, config);
    if (abstract) {
      // Clean the abstract for TTS
      return cleanForTTS(abstract) || null;
    }
  }

  // Fallback: use existing logic for responses without abstract marker
  let cleaned = text;
  const maxLength = config.voiceOutput?.maxAbstractLength || config.tts?.maxSpeechLength || 500;
  const skipCodeBlocks = config.tts?.skipCodeBlocks !== false;

  // Remove code blocks if configured
  if (skipCodeBlocks) {
    cleaned = cleaned.replace(/```[\s\S]*?```/g, '');
  }

  // Remove inline code
  cleaned = cleaned.replace(/`[^`]+`/g, '');

  // Remove markdown formatting
  cleaned = cleaned.replace(/[*_~]/g, '');

  // Remove URLs
  cleaned = cleaned.replace(/https?:\/\/\S+/g, '');

  // Remove file paths
  cleaned = cleaned.replace(/\/[a-zA-Z0-9_\-./]+/g, '');

  // Collapse multiple newlines
  cleaned = cleaned.replace(/\n{2,}/g, '. ');
  cleaned = cleaned.replace(/\n/g, ' ');

  // Trim whitespace
  cleaned = cleaned.trim();

  // Truncate to max length
  if (cleaned.length > maxLength) {
    const truncated = cleaned.substring(0, maxLength);
    const lastSentence = truncated.lastIndexOf('.');
    if (lastSentence > maxLength * 0.7) {
      cleaned = truncated.substring(0, lastSentence + 1);
    } else {
      cleaned = truncated + '...';
    }
  }

  return cleaned || null;
}

async function main() {
  const config = loadConfig();

  // Check if auto-speak is enabled
  if (!config.tts || config.tts.autoSpeak === false) {
    console.log(JSON.stringify({}));
    return;
  }

  // Read hook input from stdin
  let input = '';
  for await (const chunk of process.stdin) {
    input += chunk;
  }

  const hookData = JSON.parse(input);
  const { transcript_path } = hookData;

  if (!transcript_path) {
    console.log(JSON.stringify({}));
    return;
  }

  try {
    // Extract Claude's last response
    const lastResponse = await extractLastResponse(transcript_path);

    if (lastResponse) {
      // Summarize and clean for speech
      const speechText = summarizeForSpeech(lastResponse, config);

      if (speechText) {
        await sendToTTS(speechText);
      }
    }
  } catch {
    // Silently fail - we don't want to interrupt Claude Code
  }

  // Output empty response
  console.log(JSON.stringify({}));
}

main().catch(() => {
  console.log(JSON.stringify({}));
  process.exit(0); // Don't fail the hook
});
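The Stop hook is driven entirely by a JSON payload on stdin (a transcript_path pointing at a JSONL transcript) plus a POST to the local /tts endpoint. A minimal smoke test outside Claude Code is to fabricate both; the sketch below is illustrative, not part of the package: the temp file name and sample text are made up, and it assumes the claude-voice server is listening on 127.0.0.1:3456 (otherwise the hook silently resolves { success: false } and still prints {}).

// Hypothetical smoke test for hooks/stop.js; run from the package root.
const { execSync } = require('child_process');
const fs = require('fs');
const os = require('os');
const path = require('path');

// Write a one-line JSONL transcript in the shape extractLastResponse() parses.
const transcript = path.join(os.tmpdir(), 'claude-transcript-test.jsonl');
fs.writeFileSync(transcript, JSON.stringify({
  type: 'assistant',
  message: { content: [{ type: 'text', text: 'Build finished. All tests pass.' }] }
}) + '\n');

// Feed the hook the same stdin payload Claude Code would send.
const out = execSync('node hooks/stop.js', {
  input: JSON.stringify({ transcript_path: transcript })
});
console.log(out.toString()); // expected output: {}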
package/models/.gitkeep
ADDED
File without changes
package/package.json
ADDED
@@ -0,0 +1,80 @@
{
  "name": "claude-voice",
  "version": "1.0.0",
  "description": "Voice interface extension for Claude Code - TTS, STT, and wake word detection",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "bin": {
    "claude-voice": "./bin/claude-voice"
  },
  "scripts": {
    "build": "tsc",
    "prepublishOnly": "npm run build",
    "postinstall": "node scripts/postinstall.js",
    "start": "node dist/index.js",
    "dev": "ts-node src/index.ts",
    "lint": "eslint src/**/*.ts",
    "test": "jest"
  },
  "keywords": [
    "claude",
    "claude-code",
    "voice",
    "tts",
    "stt",
    "speech",
    "ai",
    "extension",
    "jarvis",
    "wake-word"
  ],
  "author": "Enes Sahin <menesahin99@gmail.com>",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "git+https://github.com/Menesahin/claude-voice-extension.git"
  },
  "homepage": "https://github.com/Menesahin/claude-voice-extension#readme",
  "bugs": {
    "url": "https://github.com/Menesahin/claude-voice-extension/issues"
  },
  "dependencies": {
    "axios": "^1.6.0",
    "chalk": "^4.1.2",
    "commander": "^12.0.0",
    "express": "^4.18.2",
    "inquirer": "^8.2.7",
    "openai": "^4.20.0",
    "ora": "^5.4.1",
    "sherpa-onnx-darwin-arm64": "^1.12.20",
    "sherpa-onnx-node": "^1.12.20"
  },
  "optionalDependencies": {
    "@picovoice/porcupine-node": "^4.0.1",
    "@picovoice/pvrecorder-node": "^1.2.8"
  },
  "devDependencies": {
    "@types/express": "^4.17.21",
    "@types/inquirer": "^9.0.9",
    "@types/node": "^20.10.0",
    "typescript": "^5.3.0"
  },
  "engines": {
    "node": ">=18.0.0"
  },
  "os": [
    "darwin",
    "linux"
  ],
  "files": [
    "bin/**/*",
    "dist/**/*",
    "hooks/**/*",
    "config/**/*",
    "scripts/**/*",
    "models/.gitkeep",
    "python/**/*",
    "README.md",
    "LICENSE"
  ]
}
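Because the Picovoice packages sit in optionalDependencies, npm will finish the install even if they fail to build, so they can legitimately be absent at runtime. The usual pattern is a guarded require; the sketch below is an assumption about how dist/wake-word/index.js might gate the feature, not a copy of it.

// Guarded loading of an optional dependency (illustrative sketch).
let Porcupine = null;
try {
  // Present only if the optional install step succeeded.
  ({ Porcupine } = require('@picovoice/porcupine-node'));
} catch {
  // Wake word support is simply disabled when the module is missing.
}

if (Porcupine) {
  console.log('Wake word detection available');
} else {
  console.log('Install @picovoice/porcupine-node to enable wake word detection');
}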
package/python/stt_service.py
ADDED
@@ -0,0 +1,59 @@
#!/usr/bin/env python3
"""
Speech-to-Text service using OpenAI Whisper.
Can be run as a standalone script or as a Flask server.
"""

import argparse
import json
import sys
import os

def transcribe_audio(audio_path: str, model: str = "base", language: str = "en") -> dict:
    """Transcribe audio file using Whisper."""
    try:
        import whisper
    except ImportError:
        return {"error": "openai-whisper not installed. Run: pip install openai-whisper"}

    if not os.path.exists(audio_path):
        return {"error": f"Audio file not found: {audio_path}"}

    try:
        # Load model (cached after first load)
        model_instance = whisper.load_model(model)

        # Transcribe
        result = model_instance.transcribe(
            audio_path,
            language=language if language != "auto" else None,
            fp16=False  # Use FP32 for better compatibility
        )

        return {
            "transcript": result["text"].strip(),
            "language": result.get("language", language),
            "segments": result.get("segments", [])
        }
    except Exception as e:
        return {"error": str(e)}

def main():
    parser = argparse.ArgumentParser(description="Whisper STT Service")
    parser.add_argument("--audio", "-a", required=True, help="Path to audio file")
    parser.add_argument("--model", "-m", default="base",
                        choices=["tiny", "base", "small", "medium", "large"],
                        help="Whisper model size")
    parser.add_argument("--language", "-l", default="en",
                        help="Language code (e.g., 'en', 'es') or 'auto' for detection")

    args = parser.parse_args()

    result = transcribe_audio(args.audio, args.model, args.language)
    print(json.dumps(result))

    if "error" in result:
        sys.exit(1)

if __name__ == "__main__":
    main()
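The script prints exactly one JSON object on stdout and exits 1 when that object carries an "error" key, which makes it straightforward to drive from Node. A hedged sketch of such a caller follows; the flag names match the argparse definition above, but whether dist/stt/providers/whisper-local.js actually shells out this way is an assumption.

// Illustrative Node-side caller for python/stt_service.py.
const { execFile } = require('child_process');

function transcribe(audioPath, model = 'base', language = 'en') {
  return new Promise((resolve, reject) => {
    execFile('python3', ['python/stt_service.py', '--audio', audioPath,
                         '--model', model, '--language', language],
      (err, stdout) => {
        // Parse stdout first: the script prints JSON even on failure.
        let result;
        try { result = JSON.parse(stdout); } catch { return reject(err || new Error('bad output')); }
        if (result.error) return reject(new Error(result.error));
        resolve(result.transcript);
      });
  });
}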
package/python/voice_input.py
ADDED
@@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""
Voice Input - Record and transcribe speech
Press ENTER to start recording, ENTER again to stop.
Uses OpenAI Whisper API for transcription.
"""

import os
import sys
import tempfile
import wave
import struct

def record_audio(filename, sample_rate=16000):
    """Record audio using PyAudio"""
    try:
        import pyaudio
    except ImportError:
        print("Installing pyaudio...")
        os.system("pip3 install pyaudio --quiet")
        import pyaudio

    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1

    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=sample_rate,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("\n🎤 Recording... (press ENTER to stop)")

    frames = []

    # Record until Enter is pressed
    import select
    while True:
        data = stream.read(CHUNK, exception_on_overflow=False)
        frames.append(data)

        # Check for Enter key (non-blocking)
        if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
            sys.stdin.readline()
            break

    print("⏹️  Recording stopped.")

    stream.stop_stream()
    stream.close()
    p.terminate()

    # Save as WAV
    wf = wave.open(filename, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(sample_rate)
    wf.writeframes(b''.join(frames))
    wf.close()

    return filename

def transcribe_openai(audio_path):
    """Transcribe using OpenAI Whisper API"""
    try:
        from openai import OpenAI
    except ImportError:
        print("Installing openai...")
        os.system("pip3 install openai --quiet")
        from openai import OpenAI

    api_key = os.environ.get('OPENAI_API_KEY')
    if not api_key:
        print("❌ OPENAI_API_KEY not set")
        return None

    client = OpenAI(api_key=api_key)

    with open(audio_path, 'rb') as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file
        )

    return transcript.text

def transcribe_local(audio_path):
    """Transcribe using local Whisper"""
    try:
        import whisper
    except ImportError:
        print("Local whisper not available, using OpenAI API")
        return transcribe_openai(audio_path)

    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    return result["text"]

def main():
    print("╔════════════════════════════════════════╗")
    print("║        Claude Voice Input              ║")
    print("╠════════════════════════════════════════╣")
    print("║  Press ENTER to start recording        ║")
    print("║  Press ENTER again to stop & transcribe║")
    print("║  Type 'q' to quit                      ║")
    print("╚════════════════════════════════════════╝")

    use_openai = os.environ.get('OPENAI_API_KEY') is not None
    print(f"\nUsing: {'OpenAI Whisper API' if use_openai else 'Local Whisper'}")

    while True:
        print("\n" + "="*40)
        user_input = input("Press ENTER to record (or 'q' to quit): ").strip().lower()

        if user_input == 'q':
            print("Goodbye!")
            break

        # Record
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            temp_path = f.name

        try:
            record_audio(temp_path)

            print("⏳ Transcribing...")

            if use_openai:
                text = transcribe_openai(temp_path)
            else:
                text = transcribe_local(temp_path)

            if text:
                print(f"\n📝 Transcript: \"{text}\"")

                # Copy to clipboard on macOS
                try:
                    import subprocess
                    subprocess.run(['pbcopy'], input=text.encode(), check=True)
                    print("✅ Copied to clipboard! Paste with Cmd+V")
                except:
                    pass
            else:
                print("❌ No speech detected")

        finally:
            if os.path.exists(temp_path):
                os.unlink(temp_path)

if __name__ == "__main__":
    main()
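transcribe_openai() above has a direct Node counterpart using the openai ^4.x SDK that package.json already lists as a dependency. This is a sketch, not the package's actual dist/stt/providers/openai.js.

// Node equivalent of the Python transcribe_openai() helper.
const fs = require('fs');
const OpenAI = require('openai');

async function transcribeOpenAI(audioPath) {
  const client = new OpenAI(); // reads OPENAI_API_KEY from the environment
  const transcript = await client.audio.transcriptions.create({
    model: 'whisper-1',
    file: fs.createReadStream(audioPath),
  });
  return transcript.text;
}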
package/scripts/install.sh
ADDED
@@ -0,0 +1,147 @@
#!/bin/bash
#
# Claude Voice Extension - Installation Script
#
# This script installs all dependencies and sets up the voice extension.

set -e

echo "╔══════════════════════════════════════════════════════════════╗"
echo "║          Claude Voice Extension - Installation               ║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo ""

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Get the script directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_DIR="$( cd "$SCRIPT_DIR/.." && pwd )"

echo "Project directory: $PROJECT_DIR"
echo ""

# Check prerequisites
check_command() {
    if command -v "$1" &> /dev/null; then
        echo -e "${GREEN}✓${NC} $1 found"
        return 0
    else
        echo -e "${RED}✗${NC} $1 not found"
        return 1
    fi
}

echo "Checking prerequisites..."
echo ""

# Check Node.js
if ! check_command node; then
    echo -e "${RED}Node.js is required. Please install Node.js 18+ and try again.${NC}"
    echo "  brew install node"
    exit 1
fi

NODE_VERSION=$(node -v | cut -d'v' -f2 | cut -d'.' -f1)
if [ "$NODE_VERSION" -lt 18 ]; then
    echo -e "${RED}Node.js 18+ is required. Current version: $(node -v)${NC}"
    exit 1
fi

# Check npm
check_command npm || exit 1

# Check Python3
if ! check_command python3; then
    echo -e "${YELLOW}Python3 not found. Local Whisper STT will not work.${NC}"
    echo "  brew install python@3.11"
fi

echo ""
echo "Installing Node.js dependencies..."
cd "$PROJECT_DIR"
npm install

echo ""
echo "Building TypeScript..."
npm run build

echo ""
echo "Installing Python dependencies (for local Whisper)..."
if command -v python3 &> /dev/null; then
    if command -v pip3 &> /dev/null; then
        pip3 install -r requirements.txt --user || {
            echo -e "${YELLOW}Warning: Failed to install Python dependencies.${NC}"
            echo "Local Whisper STT may not work. You can install manually:"
            echo "  pip3 install openai-whisper sounddevice numpy"
        }
    else
        echo -e "${YELLOW}pip3 not found. Skipping Python dependencies.${NC}"
    fi
else
    echo -e "${YELLOW}Skipping Python dependencies (Python3 not found).${NC}"
fi

echo ""
echo "Setting up configuration..."
CONFIG_DIR="$HOME/.claude-voice"
mkdir -p "$CONFIG_DIR"

if [ ! -f "$CONFIG_DIR/config.json" ]; then
    cp "$PROJECT_DIR/config/default.json" "$CONFIG_DIR/config.json"
    echo "Created default configuration at $CONFIG_DIR/config.json"
else
    echo "Configuration already exists at $CONFIG_DIR/config.json"
fi

echo ""
echo "Making CLI executable..."
chmod +x "$PROJECT_DIR/dist/cli.js"
chmod +x "$PROJECT_DIR/dist/index.js"

# Create symlink for global access
echo ""
echo "Creating global command link..."
LINK_PATH="/usr/local/bin/claude-voice"
if [ -L "$LINK_PATH" ] || [ -f "$LINK_PATH" ]; then
    echo "Removing existing link..."
    sudo rm -f "$LINK_PATH"
fi
sudo ln -s "$PROJECT_DIR/dist/cli.js" "$LINK_PATH" || {
    echo -e "${YELLOW}Could not create global link. You can run the CLI with:${NC}"
    echo "  node $PROJECT_DIR/dist/cli.js"
}

echo ""
echo "Installing Claude Code hooks..."
node "$PROJECT_DIR/dist/cli.js" install-hooks

echo ""
echo "╔══════════════════════════════════════════════════════════════╗"
echo "║              Installation Complete!                          ║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo ""
echo "Next steps:"
echo ""
echo "1. Set up API keys (optional, for cloud providers):"
echo "   export OPENAI_API_KEY='your-key'        # For OpenAI TTS/STT"
echo "   export ELEVENLABS_API_KEY='your-key'    # For ElevenLabs TTS"
echo "   export PICOVOICE_ACCESS_KEY='your-key'  # For wake word (free at picovoice.ai)"
echo ""
echo "2. Start the voice extension:"
echo "   claude-voice start"
echo ""
echo "3. Test TTS:"
echo "   claude-voice test-tts 'Hello, world!'"
echo ""
echo "4. Check status:"
echo "   claude-voice status"
echo ""
echo "5. View/modify configuration:"
echo "   claude-voice config"
echo ""
echo "For wake word detection, get a free API key at https://picovoice.ai"
echo ""
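The last installation step delegates hook registration to `claude-voice install-hooks`, whose implementation (dist/cli.js) is not shown in this diff. The sketch below is a hypothetical rendering of what such a step could write, assuming Claude Code reads hook registrations from ~/.claude/settings.json; the hook path is illustrative.

// Hypothetical sketch of hook registration (not the package's actual CLI code).
const fs = require('fs');
const os = require('os');
const path = require('path');

const settingsPath = path.join(os.homedir(), '.claude', 'settings.json');
const settings = fs.existsSync(settingsPath)
  ? JSON.parse(fs.readFileSync(settingsPath, 'utf-8'))
  : {};

// Register the Stop hook so Claude Code runs it after each response.
settings.hooks = settings.hooks || {};
settings.hooks.Stop = [{
  hooks: [{ type: 'command', command: 'node /path/to/claude-voice/hooks/stop.js' }]
}];

fs.mkdirSync(path.dirname(settingsPath), { recursive: true });
fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2));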