claude-voice 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +395 -0
  3. package/bin/claude-voice +29 -0
  4. package/config/default.json +109 -0
  5. package/config/voice-prompt.md +27 -0
  6. package/dist/cli.d.ts +8 -0
  7. package/dist/cli.d.ts.map +1 -0
  8. package/dist/cli.js +1103 -0
  9. package/dist/cli.js.map +1 -0
  10. package/dist/config.d.ts +140 -0
  11. package/dist/config.d.ts.map +1 -0
  12. package/dist/config.js +179 -0
  13. package/dist/config.js.map +1 -0
  14. package/dist/env.d.ts +40 -0
  15. package/dist/env.d.ts.map +1 -0
  16. package/dist/env.js +175 -0
  17. package/dist/env.js.map +1 -0
  18. package/dist/index.d.ts +10 -0
  19. package/dist/index.d.ts.map +1 -0
  20. package/dist/index.js +140 -0
  21. package/dist/index.js.map +1 -0
  22. package/dist/platform/index.d.ts +35 -0
  23. package/dist/platform/index.d.ts.map +1 -0
  24. package/dist/platform/index.js +170 -0
  25. package/dist/platform/index.js.map +1 -0
  26. package/dist/server.d.ts +5 -0
  27. package/dist/server.d.ts.map +1 -0
  28. package/dist/server.js +185 -0
  29. package/dist/server.js.map +1 -0
  30. package/dist/stt/index.d.ts +15 -0
  31. package/dist/stt/index.d.ts.map +1 -0
  32. package/dist/stt/index.js +54 -0
  33. package/dist/stt/index.js.map +1 -0
  34. package/dist/stt/providers/openai.d.ts +15 -0
  35. package/dist/stt/providers/openai.d.ts.map +1 -0
  36. package/dist/stt/providers/openai.js +74 -0
  37. package/dist/stt/providers/openai.js.map +1 -0
  38. package/dist/stt/providers/sherpa-onnx.d.ts +50 -0
  39. package/dist/stt/providers/sherpa-onnx.d.ts.map +1 -0
  40. package/dist/stt/providers/sherpa-onnx.js +237 -0
  41. package/dist/stt/providers/sherpa-onnx.js.map +1 -0
  42. package/dist/stt/providers/whisper-local.d.ts +19 -0
  43. package/dist/stt/providers/whisper-local.d.ts.map +1 -0
  44. package/dist/stt/providers/whisper-local.js +141 -0
  45. package/dist/stt/providers/whisper-local.js.map +1 -0
  46. package/dist/terminal/input-injector.d.ts +55 -0
  47. package/dist/terminal/input-injector.d.ts.map +1 -0
  48. package/dist/terminal/input-injector.js +189 -0
  49. package/dist/terminal/input-injector.js.map +1 -0
  50. package/dist/tts/index.d.ts +20 -0
  51. package/dist/tts/index.d.ts.map +1 -0
  52. package/dist/tts/index.js +72 -0
  53. package/dist/tts/index.js.map +1 -0
  54. package/dist/tts/providers/elevenlabs.d.ts +23 -0
  55. package/dist/tts/providers/elevenlabs.d.ts.map +1 -0
  56. package/dist/tts/providers/elevenlabs.js +142 -0
  57. package/dist/tts/providers/elevenlabs.js.map +1 -0
  58. package/dist/tts/providers/macos-say.d.ts +17 -0
  59. package/dist/tts/providers/macos-say.d.ts.map +1 -0
  60. package/dist/tts/providers/macos-say.js +72 -0
  61. package/dist/tts/providers/macos-say.js.map +1 -0
  62. package/dist/tts/providers/openai.d.ts +19 -0
  63. package/dist/tts/providers/openai.d.ts.map +1 -0
  64. package/dist/tts/providers/openai.js +118 -0
  65. package/dist/tts/providers/openai.js.map +1 -0
  66. package/dist/tts/providers/piper.d.ts +48 -0
  67. package/dist/tts/providers/piper.d.ts.map +1 -0
  68. package/dist/tts/providers/piper.js +417 -0
  69. package/dist/tts/providers/piper.js.map +1 -0
  70. package/dist/voice-input.d.ts +9 -0
  71. package/dist/voice-input.d.ts.map +1 -0
  72. package/dist/voice-input.js +137 -0
  73. package/dist/voice-input.js.map +1 -0
  74. package/dist/wake-word/index.d.ts +19 -0
  75. package/dist/wake-word/index.d.ts.map +1 -0
  76. package/dist/wake-word/index.js +200 -0
  77. package/dist/wake-word/index.js.map +1 -0
  78. package/dist/wake-word/recorder.d.ts +19 -0
  79. package/dist/wake-word/recorder.d.ts.map +1 -0
  80. package/dist/wake-word/recorder.js +145 -0
  81. package/dist/wake-word/recorder.js.map +1 -0
  82. package/hooks/notification.js +125 -0
  83. package/hooks/post-tool-use.js +374 -0
  84. package/hooks/session-start.js +212 -0
  85. package/hooks/stop.js +254 -0
  86. package/models/.gitkeep +0 -0
  87. package/package.json +80 -0
  88. package/python/stt_service.py +59 -0
  89. package/python/voice_input.py +154 -0
  90. package/scripts/install.sh +147 -0
  91. package/scripts/listen.py +161 -0
  92. package/scripts/postinstall.js +57 -0
  93. package/scripts/record.sh +79 -0
  94. package/scripts/setup-hooks.sh +22 -0
  95. package/scripts/voice-input.sh +66 -0
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Voice Input for Claude - Simple and Reliable
4
+ Records audio, transcribes with OpenAI Whisper, and types the text into the active terminal (falling back to the clipboard).
5
+
6
+ Usage: python3 listen.py [seconds]
7
+ Default: 5 seconds recording
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ import subprocess
13
+ import tempfile
14
+ import json
15
+ import urllib.request
16
+
17
def record_with_sounddevice(filename, duration, sample_rate=16000):
    """Record mono 16-bit audio to *filename* using sounddevice/soundfile.

    Both packages are optional (``pip install sounddevice soundfile``).

    Args:
        filename: Path of the audio file to write.
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.

    Returns:
        True once the recording has been written, or False when the
        recording backend cannot be loaded, so the caller can try another
        recording method.
    """
    try:
        import sounddevice as sd
        import soundfile as sf
    except (ImportError, OSError):
        # ImportError: the package is not installed.
        # OSError: the package is installed but its native library
        # (PortAudio / libsndfile) could not be found at import time.
        return False

    print(f"🎤 Recording for {duration} seconds... Speak now!")

    audio = sd.rec(
        int(duration * sample_rate),
        samplerate=sample_rate,
        channels=1,
        dtype='int16',
    )
    sd.wait()  # sd.rec() returns immediately; block until recording is done

    sf.write(filename, audio, sample_rate)
    return True
32
+
33
def record_with_pyaudio(filename, duration, sample_rate=16000):
    """Record mono 16-bit audio to a WAV file using PyAudio.

    Args:
        filename: Path of the WAV file to write.
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.

    Returns:
        True once the recording has been written, or False when PyAudio is
        not installed, so the caller can try another recording method.
    """
    try:
        import pyaudio
        import wave
    except ImportError:
        return False

    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT, channels=CHANNELS, rate=sample_rate,
                        input=True, frames_per_buffer=CHUNK)
        try:
            print(f"🎤 Recording for {duration} seconds... Speak now!")

            # Whole chunks only: the recording is truncated to a multiple of
            # CHUNK frames.
            chunk_count = int(sample_rate / CHUNK * duration)
            frames = [
                stream.read(CHUNK, exception_on_overflow=False)
                for _ in range(chunk_count)
            ]
        finally:
            # Release the audio device even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))
    return True
67
+
68
def transcribe_openai(audio_path):
    """Transcribe *audio_path* with the OpenAI Whisper API via ``curl``.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        The transcript text (may be an empty string), or None when
        OPENAI_API_KEY is not set, curl cannot be run, or the API reply is
        an error or not valid JSON. A message is printed in each None case.
    """
    api_key = os.environ.get('OPENAI_API_KEY')
    if not api_key:
        print("❌ OPENAI_API_KEY not set")
        return None

    # NOTE(review): the key is passed on curl's command line, where other
    # local users may be able to see it in the process list.
    try:
        result = subprocess.run([
            'curl', '-sS',  # silent, but still report transfer errors
            'https://api.openai.com/v1/audio/transcriptions',
            '-H', f'Authorization: Bearer {api_key}',
            '-F', f'file=@{audio_path}',
            '-F', 'model=whisper-1'
        ], capture_output=True, text=True)
    except OSError as err:
        # curl is missing or not executable.
        print(f"❌ Could not run curl: {err}")
        return None

    try:
        response = json.loads(result.stdout)
    except ValueError:
        # Empty or non-JSON output, e.g. a network failure.
        print(f"❌ API Error: {result.stdout or result.stderr}")
        return None

    # An error payload has no 'text' key; report it instead of returning ''.
    if not isinstance(response, dict) or 'error' in response:
        print(f"❌ API Error: {result.stdout}")
        return None

    return response.get('text', '')
89
+
90
def copy_to_clipboard(text):
    """Put *text* on the macOS clipboard by piping it to ``pbcopy``.

    Raises:
        subprocess.CalledProcessError: If ``pbcopy`` exits with a non-zero
            status.
    """
    payload = text.encode()
    subprocess.run(['pbcopy'], input=payload, check=True)
93
+
94
def type_to_terminal(text):
    """Type *text* as keystrokes via AppleScript, then send key code 36.

    The text is embedded in an AppleScript string literal and executed with
    ``osascript``.

    Raises:
        subprocess.CalledProcessError: If ``osascript`` exits with a non-zero
            status.
    """
    # Escape backslash, double quote and newline in a single pass so the
    # text stays inside the AppleScript string literal.
    escape_table = str.maketrans({
        '\\': '\\\\',
        '"': '\\"',
        '\n': '\\n',
    })
    escaped = text.translate(escape_table)

    # key code 36 — presumably the Return key, submitting the typed line.
    script = f'''
    tell application "System Events"
        keystroke "{escaped}"
        key code 36
    end tell
    '''

    command = ['osascript', '-e', script]
    subprocess.run(command, check=True)
107
+
108
def main():
    """Record, transcribe and deliver one voice command.

    Reads the optional recording duration (whole seconds, default 5) from
    ``sys.argv[1]``, records to a temporary WAV file, transcribes it, and
    types the transcript into the terminal, falling back to the clipboard.
    Exits with status 1 on an invalid duration, a missing API key, or when
    no recording library is available. The temporary file is always removed.
    """
    # Validate the argument up front so a typo gives a hint, not a traceback.
    try:
        duration = int(sys.argv[1]) if len(sys.argv) > 1 else 5
    except ValueError:
        print(f"❌ Invalid duration: {sys.argv[1]!r}")
        print(" Usage: python3 listen.py [seconds]")
        sys.exit(1)
    if duration <= 0:
        print("❌ Duration must be a positive number of seconds")
        sys.exit(1)

    print("╔════════════════════════════════════════╗")
    print("║ Claude Voice Input ║")
    print("╚════════════════════════════════════════╝")
    print("")

    # Check API key before spending time on a recording
    if not os.environ.get('OPENAI_API_KEY'):
        print("❌ OPENAI_API_KEY not set")
        print(" Run: export OPENAI_API_KEY='your-key'")
        sys.exit(1)

    # Create temp file; only the path is needed, the recorders reopen it
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
        temp_path = f.name

    try:
        # Try recording methods in order of preference
        recorded = record_with_sounddevice(temp_path, duration)

        if not recorded:
            recorded = record_with_pyaudio(temp_path, duration)

        if not recorded:
            print("❌ No recording library available.")
            print(" Install: pip3 install sounddevice soundfile")
            sys.exit(1)

        print("⏳ Transcribing with OpenAI Whisper...")

        text = transcribe_openai(temp_path)

        if text:
            print(f"\n📝 \"{text}\"\n")

            # Auto-type to terminal
            try:
                type_to_terminal(text)
                print("✅ Typed into terminal!")
            except (subprocess.CalledProcessError, OSError):
                # osascript failed or is unavailable: fall back to clipboard.
                # Ctrl-C and SystemExit are deliberately not caught here.
                copy_to_clipboard(text)
                print("✅ Copied to clipboard! Paste with Cmd+V")
        elif text is None:
            # transcribe_openai returns None on failure, not for silence.
            print("❌ Transcription failed")
        else:
            print("❌ No speech detected")

    finally:
        # Runs on sys.exit() too, so the recording never lingers on disk.
        if os.path.exists(temp_path):
            os.unlink(temp_path)


if __name__ == "__main__":
    main()
@@ -0,0 +1,57 @@
1
#!/usr/bin/env node
/**
 * Claude Voice Extension - Post-Install Setup
 *
 * This script runs after npm install to set up the extension: it creates
 * ~/.claude-voice, seeds config.json from the packaged default when none
 * exists, and prints platform hints and next steps.
 *
 * Setup is best-effort: a filesystem failure is reported as a warning so
 * that it does not abort the package installation.
 */

const fs = require('fs');
const path = require('path');
const os = require('os');

const CONFIG_DIR = path.join(os.homedir(), '.claude-voice');
const CONFIG_FILE = path.join(CONFIG_DIR, 'config.json');
const DEFAULT_CONFIG = path.join(__dirname, '..', 'config', 'default.json');

console.log('\n Claude Voice Extension - Post-Install Setup\n');

try {
  // 1. Create config directory
  if (!fs.existsSync(CONFIG_DIR)) {
    fs.mkdirSync(CONFIG_DIR, { recursive: true });
    console.log(' Created config directory:', CONFIG_DIR);
  }

  // 2. Copy default config if none exists (never overwrite a user's config)
  if (!fs.existsSync(CONFIG_FILE) && fs.existsSync(DEFAULT_CONFIG)) {
    fs.copyFileSync(DEFAULT_CONFIG, CONFIG_FILE);
    console.log(' Created default configuration');
  }
} catch (err) {
  // An uncaught exception here would make the whole `npm install` fail.
  console.warn(' Warning: could not set up', CONFIG_DIR, '-', err.message);
  console.warn(' Run "claude-voice setup" later to create the configuration.');
}

// 3. Detect platform and show relevant info
const platform = os.platform();
console.log(`\n Platform: ${platform}`);

if (platform === 'darwin') {
  console.log(' TTS: macOS "say" command available (built-in)');
  console.log(' Terminal: AppleScript injection available');
} else if (platform === 'linux') {
  console.log(' TTS: Install espeak for local TTS: sudo apt install espeak');
  console.log(' Terminal: Install xdotool for input: sudo apt install xdotool');
}

// 4. Show optional dependencies
console.log('\n Optional Features:');
console.log(' - Wake word detection: Requires PICOVOICE_ACCESS_KEY');
console.log(' Get a free key at: https://picovoice.ai/');
console.log(' - OpenAI TTS/STT: Requires OPENAI_API_KEY');
console.log(' - ElevenLabs TTS: Requires ELEVENLABS_API_KEY');

// 5. Next steps
console.log('\n Next Steps:');
console.log(' 1. Run interactive setup: claude-voice setup');
console.log(' 2. Or start directly: claude-voice start');
console.log(' 3. Check status: claude-voice status');
console.log(' 4. Diagnose issues: claude-voice doctor');
console.log('');
@@ -0,0 +1,79 @@
1
#!/bin/bash
#
# Simple Voice Input for Claude (no dependencies)
# Records with macOS QuickTime Player (falling back to SoX `rec`),
# transcribes with the OpenAI Whisper API and copies the text to the
# clipboard.
#
# Usage: record.sh [seconds]   (default: 5)
#

DURATION=${1:-5}

echo "╔════════════════════════════════════════╗"
echo "║ Claude Voice Input ║"
echo "╚════════════════════════════════════════╝"
echo ""

if [ -z "$OPENAI_API_KEY" ]; then
    echo "❌ OPENAI_API_KEY not set"
    exit 1
fi

# DURATION is interpolated into the AppleScript below, so accept only a
# plain number; anything else could inject script code.
if ! [[ "$DURATION" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "❌ Duration must be a number of seconds"
    exit 1
fi

# Private temp directory instead of a predictable /tmp/<name>-<pid> path;
# the trap removes it on every exit path.
TEMP_DIR=$(mktemp -d /tmp/claude-voice.XXXXXX) || {
    echo "❌ Could not create temporary directory"
    exit 1
}
trap 'rm -rf "$TEMP_DIR"' EXIT
TEMP_FILE="$TEMP_DIR/recording.m4a"

echo "🎤 Recording for ${DURATION} seconds... Speak now!"

# Drive QuickTime Player through AppleScript to make an audio recording
osascript -e "
set tempFile to \"$TEMP_FILE\"
set duration to $DURATION

tell application \"QuickTime Player\"
    activate
    set newRecording to new audio recording
    delay 0.5
    start newRecording
    delay duration
    stop newRecording
    export document 1 in POSIX file tempFile using settings preset \"Audio Only\"
    close document 1 saving no
    quit
end tell
" 2>/dev/null

# Wait for file
sleep 1

if [ ! -s "$TEMP_FILE" ]; then
    echo "❌ Recording failed. Trying alternative method..."

    # Fallback: SoX `rec`, if installed. Record to WAV rather than the
    # .m4a path used for the QuickTime export.
    if command -v rec &> /dev/null; then
        TEMP_FILE="$TEMP_DIR/recording.wav"
        rec -r 16000 -c 1 "$TEMP_FILE" trim 0 "$DURATION" 2>/dev/null
    else
        echo "❌ No recording tool available."
        echo " Install sox: brew install sox"
        echo " Or use voice-input.sh (requires ffmpeg: brew install ffmpeg)"
        exit 1
    fi
fi

if [ ! -s "$TEMP_FILE" ]; then
    echo "❌ Recording failed"
    exit 1
fi

echo "⏳ Transcribing..."

# Call OpenAI Whisper API
RESPONSE=$(curl -s https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -F "file=@$TEMP_FILE" \
    -F "model=whisper-1")

# Extract the "text" field; empty on an error payload or invalid JSON
TEXT=$(echo "$RESPONSE" | python3 -c "import sys, json; print(json.load(sys.stdin).get('text', ''))" 2>/dev/null)

if [ -z "$TEXT" ]; then
    echo "❌ Transcription failed"
    echo "Response: $RESPONSE"
    exit 1
fi

echo ""
echo "📝 \"$TEXT\""
echo ""
# printf, not `echo -n`: echo would treat a transcript such as "-e" as an option
printf '%s' "$TEXT" | pbcopy
echo "✅ Copied to clipboard!"
@@ -0,0 +1,22 @@
1
#!/bin/bash
#
# Install or uninstall the Claude Code hooks by delegating to the
# compiled CLI in dist/cli.js.
#
# Usage: setup-hooks.sh {install|uninstall}
#

# Resolve this script's directory, then the project root one level up.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

if [[ "$1" == "install" ]]; then
    echo "Installing hooks..."
    node "$PROJECT_DIR/dist/cli.js" install-hooks
elif [[ "$1" == "uninstall" ]]; then
    echo "Uninstalling hooks..."
    node "$PROJECT_DIR/dist/cli.js" uninstall-hooks
else
    # Unknown or missing sub-command
    echo "Usage: $0 {install|uninstall}"
    exit 1
fi
@@ -0,0 +1,66 @@
1
#!/bin/bash
#
# Simple Voice Input for Claude
# Records audio with ffmpeg, transcribes with OpenAI Whisper, copies to
# clipboard
#
# Usage: voice-input.sh [seconds]   (default: 5)
#

DURATION=${1:-5}  # Default 5 seconds, or pass as argument

echo "╔════════════════════════════════════════╗"
echo "║ Claude Voice Input ║"
echo "╚════════════════════════════════════════╝"
echo ""

# Check for OPENAI_API_KEY
if [ -z "$OPENAI_API_KEY" ]; then
    echo "❌ OPENAI_API_KEY not set"
    exit 1
fi

# Check for ffmpeg (can install with: brew install ffmpeg)
if ! command -v ffmpeg &> /dev/null; then
    echo "❌ ffmpeg not found. Install with: brew install ffmpeg"
    exit 1
fi

# Reject anything that is not a plain number before passing it to ffmpeg
if ! [[ "$DURATION" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    echo "❌ Duration must be a number of seconds"
    exit 1
fi

# Private temp directory instead of a predictable /tmp/<name>-<pid> path;
# the trap removes it on every exit path, including Ctrl-C.
TEMP_DIR=$(mktemp -d /tmp/claude-voice.XXXXXX) || {
    echo "❌ Could not create temporary directory"
    exit 1
}
trap 'rm -rf "$TEMP_DIR"' EXIT
TEMP_FILE="$TEMP_DIR/recording.wav"

echo "🎤 Recording for ${DURATION} seconds..."
echo " (Speak now!)"
echo ""

# Record using ffmpeg with macOS audio input
ffmpeg -f avfoundation -i ":0" -t "$DURATION" -ar 16000 -ac 1 -y "$TEMP_FILE" 2>/dev/null

# -s: the file must exist AND be non-empty
if [ ! -s "$TEMP_FILE" ]; then
    echo "❌ Recording failed"
    exit 1
fi

echo "⏳ Transcribing with OpenAI Whisper..."

# Call OpenAI Whisper API
RESPONSE=$(curl -s https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -F "file=@$TEMP_FILE" \
    -F "model=whisper-1")

# Extract text from response; empty on an error payload or invalid JSON
TEXT=$(echo "$RESPONSE" | python3 -c "import sys, json; print(json.load(sys.stdin).get('text', ''))" 2>/dev/null)

if [ -z "$TEXT" ]; then
    echo "❌ No speech detected or transcription failed"
    echo "Response: $RESPONSE"
    exit 1
fi

echo ""
echo "📝 Transcript:"
echo " \"$TEXT\""
echo ""

# Copy to clipboard
# printf, not `echo -n`: echo would treat a transcript such as "-e" as an option
printf '%s' "$TEXT" | pbcopy
echo "✅ Copied to clipboard! Paste with Cmd+V in Claude"