voicesmith-mcp 1.0.18 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -2
- package/bin/install.js +117 -0
- package/bin/uninstall.js +17 -0
- package/config.json +1 -0
- package/config.py +13 -0
- package/hooks/session-start.sh +19 -26
- package/launcher/Info.plist +25 -0
- package/launcher/audio_service.c +267 -0
- package/launcher/com.voicesmith-mcp.audio.plist +41 -0
- package/launcher/main.c +91 -0
- package/launcher/mic_capture.c +161 -0
- package/menubar/VoiceSmithMenu.swift +1668 -0
- package/menubar/app-icon.png +0 -0
- package/menubar/com.voicesmith-mcp.menubar.plist +27 -0
- package/package.json +6 -2
- package/requirements.txt +1 -0
- package/server.py +439 -42
- package/stt/mic_capture.py +6 -2
- package/templates/voice-rules.md +2 -1
- package/tts/__pycache__/audio_player.cpython-314.pyc +0 -0
- package/tts/__pycache__/kokoro_engine.cpython-314.pyc +0 -0
- package/tts/audio_player.py +80 -3
- package/tts/kokoro_engine.py +11 -4
- package/voice_registry.py +23 -10
package/README.md
CHANGED
|
@@ -77,6 +77,43 @@ In a meeting or shared space? Just ask:
|
|
|
77
77
|
|
|
78
78
|
The AI continues working normally — it just won't play audio. Say *"unmute"* when you're ready.
|
|
79
79
|
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
### Menu Bar App (macOS)
|
|
83
|
+
|
|
84
|
+
On macOS, VoiceSmith includes a native menu bar app for hands-free control:
|
|
85
|
+
|
|
86
|
+
- **Session Activity** — see all active sessions with real-time sparkline graphs
|
|
87
|
+
- **Quick toggles** — Media Ducking, Nudge on Timeout
|
|
88
|
+
- **Voice switcher** — browse and change from 54 voices, nested by language
|
|
89
|
+
- **Whisper model** — switch between base/small/medium/large-v3 with inline download progress
|
|
90
|
+
- **Audio devices** — choose audio output and input devices
|
|
91
|
+
- **Voice rules** — edit or reset to default
|
|
92
|
+
- **Updates** — check and install new versions
|
|
93
|
+
|
|
94
|
+
The menu bar app starts automatically at login and runs independently from IDE sessions.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
### Audio Device Selection
|
|
99
|
+
|
|
100
|
+
Choose specific audio output (speakers/headphones) and input (microphone) devices from the menu bar app, or in config:
|
|
101
|
+
|
|
102
|
+
```json
|
|
103
|
+
{
|
|
104
|
+
"tts": { "audio_output_device": "coreaudio/BuiltInSpeakerDevice" },
|
|
105
|
+
"stt": { "audio_input_device": 1 }
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Changes take effect immediately — no restart needed. If a configured device is unavailable, VoiceSmith falls back to the system default.
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
### Interrupting Speech
|
|
114
|
+
|
|
115
|
+
Press **Escape** while the AI is speaking to stop audio immediately. The AI stops mid-sentence and waits for your next input.
|
|
116
|
+
|
|
80
117
|
## Alternative Install
|
|
81
118
|
|
|
82
119
|
If you don't have Node.js or prefer a shell script:
|
|
@@ -87,7 +124,7 @@ cd voicesmith-mcp
|
|
|
87
124
|
./install.sh
|
|
88
125
|
```
|
|
89
126
|
|
|
90
|
-
Supports the same flags: `--claude`, `--cursor`, `--codex`, `--all`.
|
|
127
|
+
Supports the same flags: `--claude`, `--cursor`, `--codex`, `--all`, `--uninstall`.
|
|
91
128
|
|
|
92
129
|
## MCP Tools
|
|
93
130
|
|
|
@@ -104,6 +141,7 @@ Once installed, your AI assistant has access to these tools:
|
|
|
104
141
|
| `mute` / `unmute` | Silence or resume voice output |
|
|
105
142
|
| `stop` | Stop playback or cancel an active recording |
|
|
106
143
|
| `status` | Server health and session info |
|
|
144
|
+
| `list_audio_devices` | List available audio input and output devices |
|
|
107
145
|
|
|
108
146
|
## How It Works
|
|
109
147
|
|
|
@@ -216,9 +254,11 @@ This can happen when another session is holding your preferred voice name. Ask t
|
|
|
216
254
|
|
|
217
255
|
```bash
|
|
218
256
|
npx voicesmith-mcp uninstall
|
|
257
|
+
# or if installed via git clone:
|
|
258
|
+
./install.sh --uninstall
|
|
219
259
|
```
|
|
220
260
|
|
|
221
|
-
Removes all files, models, MCP config entries,
|
|
261
|
+
Removes all files, models, MCP config entries, voice rules, LaunchAgents, and hooks cleanly.
|
|
222
262
|
|
|
223
263
|
## License
|
|
224
264
|
|
package/bin/install.js
CHANGED
|
@@ -462,6 +462,122 @@ except Exception as e:
|
|
|
462
462
|
}
|
|
463
463
|
}
|
|
464
464
|
|
|
465
|
+
// ─── Step 5b: Menu Bar App (macOS only) ──────────────────────────────────────
|
|
466
|
+
|
|
467
|
+
/**
 * Step 5b: build and register the VoiceSmith menu bar app (macOS only).
 *
 * Returns early (without throwing) on other platforms, when the Swift source
 * is missing from the package, when `swiftc` is not installed, or when the
 * Swift build fails. Otherwise it:
 *   1. creates the VoiceSmith.app bundle skeleton under INSTALL_DIR and
 *      writes its Info.plist,
 *   2. compiles VoiceSmithMenu.swift into Contents/MacOS/VoiceSmith,
 *   3. generates AppIcon.icns from app-icon.png (best effort),
 *   4. ad-hoc signs the bundle, and
 *   5. installs and loads the LaunchAgent plist, if the template is shipped.
 *
 * @returns {Promise<void>}
 */
async function step5b_menuBar() {
  if (process.platform !== "darwin") return;

  const menubarDir = path.join(__dirname, "..", "menubar");
  const menubarSrc = path.join(menubarDir, "VoiceSmithMenu.swift");
  const menubarIconSrc = path.join(menubarDir, "app-icon.png");
  const menubarPlistTemplate = path.join(menubarDir, "com.voicesmith-mcp.menubar.plist");

  if (!fs.existsSync(menubarSrc)) {
    logWarn("Menu bar source not found — skipping");
    return;
  }

  // Check for swiftc
  if (!(await commandExists("swiftc"))) {
    logWarn("swiftc not found — menu bar app requires Xcode Command Line Tools");
    logInfo("Install with: xcode-select --install");
    return;
  }

  const menubarApp = path.join(INSTALL_DIR, "VoiceSmith.app");
  const menubarBinDir = path.join(menubarApp, "Contents", "MacOS");
  const menubarResDir = path.join(menubarApp, "Contents", "Resources");
  const menubarBinary = path.join(menubarBinDir, "VoiceSmith");
  const menubarPlist = path.join(os.homedir(), "Library", "LaunchAgents", "com.voicesmith-mcp.menubar.plist");

  // Create app bundle structure
  fs.mkdirSync(menubarBinDir, { recursive: true });
  fs.mkdirSync(menubarResDir, { recursive: true });

  // Create Info.plist
  const infoPlist = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleExecutable</key>
<string>VoiceSmith</string>
<key>CFBundleIdentifier</key>
<string>com.voicesmith-mcp.menubar</string>
<key>CFBundleName</key>
<string>VoiceSmith</string>
<key>CFBundleDisplayName</key>
<string>VoiceSmith</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>CFBundleIconFile</key>
<string>AppIcon</string>
<key>LSBackgroundOnly</key>
<true/>
<key>LSUIElement</key>
<true/>
</dict>
</plist>`;
  fs.writeFileSync(path.join(menubarApp, "Contents", "Info.plist"), infoPlist);

  // Compile Swift
  logAction("Building VoiceSmith menu bar app...");
  const buildResult = await runCommand("swiftc", [
    "-parse-as-library",
    "-framework", "SwiftUI",
    "-framework", "AppKit",
    menubarSrc,
    "-o", menubarBinary,
  ]);

  if (!buildResult.success) {
    logWarn("Menu bar build failed — menu bar will not be available");
    return;
  }

  // Generate icon (best effort: any failure just leaves the app without one)
  if (fs.existsSync(menubarIconSrc)) {
    const { execFileSync } = require("child_process");

    // [file name inside the .iconset, edge length in pixels].
    // Only the ten standard iconset slots are produced; the previous code
    // also emitted icon_64x64.png, which is not one of them.
    // NOTE(review): confirm against Apple's iconset naming documentation.
    const iconVariants = [
      ["icon_16x16.png", 16],
      ["icon_16x16@2x.png", 32],
      ["icon_32x32.png", 32],
      ["icon_32x32@2x.png", 64],
      ["icon_128x128.png", 128],
      ["icon_128x128@2x.png", 256],
      ["icon_256x256.png", 256],
      ["icon_256x256@2x.png", 512],
      ["icon_512x512.png", 512],
      ["icon_512x512@2x.png", 1024],
    ];

    let iconWorkDir = null;
    try {
      // Unique scratch directory instead of a fixed, predictable name in the
      // shared temp dir; the iconset itself lives inside it.
      iconWorkDir = fs.mkdtempSync(path.join(os.tmpdir(), "voicesmith-icon-"));
      const iconsetDir = path.join(iconWorkDir, "VoiceSmithIcon.iconset");
      fs.mkdirSync(iconsetDir);

      // Argument arrays (no shell) so paths containing quotes, spaces or `$`
      // are passed through verbatim.
      for (const [fileName, px] of iconVariants) {
        execFileSync(
          "sips",
          ["-z", String(px), String(px), menubarIconSrc, "--out", path.join(iconsetDir, fileName)],
          { stdio: "ignore" }
        );
      }
      execFileSync(
        "iconutil",
        ["-c", "icns", iconsetDir, "-o", path.join(menubarResDir, "AppIcon.icns")],
        { stdio: "ignore" }
      );
    } catch (e) {
      /* icon generation is optional */
    } finally {
      if (iconWorkDir) fs.rmSync(iconWorkDir, { recursive: true, force: true });
    }
  }

  // Codesign (ad-hoc identity "-")
  const signResult = await runCommand("codesign", ["-s", "-", "--force", menubarApp]);
  if (!signResult.success) {
    logWarn("Menu bar codesign failed — continuing with unsigned app bundle");
  }
  logActionDone("VoiceSmith menu bar app built");

  // Install LaunchAgent
  if (fs.existsSync(menubarPlistTemplate)) {
    fs.mkdirSync(path.dirname(menubarPlist), { recursive: true });

    // Function replacer: a plain replacement string would treat `$&`, `$1`,
    // `$$` etc. inside the path as substitution patterns.
    const plistContent = fs
      .readFileSync(menubarPlistTemplate, "utf8")
      .replace(/MENUBAR_BINARY/g, () => menubarBinary);
    fs.writeFileSync(menubarPlist, plistContent);

    // Unload any previously installed agent first; its result is ignored
    // because there may be nothing loaded yet.
    await runCommand("launchctl", ["unload", menubarPlist]);
    const loadResult = await runCommand("launchctl", ["load", "-w", menubarPlist]);
    if (loadResult.success) {
      logOk("VoiceSmith menu bar started (runs at login)");
    } else {
      logWarn("Menu bar LaunchAgent install failed");
    }
  }
}
|
|
580
|
+
|
|
465
581
|
// ─── Voice Picker ────────────────────────────────────────────────────────────
|
|
466
582
|
|
|
467
583
|
const DEFAULT_VOICES = [
|
|
@@ -726,6 +842,7 @@ async function run() {
|
|
|
726
842
|
await step3_models();
|
|
727
843
|
const configuredIdes = await step4_mcpConfig(targetIdes);
|
|
728
844
|
await step5_microphone();
|
|
845
|
+
await step5b_menuBar();
|
|
729
846
|
await step6_voiceRules(configuredIdes);
|
|
730
847
|
|
|
731
848
|
const ideNames = (configuredIdes || [])
|
package/bin/uninstall.js
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
const fs = require("fs");
|
|
8
8
|
const path = require("path");
|
|
9
|
+
const os = require("os");
|
|
9
10
|
|
|
10
11
|
const {
|
|
11
12
|
INSTALL_DIR,
|
|
@@ -60,6 +61,22 @@ async function run() {
|
|
|
60
61
|
|
|
61
62
|
console.log("");
|
|
62
63
|
|
|
64
|
+
// Unload and remove LaunchAgents before deleting the install directory
|
|
65
|
+
const { execSync } = require("child_process");
|
|
66
|
+
const launchAgents = [
|
|
67
|
+
path.join(os.homedir(), "Library", "LaunchAgents", "com.voicesmith-mcp.audio.plist"),
|
|
68
|
+
path.join(os.homedir(), "Library", "LaunchAgents", "com.voicesmith-mcp.menubar.plist"),
|
|
69
|
+
];
|
|
70
|
+
for (const plist of launchAgents) {
|
|
71
|
+
if (fileExists(plist)) {
|
|
72
|
+
try {
|
|
73
|
+
execSync(`launchctl unload "${plist}" 2>/dev/null`, { stdio: "ignore" });
|
|
74
|
+
fs.unlinkSync(plist);
|
|
75
|
+
logOk(`Removed LaunchAgent: ${path.basename(plist)}`);
|
|
76
|
+
} catch (e) { /* ignore */ }
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
63
80
|
// Remove install directory (venv, models, server files, config)
|
|
64
81
|
if (dirExists(INSTALL_DIR)) {
|
|
65
82
|
fs.rmSync(INSTALL_DIR, { recursive: true, force: true });
|
package/config.json
CHANGED
package/config.py
CHANGED
|
@@ -28,6 +28,7 @@ class TTSConfig:
|
|
|
28
28
|
default_speed: float = 1.0
|
|
29
29
|
audio_player: str = "mpv"
|
|
30
30
|
duck_media: bool = False
|
|
31
|
+
audio_output_device: Optional[str] = None # mpv device name, None = system default
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
@dataclass
|
|
@@ -38,6 +39,7 @@ class STTConfig:
|
|
|
38
39
|
max_listen_timeout: float = 15
|
|
39
40
|
vad_threshold: float = 0.3
|
|
40
41
|
nudge_on_timeout: bool = False
|
|
42
|
+
audio_input_device: Optional[int] = None # sounddevice device index, None = system default
|
|
41
43
|
|
|
42
44
|
|
|
43
45
|
@dataclass
|
|
@@ -61,6 +63,7 @@ class AppConfig:
|
|
|
61
63
|
log_level: str = "info"
|
|
62
64
|
log_file: bool = False
|
|
63
65
|
http_port: int = 7865
|
|
66
|
+
check_updates: bool = True
|
|
64
67
|
|
|
65
68
|
|
|
66
69
|
def get_config_path() -> Path:
|
|
@@ -104,6 +107,8 @@ def load_config(config_path: Optional[Path] = None) -> AppConfig:
|
|
|
104
107
|
config.tts.audio_player = tts["audio_player"]
|
|
105
108
|
if "duck_media" in tts:
|
|
106
109
|
config.tts.duck_media = bool(tts["duck_media"])
|
|
110
|
+
if "audio_output_device" in tts:
|
|
111
|
+
config.tts.audio_output_device = tts["audio_output_device"]
|
|
107
112
|
|
|
108
113
|
# STT config
|
|
109
114
|
if "stt" in data:
|
|
@@ -120,6 +125,9 @@ def load_config(config_path: Optional[Path] = None) -> AppConfig:
|
|
|
120
125
|
config.stt.vad_threshold = float(stt["vad_threshold"])
|
|
121
126
|
if "nudge_on_timeout" in stt:
|
|
122
127
|
config.stt.nudge_on_timeout = bool(stt["nudge_on_timeout"])
|
|
128
|
+
if "audio_input_device" in stt:
|
|
129
|
+
val = stt["audio_input_device"]
|
|
130
|
+
config.stt.audio_input_device = int(val) if val is not None else None
|
|
123
131
|
|
|
124
132
|
# Top-level config
|
|
125
133
|
if "main_agent" in data:
|
|
@@ -134,6 +142,8 @@ def load_config(config_path: Optional[Path] = None) -> AppConfig:
|
|
|
134
142
|
config.log_file = bool(data["log_file"])
|
|
135
143
|
if "http_port" in data:
|
|
136
144
|
config.http_port = int(data["http_port"])
|
|
145
|
+
if "check_updates" in data:
|
|
146
|
+
config.check_updates = bool(data["check_updates"])
|
|
137
147
|
|
|
138
148
|
# Wake word config
|
|
139
149
|
if "wake_word" in data:
|
|
@@ -187,6 +197,7 @@ def save_config(config: AppConfig, config_path: Optional[Path] = None) -> None:
|
|
|
187
197
|
"default_speed": config.tts.default_speed,
|
|
188
198
|
"audio_player": config.tts.audio_player,
|
|
189
199
|
"duck_media": config.tts.duck_media,
|
|
200
|
+
"audio_output_device": config.tts.audio_output_device,
|
|
190
201
|
},
|
|
191
202
|
"stt": {
|
|
192
203
|
"model_size": config.stt.model_size,
|
|
@@ -195,6 +206,7 @@ def save_config(config: AppConfig, config_path: Optional[Path] = None) -> None:
|
|
|
195
206
|
"max_listen_timeout": config.stt.max_listen_timeout,
|
|
196
207
|
"vad_threshold": config.stt.vad_threshold,
|
|
197
208
|
"nudge_on_timeout": config.stt.nudge_on_timeout,
|
|
209
|
+
"audio_input_device": config.stt.audio_input_device,
|
|
198
210
|
},
|
|
199
211
|
"main_agent": config.main_agent,
|
|
200
212
|
"last_voice_name": config.last_voice_name,
|
|
@@ -202,6 +214,7 @@ def save_config(config: AppConfig, config_path: Optional[Path] = None) -> None:
|
|
|
202
214
|
"log_level": config.log_level,
|
|
203
215
|
"log_file": config.log_file,
|
|
204
216
|
"http_port": config.http_port,
|
|
217
|
+
"check_updates": config.check_updates,
|
|
205
218
|
"wake_word": {
|
|
206
219
|
"enabled": config.wake_word.enabled,
|
|
207
220
|
"model": config.wake_word.model,
|
package/hooks/session-start.sh
CHANGED
|
@@ -94,36 +94,29 @@ except:
|
|
|
94
94
|
fi
|
|
95
95
|
fi
|
|
96
96
|
|
|
97
|
-
# Fallback:
|
|
98
|
-
if [ -z "$SESSION_NAME" ]; then
|
|
99
|
-
|
|
100
|
-
|
|
97
|
+
# Fallback: query the server's /status endpoint for the actual name
|
|
98
|
+
if [ -z "$SESSION_NAME" ] && [ -n "$PORT" ]; then
|
|
99
|
+
STATUS=$(curl -s --max-time 2 "http://127.0.0.1:$PORT/status" 2>/dev/null)
|
|
100
|
+
if [ -n "$STATUS" ]; then
|
|
101
|
+
SESSION_NAME=$(echo "$STATUS" | python3 -c "
|
|
102
|
+
import sys, json
|
|
101
103
|
try:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
os.kill(s['pid'], 0)
|
|
116
|
-
print(f\"{s['name']}|{s['voice']}\")
|
|
117
|
-
break
|
|
118
|
-
except (OSError, ProcessLookupError):
|
|
119
|
-
pass
|
|
104
|
+
d = json.load(sys.stdin)
|
|
105
|
+
# Check session object first (new servers), fall back to top-level name
|
|
106
|
+
s = d.get('session') or d
|
|
107
|
+
print(s.get('name', ''))
|
|
108
|
+
except:
|
|
109
|
+
pass
|
|
110
|
+
" 2>/dev/null)
|
|
111
|
+
SESSION_VOICE=$(echo "$STATUS" | python3 -c "
|
|
112
|
+
import sys, json
|
|
113
|
+
try:
|
|
114
|
+
d = json.load(sys.stdin)
|
|
115
|
+
s = d.get('session') or d
|
|
116
|
+
print(s.get('voice', ''))
|
|
120
117
|
except:
|
|
121
118
|
pass
|
|
122
119
|
" 2>/dev/null)
|
|
123
|
-
|
|
124
|
-
if [ -n "$SESSION_INFO" ]; then
|
|
125
|
-
SESSION_NAME=$(echo "$SESSION_INFO" | cut -d'|' -f1)
|
|
126
|
-
SESSION_VOICE=$(echo "$SESSION_INFO" | cut -d'|' -f2)
|
|
127
120
|
fi
|
|
128
121
|
fi
|
|
129
122
|
fi
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"
|
|
3
|
+
"http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
4
|
+
<plist version="1.0">
|
|
5
|
+
<dict>
|
|
6
|
+
<key>CFBundleExecutable</key>
|
|
7
|
+
<string>VoiceSmithMCP</string>
|
|
8
|
+
<key>CFBundleIdentifier</key>
|
|
9
|
+
<string>com.voicesmith-mcp.launcher</string>
|
|
10
|
+
<key>CFBundleName</key>
|
|
11
|
+
<string>VoiceSmithMCP</string>
|
|
12
|
+
<key>CFBundlePackageType</key>
|
|
13
|
+
<string>APPL</string>
|
|
14
|
+
<key>CFBundleShortVersionString</key>
|
|
15
|
+
<string>1.0</string>
|
|
16
|
+
<key>CFBundleVersion</key>
|
|
17
|
+
<string>1</string>
|
|
18
|
+
<!-- Background-only: no Dock icon or app switcher entry -->
|
|
19
|
+
<key>LSBackgroundOnly</key>
|
|
20
|
+
<true/>
|
|
21
|
+
<!-- Required for macOS TCC to show a mic permission dialog for this bundle -->
|
|
22
|
+
<key>NSMicrophoneUsageDescription</key>
|
|
23
|
+
<string>VoiceSmith MCP uses the microphone to transcribe voice input for Claude.</string>
|
|
24
|
+
</dict>
|
|
25
|
+
</plist>
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* audio_service.c — Unix-socket mic streaming daemon for VoiceSmithMCP
|
|
3
|
+
*
|
|
4
|
+
* Runs as a LaunchAgent (launchd spawns it, ppid=1). Because it is spawned
|
|
5
|
+
* by launchd — not by a terminal app — macOS TCC attributes microphone
|
|
6
|
+
* access to VoiceSmithMCP.app (com.voicesmith-mcp.launcher) rather than to
|
|
7
|
+
* the user's terminal, which typically lacks NSMicrophoneUsageDescription.
|
|
8
|
+
*
|
|
9
|
+
* Protocol (Unix domain socket at SOCKET_PATH):
|
|
10
|
+
* - Service creates the socket file, then loops accepting connections.
|
|
11
|
+
* - Supports MULTIPLE concurrent clients — audio is broadcast to all.
|
|
12
|
+
* - On connection: add client to the list (recording is already running from startup).
|
|
13
|
+
* - On client disconnect: remove from list; recording continues even with no clients.
|
|
14
|
+
* - On SIGTERM/SIGINT: stop cleanly and exit 0.
|
|
15
|
+
* - On CoreAudio error: write message to stderr and exit 1.
|
|
16
|
+
*
|
|
17
|
+
* Build (handled by install.sh):
|
|
18
|
+
* clang -framework AudioToolbox -framework CoreFoundation \
|
|
19
|
+
* audio_service.c -o audio-service
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
#include <AudioToolbox/AudioToolbox.h>
|
|
23
|
+
#include <CoreFoundation/CoreFoundation.h>
|
|
24
|
+
#include <errno.h>
|
|
25
|
+
#include <fcntl.h>
|
|
26
|
+
#include <pthread.h>
|
|
27
|
+
#include <signal.h>
|
|
28
|
+
#include <stdio.h>
|
|
29
|
+
#include <stdlib.h>
|
|
30
|
+
#include <string.h>
|
|
31
|
+
#include <sys/socket.h>
|
|
32
|
+
#include <sys/un.h>
|
|
33
|
+
#include <unistd.h>
|
|
34
|
+
|
|
35
|
+
#define SAMPLE_RATE 16000
|
|
36
|
+
#define CHANNELS 1
|
|
37
|
+
#define BYTES_PER_SAMPLE 4 /* float32 */
|
|
38
|
+
#define CHUNK_SAMPLES 512 /* Silero VAD requires 512-sample chunks */
|
|
39
|
+
#define CHUNK_BYTES (CHUNK_SAMPLES * BYTES_PER_SAMPLE) /* 2048 */
|
|
40
|
+
#define NUM_BUFFERS 3
|
|
41
|
+
#define BUFFER_FRAMES (CHUNK_SAMPLES * 4)
|
|
42
|
+
#define BUFFER_BYTES (BUFFER_FRAMES * BYTES_PER_SAMPLE)
|
|
43
|
+
|
|
44
|
+
#define SOCKET_PATH "/tmp/voicesmith-audio.sock"
|
|
45
|
+
#define MAX_CLIENTS 8
|
|
46
|
+
|
|
47
|
+
/* ── Global state ────────────────────────────────────────────────────────── */
|
|
48
|
+
|
|
49
|
+
static volatile sig_atomic_t g_running = 1;
|
|
50
|
+
static AudioQueueRef g_queue = NULL;
|
|
51
|
+
|
|
52
|
+
/* Client tracking */
|
|
53
|
+
static int g_clients[MAX_CLIENTS];
|
|
54
|
+
static int g_num_clients = 0;
|
|
55
|
+
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
56
|
+
|
|
57
|
+
static void handle_signal(int sig)
|
|
58
|
+
{
|
|
59
|
+
(void)sig;
|
|
60
|
+
g_running = 0;
|
|
61
|
+
if (g_queue)
|
|
62
|
+
AudioQueueStop(g_queue, /*inImmediate=*/true);
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/* ── Client management ───────────────────────────────────────────────────── */
|
|
66
|
+
|
|
67
|
+
static void add_client(int fd)
|
|
68
|
+
{
|
|
69
|
+
pthread_mutex_lock(&g_lock);
|
|
70
|
+
if (g_num_clients < MAX_CLIENTS) {
|
|
71
|
+
g_clients[g_num_clients++] = fd;
|
|
72
|
+
fprintf(stderr, "audio-service: client added (fd=%d, total=%d)\n", fd, g_num_clients);
|
|
73
|
+
} else {
|
|
74
|
+
fprintf(stderr, "audio-service: max clients reached, rejecting fd=%d\n", fd);
|
|
75
|
+
close(fd);
|
|
76
|
+
}
|
|
77
|
+
pthread_mutex_unlock(&g_lock);
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
static void remove_client(int fd)
|
|
81
|
+
{
|
|
82
|
+
pthread_mutex_lock(&g_lock);
|
|
83
|
+
for (int i = 0; i < g_num_clients; i++) {
|
|
84
|
+
if (g_clients[i] == fd) {
|
|
85
|
+
g_clients[i] = g_clients[--g_num_clients];
|
|
86
|
+
fprintf(stderr, "audio-service: client removed (fd=%d, total=%d)\n", fd, g_num_clients);
|
|
87
|
+
break;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
pthread_mutex_unlock(&g_lock);
|
|
91
|
+
close(fd);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
/* ── Per-callback accumulation state ─────────────────────────────────────── */
|
|
95
|
+
|
|
96
|
+
typedef struct {
|
|
97
|
+
float staging[CHUNK_SAMPLES];
|
|
98
|
+
int pos;
|
|
99
|
+
} State;
|
|
100
|
+
|
|
101
|
+
/* ── AudioQueue input callback ───────────────────────────────────────────── */
|
|
102
|
+
|
|
103
|
+
static void audio_callback(
|
|
104
|
+
void *user_data,
|
|
105
|
+
AudioQueueRef queue,
|
|
106
|
+
AudioQueueBufferRef buffer,
|
|
107
|
+
const AudioTimeStamp *start_time,
|
|
108
|
+
UInt32 num_packets,
|
|
109
|
+
const AudioStreamPacketDescription *packet_desc)
|
|
110
|
+
{
|
|
111
|
+
(void)start_time; (void)num_packets; (void)packet_desc;
|
|
112
|
+
|
|
113
|
+
State *state = (State *)user_data;
|
|
114
|
+
const float *samples = (const float *)buffer->mAudioData;
|
|
115
|
+
UInt32 n = buffer->mAudioDataByteSize / BYTES_PER_SAMPLE;
|
|
116
|
+
|
|
117
|
+
for (UInt32 i = 0; i < n && g_running; i++) {
|
|
118
|
+
state->staging[state->pos++] = samples[i];
|
|
119
|
+
if (state->pos == CHUNK_SAMPLES) {
|
|
120
|
+
/* Broadcast chunk to all connected clients */
|
|
121
|
+
pthread_mutex_lock(&g_lock);
|
|
122
|
+
for (int c = 0; c < g_num_clients; /* no increment */) {
|
|
123
|
+
const char *p = (const char *)state->staging;
|
|
124
|
+
ssize_t rem = (ssize_t)CHUNK_BYTES;
|
|
125
|
+
int ok = 1;
|
|
126
|
+
while (rem > 0) {
|
|
127
|
+
ssize_t r = write(g_clients[c], p, (size_t)rem);
|
|
128
|
+
if (r < 0) {
|
|
129
|
+
if (errno == EINTR) continue;
|
|
130
|
+
/* Client disconnected — remove it */
|
|
131
|
+
int dead_fd = g_clients[c];
|
|
132
|
+
g_clients[c] = g_clients[--g_num_clients];
|
|
133
|
+
close(dead_fd);
|
|
134
|
+
fprintf(stderr, "audio-service: client dropped (fd=%d, total=%d)\n",
|
|
135
|
+
dead_fd, g_num_clients);
|
|
136
|
+
ok = 0;
|
|
137
|
+
break;
|
|
138
|
+
}
|
|
139
|
+
p += r;
|
|
140
|
+
rem -= r;
|
|
141
|
+
}
|
|
142
|
+
if (ok) c++; /* only advance if we didn't remove */
|
|
143
|
+
}
|
|
144
|
+
pthread_mutex_unlock(&g_lock);
|
|
145
|
+
state->pos = 0;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
if (g_running)
|
|
150
|
+
AudioQueueEnqueueBuffer(queue, buffer, 0, NULL);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/* ── start_queue: create and start an AudioQueue ─────────────────────────── */
|
|
154
|
+
|
|
155
|
+
/*
 * start_queue — create, prime and start the recording AudioQueue.
 *
 * Configures mono (CHANNELS) packed float32 linear PCM at SAMPLE_RATE,
 * zeroes *state and registers it as the callback's user data, allocates and
 * enqueues NUM_BUFFERS buffers of BUFFER_BYTES each, then starts the queue.
 *
 * out_queue: receives the running queue; written only on success.
 * state:     accumulation state for audio_callback; must stay valid for as
 *            long as the queue exists.
 *
 * Returns noErr on success. On any failure — including a failed buffer
 * enqueue — the partially built queue is disposed and the failing OSStatus
 * is returned.
 */
static OSStatus start_queue(AudioQueueRef *out_queue, State *state)
{
    AudioStreamBasicDescription fmt;
    memset(&fmt, 0, sizeof(fmt));
    fmt.mSampleRate       = SAMPLE_RATE;
    fmt.mFormatID         = kAudioFormatLinearPCM;
    fmt.mFormatFlags      = kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked;
    fmt.mBitsPerChannel   = 32;
    fmt.mChannelsPerFrame = CHANNELS;
    fmt.mBytesPerFrame    = BYTES_PER_SAMPLE * CHANNELS;
    fmt.mFramesPerPacket  = 1;
    fmt.mBytesPerPacket   = fmt.mBytesPerFrame;

    memset(state, 0, sizeof(*state));

    /*
     * NULL run loop / mode: presumably the callback then runs on an
     * AudioQueue-internal thread — confirm against Apple's
     * AudioQueueNewInput documentation.
     */
    AudioQueueRef queue = NULL;
    OSStatus err = AudioQueueNewInput(&fmt, audio_callback, state,
                                      NULL, NULL, 0, &queue);
    if (err != noErr)
        return err;

    for (int i = 0; i < NUM_BUFFERS; i++) {
        AudioQueueBufferRef buf = NULL;
        err = AudioQueueAllocateBuffer(queue, BUFFER_BYTES, &buf);
        if (err != noErr)
            goto fail;
        err = AudioQueueEnqueueBuffer(queue, buf, 0, NULL);
        if (err != noErr)
            goto fail;
    }

    err = AudioQueueStart(queue, NULL);
    if (err != noErr)
        goto fail;

    *out_queue = queue;
    return noErr;

fail:
    AudioQueueDispose(queue, true);
    return err;
}
|
|
188
|
+
|
|
189
|
+
/* ── Accept thread: accepts new clients in the background ────────────────── */
|
|
190
|
+
|
|
191
|
+
static int g_server_fd = -1;
|
|
192
|
+
|
|
193
|
+
static void *accept_thread(void *arg)
|
|
194
|
+
{
|
|
195
|
+
(void)arg;
|
|
196
|
+
while (g_running) {
|
|
197
|
+
int client_fd = accept(g_server_fd, NULL, NULL);
|
|
198
|
+
if (client_fd < 0) {
|
|
199
|
+
if (errno == EINTR) continue;
|
|
200
|
+
break;
|
|
201
|
+
}
|
|
202
|
+
add_client(client_fd);
|
|
203
|
+
}
|
|
204
|
+
return NULL;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/* ── main ────────────────────────────────────────────────────────────────── */
|
|
208
|
+
|
|
209
|
+
int main(void)
|
|
210
|
+
{
|
|
211
|
+
signal(SIGTERM, handle_signal);
|
|
212
|
+
signal(SIGINT, handle_signal);
|
|
213
|
+
signal(SIGPIPE, SIG_IGN);
|
|
214
|
+
|
|
215
|
+
/* Create Unix socket */
|
|
216
|
+
g_server_fd = socket(AF_UNIX, SOCK_STREAM, 0);
|
|
217
|
+
if (g_server_fd < 0) {
|
|
218
|
+
fprintf(stderr, "audio-service: socket: %s\n", strerror(errno));
|
|
219
|
+
return 1;
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
unlink(SOCKET_PATH);
|
|
223
|
+
|
|
224
|
+
struct sockaddr_un addr;
|
|
225
|
+
memset(&addr, 0, sizeof(addr));
|
|
226
|
+
addr.sun_family = AF_UNIX;
|
|
227
|
+
strncpy(addr.sun_path, SOCKET_PATH, sizeof(addr.sun_path) - 1);
|
|
228
|
+
|
|
229
|
+
if (bind(g_server_fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
|
230
|
+
fprintf(stderr, "audio-service: bind: %s\n", strerror(errno));
|
|
231
|
+
return 1;
|
|
232
|
+
}
|
|
233
|
+
if (listen(g_server_fd, MAX_CLIENTS) < 0) {
|
|
234
|
+
fprintf(stderr, "audio-service: listen: %s\n", strerror(errno));
|
|
235
|
+
return 1;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
/* Start AudioQueue — always recording, broadcasting to all clients */
|
|
239
|
+
State state;
|
|
240
|
+
OSStatus err = start_queue(&g_queue, &state);
|
|
241
|
+
if (err != noErr) {
|
|
242
|
+
fprintf(stderr, "audio-service: AudioQueue error (%d)\n", (int)err);
|
|
243
|
+
return 1;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
/* Start accept thread for new connections */
|
|
247
|
+
pthread_t tid;
|
|
248
|
+
pthread_create(&tid, NULL, accept_thread, NULL);
|
|
249
|
+
|
|
250
|
+
/* Run CoreAudio run loop */
|
|
251
|
+
while (g_running)
|
|
252
|
+
CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.1, false);
|
|
253
|
+
|
|
254
|
+
/* Cleanup */
|
|
255
|
+
AudioQueueStop(g_queue, true);
|
|
256
|
+
AudioQueueDispose(g_queue, true);
|
|
257
|
+
|
|
258
|
+
pthread_mutex_lock(&g_lock);
|
|
259
|
+
for (int i = 0; i < g_num_clients; i++)
|
|
260
|
+
close(g_clients[i]);
|
|
261
|
+
g_num_clients = 0;
|
|
262
|
+
pthread_mutex_unlock(&g_lock);
|
|
263
|
+
|
|
264
|
+
close(g_server_fd);
|
|
265
|
+
unlink(SOCKET_PATH);
|
|
266
|
+
return 0;
|
|
267
|
+
}
|