voicesmith-mcp 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -7
- package/hooks/session-start.sh +90 -19
- package/package.json +1 -1
- package/server.py +38 -3
- package/session_registry.py +87 -56
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@ Local AI voice for coding assistants. Gives your AI a real voice (text-to-speech
|
|
|
11
11
|
- **54 distinct voices** via Kokoro ONNX (local TTS, ~300MB model)
|
|
12
12
|
- **Speech-to-text** via faster-whisper (local STT, ~150MB model)
|
|
13
13
|
- **Voice activity detection** via Silero VAD (local, 2MB)
|
|
14
|
-
- **Multi-session support** — run multiple
|
|
14
|
+
- **Multi-session support** — run multiple Claude Code sessions, each with its own voice (single session for Cursor/Codex)
|
|
15
15
|
- **Works with Claude Code, Cursor, and Codex**
|
|
16
16
|
|
|
17
17
|
## Quick Start
|
|
@@ -69,7 +69,11 @@ The MCP server runs as a local process alongside your IDE. It communicates over
|
|
|
69
69
|
|
|
70
70
|
## Multi-Session
|
|
71
71
|
|
|
72
|
-
Multiple
|
|
72
|
+
**Claude Code:** Full multi-session support. Multiple Claude Code sessions can run simultaneously, each with its own voice. Session identity is tracked via Claude's `session_id` — resuming a session reclaims the same voice, and multiple terminals sharing the same session share the same voice. Orphaned servers are detected and cleaned up automatically.
|
|
73
|
+
|
|
74
|
+
**Cursor / Codex:** Single session only. Cursor runs one MCP server per config (shared across tabs), and Codex has no multi-session hooks. Voice works normally — just no multi-session coordination.
|
|
75
|
+
|
|
76
|
+
Cross-session audio is serialized via `flock` to prevent overlapping playback.
|
|
73
77
|
|
|
74
78
|
## Configuration
|
|
75
79
|
|
|
@@ -102,11 +106,11 @@ Re-run `npx voicesmith-mcp install` to change your voice or update settings. Exi
|
|
|
102
106
|
|
|
103
107
|
## Supported IDEs
|
|
104
108
|
|
|
105
|
-
| IDE | Config Location | Rules Location |
|
|
106
|
-
|
|
107
|
-
| Claude Code | `~/.claude.json` | `~/.claude/CLAUDE.md` |
|
|
108
|
-
| Cursor | `~/.cursor/mcp.json` | `~/.cursor/rules/voicesmith.mdc` |
|
|
109
|
-
| Codex | `~/.codex/mcp.json` | `~/.codex/AGENTS.md` |
|
|
109
|
+
| IDE | Config Location | Rules Location | Multi-Session |
|
|
110
|
+
|-----|----------------|----------------|---------------|
|
|
111
|
+
| Claude Code | `~/.claude.json` | `~/.claude/CLAUDE.md` | Yes (via session_id) |
|
|
112
|
+
| Cursor | `~/.cursor/mcp.json` | `~/.cursor/rules/voicesmith.mdc` | No (single server) |
|
|
113
|
+
| Codex | `~/.codex/mcp.json` | `~/.codex/AGENTS.md` | No (single session) |
|
|
110
114
|
|
|
111
115
|
## Uninstall
|
|
112
116
|
|
package/hooks/session-start.sh
CHANGED
|
@@ -1,44 +1,115 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
|
-
# VoiceSmith MCP — SessionStart hook
|
|
3
|
-
#
|
|
4
|
-
#
|
|
2
|
+
# VoiceSmith MCP — SessionStart hook
|
|
3
|
+
# 1. Receives session_id from Claude Code via stdin JSON
|
|
4
|
+
# 2. Sends session_id to the MCP server's POST /session endpoint
|
|
5
|
+
# 3. Server reconciles voice with sibling sessions (same session_id)
|
|
6
|
+
# 4. Returns the assigned voice name as additionalContext
|
|
5
7
|
|
|
6
8
|
SESSIONS_FILE="$HOME/.local/share/voicesmith-mcp/sessions.json"
|
|
7
9
|
INPUT=$(cat)
|
|
8
10
|
|
|
11
|
+
# Parse session_id from hook input (all hooks receive session_id via stdin)
|
|
12
|
+
SESSION_ID=$(echo "$INPUT" | python3 -c "
|
|
13
|
+
import sys, json
|
|
14
|
+
try:
|
|
15
|
+
data = json.load(sys.stdin)
|
|
16
|
+
print(data.get('session_id', ''))
|
|
17
|
+
except:
|
|
18
|
+
pass
|
|
19
|
+
" 2>/dev/null)
|
|
20
|
+
|
|
9
21
|
SESSION_NAME=""
|
|
10
22
|
SESSION_VOICE=""
|
|
11
23
|
|
|
12
24
|
if [ -f "$SESSIONS_FILE" ]; then
|
|
13
|
-
|
|
25
|
+
# Find this session's MCP server port (by PID liveness, prefer tmux match)
|
|
26
|
+
PORT=$(python3 -c "
|
|
14
27
|
import json, os
|
|
15
28
|
try:
|
|
16
29
|
with open('$SESSIONS_FILE') as f:
|
|
17
30
|
data = json.load(f)
|
|
18
|
-
|
|
31
|
+
tmux = os.environ.get('VOICESMITH_TMUX', '')
|
|
32
|
+
# Try tmux match first
|
|
19
33
|
for s in data.get('sessions', []):
|
|
20
34
|
try:
|
|
21
35
|
os.kill(s['pid'], 0)
|
|
22
|
-
if
|
|
23
|
-
print(
|
|
24
|
-
|
|
36
|
+
if tmux and s.get('tmux_session') == tmux:
|
|
37
|
+
print(s['port'])
|
|
38
|
+
raise SystemExit
|
|
39
|
+
except (OSError, ProcessLookupError):
|
|
40
|
+
pass
|
|
41
|
+
# Fallback: most recent alive session
|
|
42
|
+
for s in reversed(data.get('sessions', [])):
|
|
43
|
+
try:
|
|
44
|
+
os.kill(s['pid'], 0)
|
|
45
|
+
print(s['port'])
|
|
46
|
+
break
|
|
25
47
|
except (OSError, ProcessLookupError):
|
|
26
48
|
pass
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
49
|
+
except:
|
|
50
|
+
pass
|
|
51
|
+
" 2>/dev/null)
|
|
52
|
+
|
|
53
|
+
# Send session_id to the server if we have both port and session_id
|
|
54
|
+
if [ -n "$PORT" ] && [ -n "$SESSION_ID" ]; then
|
|
55
|
+
RESPONSE=$(curl -s --max-time 3 -X POST \
|
|
56
|
+
-H "Content-Type: application/json" \
|
|
57
|
+
-d "{\"session_id\": \"$SESSION_ID\"}" \
|
|
58
|
+
"http://127.0.0.1:$PORT/session" 2>/dev/null)
|
|
59
|
+
|
|
60
|
+
if [ -n "$RESPONSE" ]; then
|
|
61
|
+
SESSION_NAME=$(echo "$RESPONSE" | python3 -c "
|
|
62
|
+
import sys, json
|
|
63
|
+
try:
|
|
64
|
+
d = json.load(sys.stdin)
|
|
65
|
+
s = d.get('session', {})
|
|
66
|
+
print(s.get('name', ''))
|
|
67
|
+
except:
|
|
68
|
+
pass
|
|
69
|
+
" 2>/dev/null)
|
|
70
|
+
SESSION_VOICE=$(echo "$RESPONSE" | python3 -c "
|
|
71
|
+
import sys, json
|
|
72
|
+
try:
|
|
73
|
+
d = json.load(sys.stdin)
|
|
74
|
+
s = d.get('session', {})
|
|
75
|
+
print(s.get('voice', ''))
|
|
76
|
+
except:
|
|
77
|
+
pass
|
|
78
|
+
" 2>/dev/null)
|
|
79
|
+
fi
|
|
80
|
+
fi
|
|
81
|
+
|
|
82
|
+
# Fallback: read sessions.json directly if HTTP call didn't work
|
|
83
|
+
if [ -z "$SESSION_NAME" ]; then
|
|
84
|
+
SESSION_INFO=$(python3 -c "
|
|
85
|
+
import json, os
|
|
86
|
+
try:
|
|
87
|
+
with open('$SESSIONS_FILE') as f:
|
|
88
|
+
data = json.load(f)
|
|
89
|
+
tmux = os.environ.get('VOICESMITH_TMUX', '')
|
|
90
|
+
for s in data.get('sessions', []):
|
|
91
|
+
try:
|
|
92
|
+
os.kill(s['pid'], 0)
|
|
93
|
+
if tmux and s.get('tmux_session') == tmux:
|
|
31
94
|
print(f\"{s['name']}|{s['voice']}\")
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
95
|
+
raise SystemExit
|
|
96
|
+
except (OSError, ProcessLookupError):
|
|
97
|
+
pass
|
|
98
|
+
for s in reversed(data.get('sessions', [])):
|
|
99
|
+
try:
|
|
100
|
+
os.kill(s['pid'], 0)
|
|
101
|
+
print(f\"{s['name']}|{s['voice']}\")
|
|
102
|
+
break
|
|
103
|
+
except (OSError, ProcessLookupError):
|
|
104
|
+
pass
|
|
105
|
+
except:
|
|
36
106
|
pass
|
|
37
107
|
" 2>/dev/null)
|
|
38
108
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
109
|
+
if [ -n "$SESSION_INFO" ]; then
|
|
110
|
+
SESSION_NAME=$(echo "$SESSION_INFO" | cut -d'|' -f1)
|
|
111
|
+
SESSION_VOICE=$(echo "$SESSION_INFO" | cut -d'|' -f2)
|
|
112
|
+
fi
|
|
42
113
|
fi
|
|
43
114
|
fi
|
|
44
115
|
|
package/package.json
CHANGED
package/server.py
CHANGED
|
@@ -175,6 +175,7 @@ class _VoiceHTTPHandler(BaseHTTPRequestHandler):
|
|
|
175
175
|
"ready": True,
|
|
176
176
|
"name": _session_info.get("name") if _session_info else None,
|
|
177
177
|
"port": _session_info.get("port") if _session_info else None,
|
|
178
|
+
"session_id": _session_info.get("session_id") if _session_info else None,
|
|
178
179
|
"mcp_connected": _event_loop is not None,
|
|
179
180
|
"uptime_s": round(time.time() - _startup_time),
|
|
180
181
|
"last_tool_call_age_s": round(time.time() - _last_tool_call),
|
|
@@ -191,9 +192,38 @@ class _VoiceHTTPHandler(BaseHTTPRequestHandler):
|
|
|
191
192
|
self._handle_listen()
|
|
192
193
|
elif self.path == "/speak":
|
|
193
194
|
self._handle_speak()
|
|
195
|
+
elif self.path == "/session":
|
|
196
|
+
self._handle_session_update()
|
|
194
197
|
else:
|
|
195
198
|
self.send_error(404)
|
|
196
199
|
|
|
200
|
+
def _handle_session_update(self):
    """Receive session_id from the SessionStart hook and reconcile voice.

    Expects a JSON object body such as ``{"session_id": "<id>"}``.

    Responses:
        400 ``invalid_json``       -- body is not parseable JSON, the
                                      Content-Length header is not an
                                      integer, or the JSON value is not
                                      an object.
        400 ``missing_session_id`` -- ``session_id`` is absent or falsy.
        404 ``session_not_found``  -- ``update_session_id`` returned a
                                      falsy value for this process's PID.
        200                        -- ``{"success": True, "session": ...}``
                                      with the updated registry entry.

    On success the module-level ``_session_info`` is replaced with the
    updated entry.
    """
    global _session_info

    try:
        content_length = int(self.headers.get("Content-Length", 0))
        # An absent or zero-length body is treated as an empty JSON object.
        body = self.rfile.read(content_length) if content_length > 0 else b"{}"
        params = json.loads(body)
    except (json.JSONDecodeError, ValueError):
        self._json_response(400, {"error": "invalid_json"})
        return

    # json.loads happily returns lists, strings, numbers, etc. Those have
    # no .get(), so reject anything that is not a JSON object up front
    # instead of letting an AttributeError escape the handler.
    if not isinstance(params, dict):
        self._json_response(400, {"error": "invalid_json"})
        return

    session_id = params.get("session_id")
    if not session_id:
        self._json_response(400, {"error": "missing_session_id"})
        return

    # Imported lazily, as in the original handler.
    from session_registry import update_session_id
    updated = update_session_id(os.getpid(), session_id)

    if updated:
        _session_info = updated
        logger.info(f"Session updated: session_id={session_id}, name={updated['name']}")
        self._json_response(200, {"success": True, "session": updated})
    else:
        self._json_response(404, {"error": "session_not_found"})
|
|
226
|
+
|
|
197
227
|
def _handle_speak(self):
|
|
198
228
|
"""Synthesize and play speech via HTTP. Used by SessionStart hook for preheat intro."""
|
|
199
229
|
if _event_loop is None:
|
|
@@ -833,12 +863,17 @@ def _start_preheat_intro():
|
|
|
833
863
|
return
|
|
834
864
|
|
|
835
865
|
def _intro():
|
|
836
|
-
# Wait for server to settle
|
|
866
|
+
# Wait for server to settle — also gives the SessionStart hook
|
|
867
|
+
# time to fire and update _session_info with session_id and
|
|
868
|
+
# possibly a different name (multi-terminal sibling reconciliation)
|
|
837
869
|
time.sleep(1.5)
|
|
870
|
+
# Re-read _session_info in case the hook updated it during the sleep
|
|
871
|
+
intro_name = _session_info.get("name", default_name) if _session_info else default_name
|
|
872
|
+
intro_voice = _session_info.get("voice", default_voice) if _session_info else default_voice
|
|
838
873
|
try:
|
|
839
|
-
result = _tts_engine.synthesize(f"{
|
|
874
|
+
result = _tts_engine.synthesize(f"{intro_name} here, ready to go.", intro_voice, 1.0)
|
|
840
875
|
_audio_player.play(result.samples, result.sample_rate)
|
|
841
|
-
logger.info(f"Preheat intro spoken: {
|
|
876
|
+
logger.info(f"Preheat intro spoken: {intro_name}")
|
|
842
877
|
except Exception as e:
|
|
843
878
|
logger.warning(f"Preheat intro failed: {e}")
|
|
844
879
|
|
package/session_registry.py
CHANGED
|
@@ -9,8 +9,8 @@ import fcntl
|
|
|
9
9
|
import json
|
|
10
10
|
import os
|
|
11
11
|
import signal
|
|
12
|
+
import subprocess
|
|
12
13
|
import time
|
|
13
|
-
import urllib.request
|
|
14
14
|
from datetime import datetime, timezone
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from typing import Optional
|
|
@@ -59,20 +59,26 @@ def _write_sessions(path: Path, sessions: list[dict]) -> None:
|
|
|
59
59
|
json.dump({"sessions": sessions}, f, indent=2)
|
|
60
60
|
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
62
|
+
def _get_ppid(pid: int) -> int:
    """Get the parent PID of a process. Returns 0 on failure.

    The current process is answered directly via ``os.getppid()``; any
    other PID is looked up by running ``ps -o ppid= -p <pid>`` with a
    2-second timeout.

    Args:
        pid: Process ID to inspect.

    Returns:
        The parent PID, or 0 if ``pid`` is not a positive integer, ``ps``
        cannot be run, times out, exits non-zero, or prints something that
        is not an integer.
    """
    # Not a real process ID — no point spawning ps for it.
    if not isinstance(pid, int) or pid <= 0:
        return 0

    # Our own parent is available without a subprocess.
    if pid == os.getpid():
        return os.getppid()

    try:
        result = subprocess.run(
            ["ps", "-o", "ppid=", "-p", str(pid)],
            capture_output=True, text=True, timeout=2,
        )
        if result.returncode != 0:
            return 0
        return int(result.stdout.strip())
    except (OSError, subprocess.SubprocessError, ValueError):
        # OSError: ps missing or not executable; SubprocessError: timeout;
        # ValueError: empty, undecodable or non-numeric output.
        return 0
|
|
66
72
|
|
|
67
73
|
|
|
68
74
|
def _session_healthy(session: dict) -> bool:
|
|
69
|
-
"""Check if a session is alive and
|
|
75
|
+
"""Check if a session is alive and its parent (IDE) is still running.
|
|
70
76
|
|
|
71
|
-
|
|
72
|
-
1. PID alive
|
|
73
|
-
2.
|
|
74
|
-
|
|
75
|
-
|
|
77
|
+
Checks:
|
|
78
|
+
1. PID alive — catches crashed server processes
|
|
79
|
+
2. Parent PID alive — if the parent (Claude Code, Cursor, etc.) died,
|
|
80
|
+
the server is orphaned. On macOS/Linux, orphaned processes get
|
|
81
|
+
reparented to PID 1 (launchd/init).
|
|
76
82
|
"""
|
|
77
83
|
pid = session.get("pid", 0)
|
|
78
84
|
if not _pid_alive(pid):
|
|
@@ -82,47 +88,15 @@ def _session_healthy(session: dict) -> bool:
|
|
|
82
88
|
if pid == os.getpid():
|
|
83
89
|
return True
|
|
84
90
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
if not data.get("ready", False):
|
|
95
|
-
raise ValueError("not ready")
|
|
96
|
-
|
|
97
|
-
mcp_connected = data.get("mcp_connected", True)
|
|
98
|
-
uptime = data.get("uptime_s", 0)
|
|
99
|
-
|
|
100
|
-
# A server that has been running >10s but never had an MCP client
|
|
101
|
-
# connect is orphaned — it was started but the client disconnected
|
|
102
|
-
# before making any tool calls (e.g., interrupted resume).
|
|
103
|
-
if not mcp_connected and uptime > 10:
|
|
104
|
-
logger.info(
|
|
105
|
-
f"Session '{session.get('name')}' (pid {pid}) never connected "
|
|
106
|
-
f"to MCP client after {uptime}s — treating as stale"
|
|
107
|
-
)
|
|
108
|
-
raise ValueError("never connected")
|
|
109
|
-
|
|
110
|
-
# For periodic cleanup (non-aggressive), also check long inactivity
|
|
111
|
-
# This catches servers whose MCP client disconnected long ago
|
|
112
|
-
# but the process is still lingering
|
|
113
|
-
age = data.get("last_tool_call_age_s")
|
|
114
|
-
if age is not None and age > _STALE_ACTIVITY_THRESHOLD:
|
|
115
|
-
logger.info(
|
|
116
|
-
f"Session '{session.get('name')}' (pid {pid}) inactive "
|
|
117
|
-
f"for {age}s — treating as stale"
|
|
118
|
-
)
|
|
119
|
-
raise ValueError("inactive")
|
|
120
|
-
|
|
121
|
-
return True
|
|
122
|
-
except Exception:
|
|
123
|
-
# Server not responding or inactive — it's orphaned
|
|
124
|
-
logger.info(f"Session '{session.get('name')}' (pid {pid}) stale on port {port}")
|
|
125
|
-
# Kill the orphaned process
|
|
91
|
+
# Check if the server's parent process is still alive.
|
|
92
|
+
# If parent is PID 1 (launchd/init), the IDE exited and the server
|
|
93
|
+
# was reparented — it's orphaned.
|
|
94
|
+
ppid = _get_ppid(pid)
|
|
95
|
+
if ppid <= 1:
|
|
96
|
+
logger.info(
|
|
97
|
+
f"Session '{session.get('name')}' (pid {pid}) orphaned "
|
|
98
|
+
f"(parent pid {ppid}) — treating as stale"
|
|
99
|
+
)
|
|
126
100
|
try:
|
|
127
101
|
os.kill(pid, signal.SIGTERM)
|
|
128
102
|
logger.info(f"Sent SIGTERM to orphaned process {pid}")
|
|
@@ -130,15 +104,15 @@ def _session_healthy(session: dict) -> bool:
|
|
|
130
104
|
pass
|
|
131
105
|
return False
|
|
132
106
|
|
|
107
|
+
return True
|
|
108
|
+
|
|
133
109
|
|
|
134
110
|
def _clean_stale(sessions: list[dict]) -> list[dict]:
|
|
135
|
-
"""Remove sessions that are dead or
|
|
111
|
+
"""Remove sessions that are dead or orphaned.
|
|
136
112
|
|
|
137
113
|
Detection logic:
|
|
138
114
|
1. PID dead → remove immediately
|
|
139
|
-
2.
|
|
140
|
-
3. Server running >10s but MCP client never connected → orphaned, remove and kill
|
|
141
|
-
4. Server with MCP connected but inactive >5min → orphaned, remove and kill
|
|
115
|
+
2. Parent PID is 1 (launchd/init) → IDE exited, server orphaned → kill and remove
|
|
142
116
|
"""
|
|
143
117
|
alive = []
|
|
144
118
|
for s in sessions:
|
|
@@ -271,6 +245,7 @@ def register_session(
|
|
|
271
245
|
"voice": voice,
|
|
272
246
|
"port": port,
|
|
273
247
|
"pid": os.getpid(),
|
|
248
|
+
"session_id": None, # Set later by SessionStart hook
|
|
274
249
|
"tmux_session": tmux_session,
|
|
275
250
|
"started_at": datetime.now(timezone.utc).isoformat(),
|
|
276
251
|
}
|
|
@@ -303,6 +278,62 @@ def unregister_session() -> None:
|
|
|
303
278
|
logger.info("Session unregistered")
|
|
304
279
|
|
|
305
280
|
|
|
281
|
+
def update_session_id(pid: int, session_id: str) -> Optional[dict]:
    """Record ``session_id`` on the entry for ``pid`` and share a sibling's voice.

    Called when the SessionStart hook delivers a session_id to the server.
    While holding an exclusive ``flock`` on the sessions file this:

    1. Reads the registry and drops stale entries.
    2. Stores ``session_id`` on the entry whose ``pid`` matches.
    3. Looks for the first other live entry with the same ``session_id``
       and, if its name differs, copies that entry's name and voice.
    4. Writes the registry back.

    Returns a copy of the updated entry, or None when the sessions file
    does not exist, no entry matches ``pid``, or an OSError occurs.
    """
    registry_file = _sessions_path()
    if not registry_file.exists():
        return None

    try:
        with open(registry_file, "r+") as lock_handle:
            fcntl.flock(lock_handle, fcntl.LOCK_EX)
            entries = _clean_stale(_read_sessions(registry_file))

            # Locate the entry registered for this PID.
            mine = next((e for e in entries if e.get("pid") == pid), None)
            if mine is None:
                return None

            mine["session_id"] = session_id

            # First other live entry sharing this session_id, if any.
            sibling = next(
                (
                    e for e in entries
                    if e.get("session_id") == session_id
                    and e.get("pid") != pid
                    and _pid_alive(e.get("pid", 0))
                ),
                None,
            )

            # Shared session means shared voice: take over the sibling's identity.
            if sibling is not None and sibling["name"] != mine["name"]:
                logger.info(
                    f"Adopting sibling voice: {mine['name']} → {sibling['name']} "
                    f"(shared session_id {session_id})"
                )
                mine["name"] = sibling["name"]
                mine["voice"] = sibling["voice"]

            _write_sessions(registry_file, entries)
            return dict(mine)
    except OSError as e:
        logger.warning(f"Failed to update session_id: {e}")
        return None
|
|
335
|
+
|
|
336
|
+
|
|
306
337
|
def get_active_sessions() -> list[dict]:
|
|
307
338
|
"""Return list of active sessions (stale PIDs filtered out)."""
|
|
308
339
|
path = _sessions_path()
|