voicesmith-mcp 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ Local AI voice for coding assistants. Gives your AI a real voice (text-to-speech
11
11
  - **54 distinct voices** via Kokoro ONNX (local TTS, ~300MB model)
12
12
  - **Speech-to-text** via faster-whisper (local STT, ~150MB model)
13
13
  - **Voice activity detection** via Silero VAD (local, 2MB)
14
- - **Multi-session support** — run multiple IDE sessions, each with its own voice
14
+ - **Multi-session support** — run multiple Claude Code sessions, each with its own voice (single session for Cursor/Codex)
15
15
  - **Works with Claude Code, Cursor, and Codex**
16
16
 
17
17
  ## Quick Start
@@ -69,7 +69,11 @@ The MCP server runs as a local process alongside your IDE. It communicates over
69
69
 
70
70
  ## Multi-Session
71
71
 
72
- Multiple IDE sessions can run simultaneously. Each gets a unique voice name and HTTP port. Session coordination uses a shared registry (`sessions.json`) with file locking. Cross-session audio is serialized via `flock` to prevent overlapping playback.
72
+ **Claude Code:** Full multi-session support. Multiple Claude Code sessions can run simultaneously, each with its own voice. Session identity is tracked via Claude's `session_id` — resuming a session reclaims the same voice, and multiple terminals sharing the same session share the same voice. Orphaned servers are detected and cleaned up automatically.
73
+
74
+ **Cursor / Codex:** Single session only. Cursor runs one MCP server per config (shared across tabs), and Codex has no multi-session hooks. Voice works normally — just no multi-session coordination.
75
+
76
+ Cross-session audio is serialized via `flock` to prevent overlapping playback.
73
77
 
74
78
  ## Configuration
75
79
 
@@ -102,11 +106,11 @@ Re-run `npx voicesmith-mcp install` to change your voice or update settings. Exi
102
106
 
103
107
  ## Supported IDEs
104
108
 
105
- | IDE | Config Location | Rules Location |
106
- |-----|----------------|----------------|
107
- | Claude Code | `~/.claude.json` | `~/.claude/CLAUDE.md` |
108
- | Cursor | `~/.cursor/mcp.json` | `~/.cursor/rules/voicesmith.mdc` |
109
- | Codex | `~/.codex/mcp.json` | `~/.codex/AGENTS.md` |
109
+ | IDE | Config Location | Rules Location | Multi-Session |
110
+ |-----|----------------|----------------|---------------|
111
+ | Claude Code | `~/.claude.json` | `~/.claude/CLAUDE.md` | Yes (via session_id) |
112
+ | Cursor | `~/.cursor/mcp.json` | `~/.cursor/rules/voicesmith.mdc` | No (single server) |
113
+ | Codex | `~/.codex/mcp.json` | `~/.codex/AGENTS.md` | No (single session) |
110
114
 
111
115
  ## Uninstall
112
116
 
@@ -1,44 +1,115 @@
1
1
  #!/bin/bash
2
- # VoiceSmith MCP — SessionStart hook (lightweight)
3
- # Discovers this session's assigned voice name and injects it as context.
4
- # No TTS calls — just reads sessions.json and returns the name.
2
+ # VoiceSmith MCP — SessionStart hook
3
+ # 1. Receives session_id from Claude Code via stdin JSON
4
+ # 2. Sends session_id to the MCP server's POST /session endpoint
5
+ # 3. Server reconciles voice with sibling sessions (same session_id)
6
+ # 4. Returns the assigned voice name as additionalContext
5
7
 
6
8
  SESSIONS_FILE="$HOME/.local/share/voicesmith-mcp/sessions.json"
7
9
  INPUT=$(cat)
8
10
 
11
+ # Parse session_id from hook input (all hooks receive session_id via stdin)
12
+ SESSION_ID=$(echo "$INPUT" | python3 -c "
13
+ import sys, json
14
+ try:
15
+ data = json.load(sys.stdin)
16
+ print(data.get('session_id', ''))
17
+ except:
18
+ pass
19
+ " 2>/dev/null)
20
+
9
21
  SESSION_NAME=""
10
22
  SESSION_VOICE=""
11
23
 
12
24
  if [ -f "$SESSIONS_FILE" ]; then
13
- SESSION_INFO=$(python3 -c "
25
+ # Find this session's MCP server port (by PID liveness, prefer tmux match)
26
+ PORT=$(python3 -c "
14
27
  import json, os
15
28
  try:
16
29
  with open('$SESSIONS_FILE') as f:
17
30
  data = json.load(f)
18
- tmux_session = os.environ.get('VOICESMITH_TMUX', '')
31
+ tmux = os.environ.get('VOICESMITH_TMUX', '')
32
+ # Try tmux match first
19
33
  for s in data.get('sessions', []):
20
34
  try:
21
35
  os.kill(s['pid'], 0)
22
- if tmux_session and s.get('tmux_session') == tmux_session:
23
- print(f\"{s['name']}|{s['voice']}\")
24
- break
36
+ if tmux and s.get('tmux_session') == tmux:
37
+ print(s['port'])
38
+ raise SystemExit
39
+ except (OSError, ProcessLookupError):
40
+ pass
41
+ # Fallback: most recent alive session
42
+ for s in reversed(data.get('sessions', [])):
43
+ try:
44
+ os.kill(s['pid'], 0)
45
+ print(s['port'])
46
+ break
25
47
  except (OSError, ProcessLookupError):
26
48
  pass
27
- else:
28
- for s in reversed(data.get('sessions', [])):
29
- try:
30
- os.kill(s['pid'], 0)
49
+ except:
50
+ pass
51
+ " 2>/dev/null)
52
+
53
+ # Send session_id to the server if we have both port and session_id
54
+ if [ -n "$PORT" ] && [ -n "$SESSION_ID" ]; then
55
+ RESPONSE=$(curl -s --max-time 3 -X POST \
56
+ -H "Content-Type: application/json" \
57
+ -d "{\"session_id\": \"$SESSION_ID\"}" \
58
+ "http://127.0.0.1:$PORT/session" 2>/dev/null)
59
+
60
+ if [ -n "$RESPONSE" ]; then
61
+ SESSION_NAME=$(echo "$RESPONSE" | python3 -c "
62
+ import sys, json
63
+ try:
64
+ d = json.load(sys.stdin)
65
+ s = d.get('session', {})
66
+ print(s.get('name', ''))
67
+ except:
68
+ pass
69
+ " 2>/dev/null)
70
+ SESSION_VOICE=$(echo "$RESPONSE" | python3 -c "
71
+ import sys, json
72
+ try:
73
+ d = json.load(sys.stdin)
74
+ s = d.get('session', {})
75
+ print(s.get('voice', ''))
76
+ except:
77
+ pass
78
+ " 2>/dev/null)
79
+ fi
80
+ fi
81
+
82
+ # Fallback: read sessions.json directly if HTTP call didn't work
83
+ if [ -z "$SESSION_NAME" ]; then
84
+ SESSION_INFO=$(python3 -c "
85
+ import json, os
86
+ try:
87
+ with open('$SESSIONS_FILE') as f:
88
+ data = json.load(f)
89
+ tmux = os.environ.get('VOICESMITH_TMUX', '')
90
+ for s in data.get('sessions', []):
91
+ try:
92
+ os.kill(s['pid'], 0)
93
+ if tmux and s.get('tmux_session') == tmux:
31
94
  print(f\"{s['name']}|{s['voice']}\")
32
- break
33
- except (OSError, ProcessLookupError):
34
- pass
35
- except Exception:
95
+ raise SystemExit
96
+ except (OSError, ProcessLookupError):
97
+ pass
98
+ for s in reversed(data.get('sessions', [])):
99
+ try:
100
+ os.kill(s['pid'], 0)
101
+ print(f\"{s['name']}|{s['voice']}\")
102
+ break
103
+ except (OSError, ProcessLookupError):
104
+ pass
105
+ except:
36
106
  pass
37
107
  " 2>/dev/null)
38
108
 
39
- if [ -n "$SESSION_INFO" ]; then
40
- SESSION_NAME=$(echo "$SESSION_INFO" | cut -d'|' -f1)
41
- SESSION_VOICE=$(echo "$SESSION_INFO" | cut -d'|' -f2)
109
+ if [ -n "$SESSION_INFO" ]; then
110
+ SESSION_NAME=$(echo "$SESSION_INFO" | cut -d'|' -f1)
111
+ SESSION_VOICE=$(echo "$SESSION_INFO" | cut -d'|' -f2)
112
+ fi
42
113
  fi
43
114
  fi
44
115
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicesmith-mcp",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "Local AI voice for coding assistants — TTS & STT via MCP. Kokoro ONNX + faster-whisper, fully offline.",
5
5
  "bin": {
6
6
  "voicesmith-mcp": "bin/cli.js"
package/server.py CHANGED
@@ -175,6 +175,7 @@ class _VoiceHTTPHandler(BaseHTTPRequestHandler):
175
175
  "ready": True,
176
176
  "name": _session_info.get("name") if _session_info else None,
177
177
  "port": _session_info.get("port") if _session_info else None,
178
+ "session_id": _session_info.get("session_id") if _session_info else None,
178
179
  "mcp_connected": _event_loop is not None,
179
180
  "uptime_s": round(time.time() - _startup_time),
180
181
  "last_tool_call_age_s": round(time.time() - _last_tool_call),
@@ -191,9 +192,38 @@ class _VoiceHTTPHandler(BaseHTTPRequestHandler):
191
192
  self._handle_listen()
192
193
  elif self.path == "/speak":
193
194
  self._handle_speak()
195
+ elif self.path == "/session":
196
+ self._handle_session_update()
194
197
  else:
195
198
  self.send_error(404)
196
199
 
200
+ def _handle_session_update(self):
201
+ """Receive session_id from the SessionStart hook and reconcile voice."""
202
+ global _session_info
203
+
204
+ try:
205
+ content_length = int(self.headers.get("Content-Length", 0))
206
+ body = self.rfile.read(content_length) if content_length > 0 else b"{}"
207
+ params = json.loads(body)
208
+ except (json.JSONDecodeError, ValueError):
209
+ self._json_response(400, {"error": "invalid_json"})
210
+ return
211
+
212
+ session_id = params.get("session_id")
213
+ if not session_id:
214
+ self._json_response(400, {"error": "missing_session_id"})
215
+ return
216
+
217
+ from session_registry import update_session_id
218
+ updated = update_session_id(os.getpid(), session_id)
219
+
220
+ if updated:
221
+ _session_info = updated
222
+ logger.info(f"Session updated: session_id={session_id}, name={updated['name']}")
223
+ self._json_response(200, {"success": True, "session": updated})
224
+ else:
225
+ self._json_response(404, {"error": "session_not_found"})
226
+
197
227
  def _handle_speak(self):
198
228
  """Synthesize and play speech via HTTP. Used by SessionStart hook for preheat intro."""
199
229
  if _event_loop is None:
@@ -833,12 +863,17 @@ def _start_preheat_intro():
833
863
  return
834
864
 
835
865
  def _intro():
836
- # Wait for server to settle
866
+ # Wait for server to settle — also gives the SessionStart hook
867
+ # time to fire and update _session_info with session_id and
868
+ # possibly a different name (multi-terminal sibling reconciliation)
837
869
  time.sleep(1.5)
870
+ # Re-read _session_info in case the hook updated it during the sleep
871
+ intro_name = _session_info.get("name", default_name) if _session_info else default_name
872
+ intro_voice = _session_info.get("voice", default_voice) if _session_info else default_voice
838
873
  try:
839
- result = _tts_engine.synthesize(f"{name} here, ready to go.", voice, 1.0)
874
+ result = _tts_engine.synthesize(f"{intro_name} here, ready to go.", intro_voice, 1.0)
840
875
  _audio_player.play(result.samples, result.sample_rate)
841
- logger.info(f"Preheat intro spoken: {name}")
876
+ logger.info(f"Preheat intro spoken: {intro_name}")
842
877
  except Exception as e:
843
878
  logger.warning(f"Preheat intro failed: {e}")
844
879
 
@@ -9,8 +9,8 @@ import fcntl
9
9
  import json
10
10
  import os
11
11
  import signal
12
+ import subprocess
12
13
  import time
13
- import urllib.request
14
14
  from datetime import datetime, timezone
15
15
  from pathlib import Path
16
16
  from typing import Optional
@@ -59,20 +59,26 @@ def _write_sessions(path: Path, sessions: list[dict]) -> None:
59
59
  json.dump({"sessions": sessions}, f, indent=2)
60
60
 
61
61
 
62
- # Max seconds since last MCP tool call before considering a session stale.
63
- # If a server hasn't had any tool calls in this time, it's likely orphaned
64
- # (MCP client disconnected but process is still running).
65
- _STALE_ACTIVITY_THRESHOLD = 300 # 5 minutes
62
+ def _get_ppid(pid: int) -> int:
63
+ """Get the parent PID of a process. Returns 0 on failure."""
64
+ try:
65
+ result = subprocess.run(
66
+ ["ps", "-o", "ppid=", "-p", str(pid)],
67
+ capture_output=True, text=True, timeout=2,
68
+ )
69
+ return int(result.stdout.strip()) if result.returncode == 0 else 0
70
+ except Exception:
71
+ return 0
66
72
 
67
73
 
68
74
  def _session_healthy(session: dict) -> bool:
69
- """Check if a session is alive and actively used.
75
+ """Check if a session is alive and its parent (IDE) is still running.
70
76
 
71
- Three checks:
72
- 1. PID alive (fast, catches crashed processes)
73
- 2. HTTP health check on /status (catches completely dead servers)
74
- 3. Activity check — if last_tool_call_age_s exceeds threshold, the server
75
- is alive but orphaned (no MCP client connected)
77
+ Checks:
78
+ 1. PID alive — catches crashed server processes
79
+ 2. Parent PID alive — if the parent (Claude Code, Cursor, etc.) died,
80
+ the server is orphaned. On macOS/Linux, orphaned processes get
81
+ reparented to PID 1 (launchd/init).
76
82
  """
77
83
  pid = session.get("pid", 0)
78
84
  if not _pid_alive(pid):
@@ -82,47 +88,15 @@ def _session_healthy(session: dict) -> bool:
82
88
  if pid == os.getpid():
83
89
  return True
84
90
 
85
- port = session.get("port")
86
- if not port:
87
- return False
88
-
89
- try:
90
- url = f"http://127.0.0.1:{port}/status"
91
- req = urllib.request.Request(url, method="GET")
92
- with urllib.request.urlopen(req, timeout=2) as resp:
93
- data = json.loads(resp.read())
94
- if not data.get("ready", False):
95
- raise ValueError("not ready")
96
-
97
- mcp_connected = data.get("mcp_connected", True)
98
- uptime = data.get("uptime_s", 0)
99
-
100
- # A server that has been running >10s but never had an MCP client
101
- # connect is orphaned — it was started but the client disconnected
102
- # before making any tool calls (e.g., interrupted resume).
103
- if not mcp_connected and uptime > 10:
104
- logger.info(
105
- f"Session '{session.get('name')}' (pid {pid}) never connected "
106
- f"to MCP client after {uptime}s — treating as stale"
107
- )
108
- raise ValueError("never connected")
109
-
110
- # For periodic cleanup (non-aggressive), also check long inactivity
111
- # This catches servers whose MCP client disconnected long ago
112
- # but the process is still lingering
113
- age = data.get("last_tool_call_age_s")
114
- if age is not None and age > _STALE_ACTIVITY_THRESHOLD:
115
- logger.info(
116
- f"Session '{session.get('name')}' (pid {pid}) inactive "
117
- f"for {age}s — treating as stale"
118
- )
119
- raise ValueError("inactive")
120
-
121
- return True
122
- except Exception:
123
- # Server not responding or inactive — it's orphaned
124
- logger.info(f"Session '{session.get('name')}' (pid {pid}) stale on port {port}")
125
- # Kill the orphaned process
91
+ # Check if the server's parent process is still alive.
92
+ # If parent is PID 1 (launchd/init), the IDE exited and the server
93
+ # was reparented — it's orphaned.
94
+ ppid = _get_ppid(pid)
95
+ if ppid <= 1:
96
+ logger.info(
97
+ f"Session '{session.get('name')}' (pid {pid}) orphaned "
98
+ f"(parent pid {ppid}) — treating as stale"
99
+ )
126
100
  try:
127
101
  os.kill(pid, signal.SIGTERM)
128
102
  logger.info(f"Sent SIGTERM to orphaned process {pid}")
@@ -130,15 +104,15 @@ def _session_healthy(session: dict) -> bool:
130
104
  pass
131
105
  return False
132
106
 
107
+ return True
108
+
133
109
 
134
110
  def _clean_stale(sessions: list[dict]) -> list[dict]:
135
- """Remove sessions that are dead or unresponsive.
111
+ """Remove sessions that are dead or orphaned.
136
112
 
137
113
  Detection logic:
138
114
  1. PID dead → remove immediately
139
- 2. HTTP /status not responding → remove and kill
140
- 3. Server running >10s but MCP client never connected → orphaned, remove and kill
141
- 4. Server with MCP connected but inactive >5min → orphaned, remove and kill
115
+ 2. Parent PID is 1 (launchd/init) → IDE exited, server orphaned → kill and remove
142
116
  """
143
117
  alive = []
144
118
  for s in sessions:
@@ -271,6 +245,7 @@ def register_session(
271
245
  "voice": voice,
272
246
  "port": port,
273
247
  "pid": os.getpid(),
248
+ "session_id": None, # Set later by SessionStart hook
274
249
  "tmux_session": tmux_session,
275
250
  "started_at": datetime.now(timezone.utc).isoformat(),
276
251
  }
@@ -303,6 +278,62 @@ def unregister_session() -> None:
303
278
  logger.info("Session unregistered")
304
279
 
305
280
 
281
+ def update_session_id(pid: int, session_id: str) -> Optional[dict]:
282
+ """Set the session_id on this PID's entry and reconcile voice with siblings.
283
+
284
+ When the SessionStart hook fires, it sends the session_id to the server.
285
+ The server calls this to:
286
+ 1. Set session_id on its own entry
287
+ 2. Check if a living sibling (same session_id) already has a voice
288
+ 3. If so, adopt that voice (shared session = shared voice)
289
+
290
+ Returns the updated session dict, or None if PID not found.
291
+ """
292
+ path = _sessions_path()
293
+ if not path.exists():
294
+ return None
295
+
296
+ try:
297
+ with open(path, "r+") as f:
298
+ fcntl.flock(f, fcntl.LOCK_EX)
299
+ sessions = _read_sessions(path)
300
+ sessions = _clean_stale(sessions)
301
+
302
+ # Find our entry
303
+ our_entry = None
304
+ for s in sessions:
305
+ if s.get("pid") == pid:
306
+ our_entry = s
307
+ break
308
+
309
+ if our_entry is None:
310
+ return None
311
+
312
+ # Set session_id
313
+ our_entry["session_id"] = session_id
314
+
315
+ # Look for living siblings with the same session_id
316
+ for s in sessions:
317
+ if (s.get("session_id") == session_id
318
+ and s.get("pid") != pid
319
+ and _pid_alive(s.get("pid", 0))):
320
+ # Sibling found — adopt its name and voice
321
+ if s["name"] != our_entry["name"]:
322
+ logger.info(
323
+ f"Adopting sibling voice: {our_entry['name']} → {s['name']} "
324
+ f"(shared session_id {session_id})"
325
+ )
326
+ our_entry["name"] = s["name"]
327
+ our_entry["voice"] = s["voice"]
328
+ break
329
+
330
+ _write_sessions(path, sessions)
331
+ return dict(our_entry)
332
+ except OSError as e:
333
+ logger.warning(f"Failed to update session_id: {e}")
334
+ return None
335
+
336
+
306
337
  def get_active_sessions() -> list[dict]:
307
338
  """Return list of active sessions (stale PIDs filtered out)."""
308
339
  path = _sessions_path()