voicesmith-mcp 1.0.4 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ Local AI voice for coding assistants. Gives your AI a real voice (text-to-speech
11
11
  - **54 distinct voices** via Kokoro ONNX (local TTS, ~300MB model)
12
12
  - **Speech-to-text** via faster-whisper (local STT, ~150MB model)
13
13
  - **Voice activity detection** via Silero VAD (local, 2MB)
14
- - **Multi-session support** — run multiple IDE sessions, each with its own voice
14
+ - **Multi-session support** — run multiple Claude Code sessions, each with its own voice (single session for Cursor/Codex)
15
15
  - **Works with Claude Code, Cursor, and Codex**
16
16
 
17
17
  ## Quick Start
@@ -69,7 +69,11 @@ The MCP server runs as a local process alongside your IDE. It communicates over
69
69
 
70
70
  ## Multi-Session
71
71
 
72
- Multiple IDE sessions can run simultaneously. Each gets a unique voice name and HTTP port. Session coordination uses a shared registry (`sessions.json`) with file locking. Cross-session audio is serialized via `flock` to prevent overlapping playback.
72
+ **Claude Code:** Full multi-session support. Multiple Claude Code sessions can run simultaneously, each with its own voice. Session identity is tracked via Claude's `session_id` — resuming a session reclaims the same voice, and multiple terminals sharing the same session share the same voice. Orphaned servers are detected and cleaned up automatically.
73
+
74
+ **Cursor / Codex:** Single session only. Cursor runs one MCP server per config (shared across tabs), and Codex has no multi-session hooks. Voice works normally — just no multi-session coordination.
75
+
76
+ Cross-session audio is serialized via `flock` to prevent overlapping playback.
73
77
 
74
78
  ## Configuration
75
79
 
@@ -102,11 +106,11 @@ Re-run `npx voicesmith-mcp install` to change your voice or update settings. Exi
102
106
 
103
107
  ## Supported IDEs
104
108
 
105
- | IDE | Config Location | Rules Location |
106
- |-----|----------------|----------------|
107
- | Claude Code | `~/.claude.json` | `~/.claude/CLAUDE.md` |
108
- | Cursor | `~/.cursor/mcp.json` | `~/.cursor/rules/voicesmith.mdc` |
109
- | Codex | `~/.codex/mcp.json` | `~/.codex/AGENTS.md` |
109
+ | IDE | Config Location | Rules Location | Multi-Session |
110
+ |-----|----------------|----------------|---------------|
111
+ | Claude Code | `~/.claude.json` | `~/.claude/CLAUDE.md` | Yes (via session_id) |
112
+ | Cursor | `~/.cursor/mcp.json` | `~/.cursor/rules/voicesmith.mdc` | No (single server) |
113
+ | Codex | `~/.codex/mcp.json` | `~/.codex/AGENTS.md` | No (single session) |
110
114
 
111
115
  ## Uninstall
112
116
 
@@ -1,44 +1,115 @@
1
1
  #!/bin/bash
2
- # VoiceSmith MCP — SessionStart hook (lightweight)
3
- # Discovers this session's assigned voice name and injects it as context.
4
- # No TTS calls — just reads sessions.json and returns the name.
2
+ # VoiceSmith MCP — SessionStart hook
3
+ # 1. Receives session_id from Claude Code via stdin JSON
4
+ # 2. Sends session_id to the MCP server's POST /session endpoint
5
+ # 3. Server reconciles voice with sibling sessions (same session_id)
6
+ # 4. Returns the assigned voice name as additionalContext
5
7
 
6
8
  SESSIONS_FILE="$HOME/.local/share/voicesmith-mcp/sessions.json"
7
9
  INPUT=$(cat)
8
10
 
11
+ # Parse session_id from hook input (all hooks receive session_id via stdin)
12
+ SESSION_ID=$(echo "$INPUT" | python3 -c "
13
+ import sys, json
14
+ try:
15
+ data = json.load(sys.stdin)
16
+ print(data.get('session_id', ''))
17
+ except:
18
+ pass
19
+ " 2>/dev/null)
20
+
9
21
  SESSION_NAME=""
10
22
  SESSION_VOICE=""
11
23
 
12
24
  if [ -f "$SESSIONS_FILE" ]; then
13
- SESSION_INFO=$(python3 -c "
25
+ # Find this session's MCP server port (by PID liveness, prefer tmux match)
26
+ PORT=$(python3 -c "
14
27
  import json, os
15
28
  try:
16
29
  with open('$SESSIONS_FILE') as f:
17
30
  data = json.load(f)
18
- tmux_session = os.environ.get('VOICESMITH_TMUX', '')
31
+ tmux = os.environ.get('VOICESMITH_TMUX', '')
32
+ # Try tmux match first
19
33
  for s in data.get('sessions', []):
20
34
  try:
21
35
  os.kill(s['pid'], 0)
22
- if tmux_session and s.get('tmux_session') == tmux_session:
23
- print(f\"{s['name']}|{s['voice']}\")
24
- break
36
+ if tmux and s.get('tmux_session') == tmux:
37
+ print(s['port'])
38
+ raise SystemExit
39
+ except (OSError, ProcessLookupError):
40
+ pass
41
+ # Fallback: most recent alive session
42
+ for s in reversed(data.get('sessions', [])):
43
+ try:
44
+ os.kill(s['pid'], 0)
45
+ print(s['port'])
46
+ break
25
47
  except (OSError, ProcessLookupError):
26
48
  pass
27
- else:
28
- for s in reversed(data.get('sessions', [])):
29
- try:
30
- os.kill(s['pid'], 0)
49
+ except:
50
+ pass
51
+ " 2>/dev/null)
52
+
53
+ # Send session_id to the server if we have both port and session_id
54
+ if [ -n "$PORT" ] && [ -n "$SESSION_ID" ]; then
55
+ RESPONSE=$(curl -s --max-time 3 -X POST \
56
+ -H "Content-Type: application/json" \
57
+ -d "{\"session_id\": \"$SESSION_ID\"}" \
58
+ "http://127.0.0.1:$PORT/session" 2>/dev/null)
59
+
60
+ if [ -n "$RESPONSE" ]; then
61
+ SESSION_NAME=$(echo "$RESPONSE" | python3 -c "
62
+ import sys, json
63
+ try:
64
+ d = json.load(sys.stdin)
65
+ s = d.get('session', {})
66
+ print(s.get('name', ''))
67
+ except:
68
+ pass
69
+ " 2>/dev/null)
70
+ SESSION_VOICE=$(echo "$RESPONSE" | python3 -c "
71
+ import sys, json
72
+ try:
73
+ d = json.load(sys.stdin)
74
+ s = d.get('session', {})
75
+ print(s.get('voice', ''))
76
+ except:
77
+ pass
78
+ " 2>/dev/null)
79
+ fi
80
+ fi
81
+
82
+ # Fallback: read sessions.json directly if HTTP call didn't work
83
+ if [ -z "$SESSION_NAME" ]; then
84
+ SESSION_INFO=$(python3 -c "
85
+ import json, os
86
+ try:
87
+ with open('$SESSIONS_FILE') as f:
88
+ data = json.load(f)
89
+ tmux = os.environ.get('VOICESMITH_TMUX', '')
90
+ for s in data.get('sessions', []):
91
+ try:
92
+ os.kill(s['pid'], 0)
93
+ if tmux and s.get('tmux_session') == tmux:
31
94
  print(f\"{s['name']}|{s['voice']}\")
32
- break
33
- except (OSError, ProcessLookupError):
34
- pass
35
- except Exception:
95
+ raise SystemExit
96
+ except (OSError, ProcessLookupError):
97
+ pass
98
+ for s in reversed(data.get('sessions', [])):
99
+ try:
100
+ os.kill(s['pid'], 0)
101
+ print(f\"{s['name']}|{s['voice']}\")
102
+ break
103
+ except (OSError, ProcessLookupError):
104
+ pass
105
+ except:
36
106
  pass
37
107
  " 2>/dev/null)
38
108
 
39
- if [ -n "$SESSION_INFO" ]; then
40
- SESSION_NAME=$(echo "$SESSION_INFO" | cut -d'|' -f1)
41
- SESSION_VOICE=$(echo "$SESSION_INFO" | cut -d'|' -f2)
109
+ if [ -n "$SESSION_INFO" ]; then
110
+ SESSION_NAME=$(echo "$SESSION_INFO" | cut -d'|' -f1)
111
+ SESSION_VOICE=$(echo "$SESSION_INFO" | cut -d'|' -f2)
112
+ fi
42
113
  fi
43
114
  fi
44
115
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicesmith-mcp",
3
- "version": "1.0.4",
3
+ "version": "1.0.7",
4
4
  "description": "Local AI voice for coding assistants — TTS & STT via MCP. Kokoro ONNX + faster-whisper, fully offline.",
5
5
  "bin": {
6
6
  "voicesmith-mcp": "bin/cli.js"
package/server.py CHANGED
@@ -175,6 +175,9 @@ class _VoiceHTTPHandler(BaseHTTPRequestHandler):
175
175
  "ready": True,
176
176
  "name": _session_info.get("name") if _session_info else None,
177
177
  "port": _session_info.get("port") if _session_info else None,
178
+ "session_id": _session_info.get("session_id") if _session_info else None,
179
+ "mcp_connected": _event_loop is not None,
180
+ "uptime_s": round(time.time() - _startup_time),
178
181
  "last_tool_call_age_s": round(time.time() - _last_tool_call),
179
182
  })
180
183
  self.send_response(200)
@@ -189,9 +192,38 @@ class _VoiceHTTPHandler(BaseHTTPRequestHandler):
189
192
  self._handle_listen()
190
193
  elif self.path == "/speak":
191
194
  self._handle_speak()
195
+ elif self.path == "/session":
196
+ self._handle_session_update()
192
197
  else:
193
198
  self.send_error(404)
194
199
 
200
+ def _handle_session_update(self):
201
+ """Receive session_id from the SessionStart hook and reconcile voice."""
202
+ global _session_info
203
+
204
+ try:
205
+ content_length = int(self.headers.get("Content-Length", 0))
206
+ body = self.rfile.read(content_length) if content_length > 0 else b"{}"
207
+ params = json.loads(body)
208
+ except (json.JSONDecodeError, ValueError):
209
+ self._json_response(400, {"error": "invalid_json"})
210
+ return
211
+
212
+ session_id = params.get("session_id")
213
+ if not session_id:
214
+ self._json_response(400, {"error": "missing_session_id"})
215
+ return
216
+
217
+ from session_registry import update_session_id
218
+ updated = update_session_id(os.getpid(), session_id)
219
+
220
+ if updated:
221
+ _session_info = updated
222
+ logger.info(f"Session updated: session_id={session_id}, name={updated['name']}")
223
+ self._json_response(200, {"success": True, "session": updated})
224
+ else:
225
+ self._json_response(404, {"error": "session_not_found"})
226
+
195
227
  def _handle_speak(self):
196
228
  """Synthesize and play speech via HTTP. Used by SessionStart hook for preheat intro."""
197
229
  if _event_loop is None:
@@ -831,12 +863,17 @@ def _start_preheat_intro():
831
863
  return
832
864
 
833
865
  def _intro():
834
- # Wait for server to settle
866
+ # Wait for server to settle — also gives the SessionStart hook
867
+ # time to fire and update _session_info with session_id and
868
+ # possibly a different name (multi-terminal sibling reconciliation)
835
869
  time.sleep(1.5)
870
+ # Re-read _session_info in case the hook updated it during the sleep
871
+ intro_name = _session_info.get("name", default_name) if _session_info else default_name
872
+ intro_voice = _session_info.get("voice", default_voice) if _session_info else default_voice
836
873
  try:
837
- result = _tts_engine.synthesize(f"{name} here, ready to go.", voice, 1.0)
874
+ result = _tts_engine.synthesize(f"{intro_name} here, ready to go.", intro_voice, 1.0)
838
875
  _audio_player.play(result.samples, result.sample_rate)
839
- logger.info(f"Preheat intro spoken: {name}")
876
+ logger.info(f"Preheat intro spoken: {intro_name}")
840
877
  except Exception as e:
841
878
  logger.warning(f"Preheat intro failed: {e}")
842
879
 
@@ -9,8 +9,8 @@ import fcntl
9
9
  import json
10
10
  import os
11
11
  import signal
12
+ import subprocess
12
13
  import time
13
- import urllib.request
14
14
  from datetime import datetime, timezone
15
15
  from pathlib import Path
16
16
  from typing import Optional
@@ -59,20 +59,26 @@ def _write_sessions(path: Path, sessions: list[dict]) -> None:
59
59
  json.dump({"sessions": sessions}, f, indent=2)
60
60
 
61
61
 
62
- # Max seconds since last MCP tool call before considering a session stale.
63
- # If a server hasn't had any tool calls in this time, it's likely orphaned
64
- # (MCP client disconnected but process is still running).
65
- _STALE_ACTIVITY_THRESHOLD = 300 # 5 minutes
62
+ def _get_ppid(pid: int) -> int:
63
+ """Get the parent PID of a process. Returns 0 on failure."""
64
+ try:
65
+ result = subprocess.run(
66
+ ["ps", "-o", "ppid=", "-p", str(pid)],
67
+ capture_output=True, text=True, timeout=2,
68
+ )
69
+ return int(result.stdout.strip()) if result.returncode == 0 else 0
70
+ except Exception:
71
+ return 0
66
72
 
67
73
 
68
- def _session_healthy(session: dict, activity_threshold: int = _STALE_ACTIVITY_THRESHOLD) -> bool:
69
- """Check if a session is alive and actively used.
74
+ def _session_healthy(session: dict) -> bool:
75
+ """Check if a session is alive and its parent (IDE) is still running.
70
76
 
71
- Three checks:
72
- 1. PID alive (fast, catches crashed processes)
73
- 2. HTTP health check on /status (catches completely dead servers)
74
- 3. Activity check — if last_tool_call_age_s exceeds threshold, the server
75
- is alive but orphaned (no MCP client connected)
77
+ Checks:
78
+ 1. PID alive — catches crashed server processes
79
+ 2. Parent PID alive — if the parent (Claude Code, Cursor, etc.) died,
80
+ the server is orphaned. On macOS/Linux, orphaned processes get
81
+ reparented to PID 1 (launchd/init).
76
82
  """
77
83
  pid = session.get("pid", 0)
78
84
  if not _pid_alive(pid):
@@ -82,33 +88,15 @@ def _session_healthy(session: dict, activity_threshold: int = _STALE_ACTIVITY_TH
82
88
  if pid == os.getpid():
83
89
  return True
84
90
 
85
- port = session.get("port")
86
- if not port:
87
- return False
88
-
89
- try:
90
- url = f"http://127.0.0.1:{port}/status"
91
- req = urllib.request.Request(url, method="GET")
92
- with urllib.request.urlopen(req, timeout=2) as resp:
93
- data = json.loads(resp.read())
94
- if not data.get("ready", False):
95
- raise ValueError("not ready")
96
-
97
- # Check activity age — if server hasn't had MCP tool calls
98
- # in a while, it's orphaned
99
- age = data.get("last_tool_call_age_s")
100
- if age is not None and age > activity_threshold:
101
- logger.info(
102
- f"Session '{session.get('name')}' (pid {pid}) inactive "
103
- f"for {age}s — treating as stale"
104
- )
105
- raise ValueError("inactive")
106
-
107
- return True
108
- except Exception:
109
- # Server not responding or inactive — it's orphaned
110
- logger.info(f"Session '{session.get('name')}' (pid {pid}) stale on port {port}")
111
- # Kill the orphaned process
91
+ # Check if the server's parent process is still alive.
92
+ # If parent is PID 1 (launchd/init), the IDE exited and the server
93
+ # was reparented — it's orphaned.
94
+ ppid = _get_ppid(pid)
95
+ if ppid <= 1:
96
+ logger.info(
97
+ f"Session '{session.get('name')}' (pid {pid}) orphaned "
98
+ f"(parent pid {ppid}) — treating as stale"
99
+ )
112
100
  try:
113
101
  os.kill(pid, signal.SIGTERM)
114
102
  logger.info(f"Sent SIGTERM to orphaned process {pid}")
@@ -116,19 +104,19 @@ def _session_healthy(session: dict, activity_threshold: int = _STALE_ACTIVITY_TH
116
104
  pass
117
105
  return False
118
106
 
107
+ return True
119
108
 
120
- def _clean_stale(sessions: list[dict], aggressive: bool = False) -> list[dict]:
121
- """Remove sessions that are dead or unresponsive.
122
109
 
123
- Args:
124
- aggressive: If True, use a shorter activity threshold (10s instead of 5min).
125
- Used during startup registration to quickly reclaim names from
126
- orphaned servers that haven't fully shut down yet.
110
+ def _clean_stale(sessions: list[dict]) -> list[dict]:
111
+ """Remove sessions that are dead or orphaned.
112
+
113
+ Detection logic:
114
+ 1. PID dead → remove immediately
115
+ 2. Parent PID is 1 (launchd/init) → IDE exited, server orphaned → kill and remove
127
116
  """
128
- threshold = 10 if aggressive else _STALE_ACTIVITY_THRESHOLD
129
117
  alive = []
130
118
  for s in sessions:
131
- if _session_healthy(s, activity_threshold=threshold):
119
+ if _session_healthy(s):
132
120
  alive.append(s)
133
121
  else:
134
122
  logger.info(f"Removed stale session: {s.get('name')} (pid {s.get('pid')})")
@@ -223,7 +211,7 @@ def register_session(
223
211
  sessions = _read_sessions(path)
224
212
  # Aggressive cleanup on startup — use short activity threshold
225
213
  # to quickly reclaim names from orphaned servers
226
- sessions = _clean_stale(sessions, aggressive=True)
214
+ sessions = _clean_stale(sessions)
227
215
 
228
216
  taken_names = {s["name"] for s in sessions}
229
217
 
@@ -233,7 +221,7 @@ def register_session(
233
221
  time.sleep(2)
234
222
  fcntl.flock(f, fcntl.LOCK_EX)
235
223
  sessions = _read_sessions(path)
236
- sessions = _clean_stale(sessions, aggressive=True)
224
+ sessions = _clean_stale(sessions)
237
225
  _write_sessions(path, sessions)
238
226
  taken_names = {s["name"] for s in sessions}
239
227
 
@@ -257,6 +245,7 @@ def register_session(
257
245
  "voice": voice,
258
246
  "port": port,
259
247
  "pid": os.getpid(),
248
+ "session_id": None, # Set later by SessionStart hook
260
249
  "tmux_session": tmux_session,
261
250
  "started_at": datetime.now(timezone.utc).isoformat(),
262
251
  }
@@ -289,6 +278,62 @@ def unregister_session() -> None:
289
278
  logger.info("Session unregistered")
290
279
 
291
280
 
281
+ def update_session_id(pid: int, session_id: str) -> Optional[dict]:
282
+ """Set the session_id on this PID's entry and reconcile voice with siblings.
283
+
284
+ When the SessionStart hook fires, it sends the session_id to the server.
285
+ The server calls this to:
286
+ 1. Set session_id on its own entry
287
+ 2. Check if a living sibling (same session_id) already has a voice
288
+ 3. If so, adopt that voice (shared session = shared voice)
289
+
290
+ Returns the updated session dict, or None if PID not found.
291
+ """
292
+ path = _sessions_path()
293
+ if not path.exists():
294
+ return None
295
+
296
+ try:
297
+ with open(path, "r+") as f:
298
+ fcntl.flock(f, fcntl.LOCK_EX)
299
+ sessions = _read_sessions(path)
300
+ sessions = _clean_stale(sessions)
301
+
302
+ # Find our entry
303
+ our_entry = None
304
+ for s in sessions:
305
+ if s.get("pid") == pid:
306
+ our_entry = s
307
+ break
308
+
309
+ if our_entry is None:
310
+ return None
311
+
312
+ # Set session_id
313
+ our_entry["session_id"] = session_id
314
+
315
+ # Look for living siblings with the same session_id
316
+ for s in sessions:
317
+ if (s.get("session_id") == session_id
318
+ and s.get("pid") != pid
319
+ and _pid_alive(s.get("pid", 0))):
320
+ # Sibling found — adopt its name and voice
321
+ if s["name"] != our_entry["name"]:
322
+ logger.info(
323
+ f"Adopting sibling voice: {our_entry['name']} → {s['name']} "
324
+ f"(shared session_id {session_id})"
325
+ )
326
+ our_entry["name"] = s["name"]
327
+ our_entry["voice"] = s["voice"]
328
+ break
329
+
330
+ _write_sessions(path, sessions)
331
+ return dict(our_entry)
332
+ except OSError as e:
333
+ logger.warning(f"Failed to update session_id: {e}")
334
+ return None
335
+
336
+
292
337
  def get_active_sessions() -> list[dict]:
293
338
  """Return list of active sessions (stale PIDs filtered out)."""
294
339
  path = _sessions_path()