voicesmith-mcp 1.0.11 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "voicesmith-mcp",
3
- "version": "1.0.11",
3
+ "version": "1.0.12",
4
4
  "description": "Local AI voice for coding assistants — TTS & STT via MCP. Kokoro ONNX + faster-whisper, fully offline.",
5
5
  "bin": {
6
6
  "voicesmith-mcp": "bin/cli.js"
@@ -61,6 +61,11 @@ class MicCapture:
61
61
  silence_duration = 0.0
62
62
  loop = asyncio.get_event_loop()
63
63
 
64
+ # Reset VAD state — the LSTM hidden state and context window must
65
+ # be cleared between recordings to avoid stale state from previous
66
+ # audio affecting speech detection.
67
+ vad.reset()
68
+
64
69
  stream = None
65
70
  try:
66
71
  stream = sd.InputStream(
@@ -73,6 +78,17 @@ class MicCapture:
73
78
  stream.start()
74
79
  logger.info("Microphone recording started")
75
80
 
81
+ # Discard the first ~200ms of audio to avoid picking up residual
82
+ # speaker output (Tink sound or TTS playback that just finished).
83
+ # This prevents VAD from detecting speaker bleed as "speech" and
84
+ # then cutting off when the bleed stops.
85
+ flush_chunks = int(0.2 * self._sample_rate / 512) # ~6 chunks
86
+ for _ in range(flush_chunks):
87
+ try:
88
+ self._audio_queue.get(timeout=0.1)
89
+ except queue.Empty:
90
+ break
91
+
76
92
  start_time = asyncio.get_event_loop().time()
77
93
 
78
94
  while not self._stop_flag: