agentvibes 5.6.7 → 5.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. package/.agentvibes/LITE-MODE.md +236 -0
  2. package/.agentvibes/README.md +136 -0
  3. package/.agentvibes/backup/session-start-tts.sh.20251210_212814 +141 -0
  4. package/.agentvibes/backups/agents/analyst_20260204_144958.md +78 -0
  5. package/.agentvibes/backups/agents/architect_20260204_144958.md +72 -0
  6. package/.agentvibes/backups/agents/dev_20260204_144958.md +74 -0
  7. package/.agentvibes/backups/agents/pm_20260204_144958.md +72 -0
  8. package/.agentvibes/backups/agents/quick-flow-solo-dev_20260204_144958.md +64 -0
  9. package/.agentvibes/backups/agents/sm_20260204_144958.md +87 -0
  10. package/.agentvibes/backups/agents/tea_20260204_144958.md +79 -0
  11. package/.agentvibes/backups/agents/tech-writer_20260204_144958.md +82 -0
  12. package/.agentvibes/backups/agents/ux-designer_20260204_144958.md +80 -0
  13. package/.agentvibes/config/README-personality-defaults.md +162 -0
  14. package/.agentvibes/config/agentvibes.json +1 -0
  15. package/.agentvibes/config/mode.txt +1 -0
  16. package/.agentvibes/config/personality-voice-defaults.default.json +21 -0
  17. package/.agentvibes/config/save-audio.txt +1 -0
  18. package/.agentvibes/config/voice-metadata.json +160 -0
  19. package/.agentvibes/config.json +38 -1
  20. package/.agentvibes/hooks/help.sh +191 -0
  21. package/.agentvibes/hooks/post-tool-use-lite.sh +111 -0
  22. package/.agentvibes/hooks/save-audio-manager.sh +162 -0
  23. package/.agentvibes/hooks/session-start-full-optimized.sh +102 -0
  24. package/.agentvibes/hooks/session-start-full.sh +142 -0
  25. package/.agentvibes/hooks/session-start-lite-v2.sh +34 -0
  26. package/.agentvibes/hooks/session-start-lite.sh +29 -0
  27. package/.agentvibes/hooks/stop-lite.sh +115 -0
  28. package/.agentvibes/hooks/switch-mode.sh +215 -0
  29. package/.agentvibes/output-styles/audio-summary.md +30 -0
  30. package/.claude/audio/voice-samples/piper/alan.wav +0 -0
  31. package/.claude/audio/voice-samples/piper/amy.wav +0 -0
  32. package/.claude/audio/voice-samples/piper/charlotte.wav +0 -0
  33. package/.claude/audio/voice-samples/piper/joe.wav +0 -0
  34. package/.claude/audio/voice-samples/piper/john.wav +0 -0
  35. package/.claude/audio/voice-samples/piper/katherine.wav +0 -0
  36. package/.claude/audio/voice-samples/piper/kristin.wav +0 -0
  37. package/.claude/audio/voice-samples/piper/linda.wav +0 -0
  38. package/.claude/audio/voice-samples/piper/marcus.wav +0 -0
  39. package/.claude/audio/voice-samples/piper/ryan.wav +0 -0
  40. package/.claude/commands/agent-vibes/provider.md +0 -0
  41. package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
  42. package/.claude/commands/agent-vibes-rdp.md +24 -24
  43. package/.claude/config/audio-effects.cfg +6 -1
  44. package/.claude/config/background-music-position.txt +8 -6
  45. package/.claude/config/reverb-level.txt +0 -0
  46. package/.claude/docs/TERMUX_SETUP.md +408 -408
  47. package/.claude/github-star-reminder.txt +1 -1
  48. package/.claude/hooks/audio-cache-utils.sh +0 -0
  49. package/.claude/hooks/audio-processor.sh +0 -0
  50. package/.claude/hooks/background-music-manager.sh +0 -0
  51. package/.claude/hooks/bmad-party-manager.sh +225 -0
  52. package/.claude/hooks/bmad-party-speak.sh +0 -0
  53. package/.claude/hooks/bmad-speak-enhanced.sh +0 -0
  54. package/.claude/hooks/bmad-speak.sh +0 -0
  55. package/.claude/hooks/bmad-tts-injector.sh +0 -0
  56. package/.claude/hooks/bmad-voice-manager.sh +0 -0
  57. package/.claude/hooks/clawdbot-receiver-SECURE.sh +0 -0
  58. package/.claude/hooks/clawdbot-receiver.sh +0 -0
  59. package/.claude/hooks/clean-audio-cache.sh +0 -0
  60. package/.claude/hooks/cleanup-cache.sh +0 -0
  61. package/.claude/hooks/configure-rdp-mode.sh +0 -0
  62. package/.claude/hooks/download-extra-voices.sh +0 -0
  63. package/.claude/hooks/effects-manager.sh +0 -0
  64. package/.claude/hooks/github-star-reminder.sh +0 -0
  65. package/.claude/hooks/language-manager.sh +0 -0
  66. package/.claude/hooks/learn-manager.sh +0 -0
  67. package/.claude/hooks/macos-voice-manager.sh +0 -0
  68. package/.claude/hooks/migrate-background-music.sh +0 -0
  69. package/.claude/hooks/migrate-to-agentvibes.sh +0 -0
  70. package/.claude/hooks/optimize-background-music.sh +0 -0
  71. package/.claude/hooks/path-resolver.sh +0 -0
  72. package/.claude/hooks/personality-manager.sh +0 -0
  73. package/.claude/hooks/piper-download-voices.sh +0 -0
  74. package/.claude/hooks/piper-installer.sh +0 -0
  75. package/.claude/hooks/piper-multispeaker-registry.sh +0 -0
  76. package/.claude/hooks/piper-voice-manager.sh +0 -0
  77. package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +0 -0
  78. package/.claude/hooks/play-tts-enhanced.sh +0 -0
  79. package/.claude/hooks/play-tts-macos.sh +0 -0
  80. package/.claude/hooks/play-tts-piper.sh +40 -2
  81. package/.claude/hooks/play-tts-soprano.sh +0 -0
  82. package/.claude/hooks/play-tts-ssh-remote.sh +0 -0
  83. package/.claude/hooks/play-tts-termux-ssh.sh +0 -0
  84. package/.claude/hooks/play-tts-windows-receiver.sh +0 -0
  85. package/.claude/hooks/play-tts.sh +13 -0
  86. package/.claude/hooks/post-response.sh +41 -0
  87. package/.claude/hooks/prepare-release.sh +0 -0
  88. package/.claude/hooks/provider-commands.sh +0 -0
  89. package/.claude/hooks/provider-manager.sh +0 -0
  90. package/.claude/hooks/replay-target-audio.sh +0 -0
  91. package/.claude/hooks/sentiment-manager.sh +0 -0
  92. package/.claude/hooks/session-start-tts.sh +48 -13
  93. package/.claude/hooks/soprano-gradio-synth.py +0 -0
  94. package/.claude/hooks/speed-manager.sh +0 -0
  95. package/.claude/hooks/stop-tts.sh +0 -0
  96. package/.claude/hooks/stop.sh +63 -0
  97. package/.claude/hooks/termux-installer.sh +0 -0
  98. package/.claude/hooks/translate-manager.sh +0 -0
  99. package/.claude/hooks/translator.py +0 -0
  100. package/.claude/hooks/tts-queue-worker.sh +0 -0
  101. package/.claude/hooks/tts-queue.sh +0 -0
  102. package/.claude/hooks/verbosity-manager.sh +0 -0
  103. package/.claude/hooks/voice-manager.sh +0 -0
  104. package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
  105. package/.claude/hooks-windows/play-tts-piper.ps1 +26 -1
  106. package/.claude/hooks-windows/play-tts.ps1 +25 -1
  107. package/.claude/hooks-windows/session-start-tts.ps1 +28 -9
  108. package/.claude/piper-voices-dir.txt +1 -0
  109. package/.claude/settings.json +2 -2
  110. package/.clawdbot/README.md +105 -105
  111. package/.mcp.json +32 -3
  112. package/CLAUDE.md +9 -0
  113. package/README.md +21 -3
  114. package/RELEASE_NOTES.md +61 -0
  115. package/WINDOWS-SETUP.md +208 -208
  116. package/bin/agent-vibes +0 -0
  117. package/bin/agentvibes-voice-browser.js +59 -4
  118. package/bin/agentvibes.js +0 -0
  119. package/bin/ensure-soprano-running.sh +43 -0
  120. package/bin/mcp-server.js +121 -121
  121. package/bin/mcp-server.sh +0 -0
  122. package/bin/test-bmad-pr +78 -78
  123. package/mcp-server/QUICK_START.md +203 -203
  124. package/mcp-server/README.md +345 -345
  125. package/mcp-server/WINDOWS_SETUP.md +260 -260
  126. package/mcp-server/docs/troubleshooting-audio.md +313 -313
  127. package/mcp-server/examples/claude_desktop_config.json +11 -11
  128. package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
  129. package/mcp-server/examples/custom_instructions.md +169 -169
  130. package/mcp-server/install-deps.js +177 -130
  131. package/mcp-server/server.py +1797 -1787
  132. package/mcp-server/test_server.py +0 -0
  133. package/package.json +1 -1
  134. package/src/console/app.js +6 -0
  135. package/src/console/tabs/music-tab.js +18 -2
  136. package/src/console/widgets/format-utils.js +11 -2
  137. package/src/installer.js +38 -37
  138. package/src/services/llm-provider-service.js +28 -9
  139. package/src/utils/voice-names.js +2 -0
  140. package/templates/agentvibes-receiver.sh +0 -0
  141. package/templates/audio/welcome-music.mp3 +0 -0
  142. package/.claude/hooks/play-tts-agentvibes-receiver.sh +0 -1
@@ -1,1787 +1,1797 @@
1
- #!/usr/bin/env python3
2
- """
3
- File: mcp-server/server.py
4
-
5
- AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
6
- Website: https://agentvibes.org
7
- Repository: https://github.com/paulpreibisch/AgentVibes
8
-
9
- Co-created by Paul Preibisch with Claude AI
10
- Copyright (c) 2025 Paul Preibisch
11
-
12
- Licensed under the Apache License, Version 2.0 (the "License");
13
- you may not use this file except in compliance with the License.
14
- You may obtain a copy of the License at
15
-
16
- http://www.apache.org/licenses/LICENSE-2.0
17
-
18
- Unless required by applicable law or agreed to in writing, software
19
- distributed under the License is distributed on an "AS IS" BASIS,
20
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
- See the License for the specific language governing permissions and
22
- limitations under the License.
23
-
24
- DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
- express or implied, including but not limited to the warranties of
26
- merchantability, fitness for a particular purpose and noninfringement.
27
- In no event shall the authors or copyright holders be liable for any claim,
28
- damages or other liability, whether in an action of contract, tort or
29
- otherwise, arising from, out of or in connection with the software or the
30
- use or other dealings in the software.
31
-
32
- ---
33
-
34
- @fileoverview MCP Server exposing AgentVibes TTS capabilities via Model Context Protocol
35
- @context Provides natural language control of TTS features for Claude Desktop, Warp, and other MCP clients
36
- @architecture MCP Server implementation wrapping bash scripts, async subprocess execution for non-blocking I/O
37
- @dependencies .claude/hooks/*.sh scripts, MCP SDK, Python asyncio, subprocess
38
- @entrypoints Called by Claude Desktop/Warp via MCP protocol (stdio transport)
39
- @patterns Tool registry pattern, async subprocess wrapping, provider abstraction, state file management
40
- @related GitHub repo, mcp-server/test_server.py, .claude/hooks/play-tts.sh, docs/ai-optimized-documentation-standards.md
41
- """
42
-
43
- import asyncio
44
- import json
45
- import os
46
- import platform
47
- import re as _re
48
- import subprocess
49
- from pathlib import Path
50
- from typing import Optional
51
-
52
- from mcp.server import Server
53
- from mcp.types import Tool, TextContent, ImageContent, EmbeddedResource
54
- import mcp.server.stdio
55
- class AgentVibesServer:
56
- """MCP Server for AgentVibes TTS functionality"""
57
-
58
- # Script name constants (addresses SonarCloud S1192)
59
- VOICE_MANAGER_SCRIPT = "voice-manager.sh"
60
- PERSONALITY_MANAGER_SCRIPT = "personality-manager.sh"
61
- LANGUAGE_MANAGER_SCRIPT = "language-manager.sh"
62
- BACKGROUND_MUSIC_MANAGER_SCRIPT = "background-music-manager.sh"
63
- EFFECTS_MANAGER_SCRIPT = "effects-manager.sh"
64
-
65
- # Path constants (addresses SonarCloud S1192)
66
- CLAUDE_DIR_NAME = ".claude"
67
- MUTE_FILE_NAME = ".agentvibes-muted"
68
- SEPARATOR = "━" * 39
69
-
70
def __init__(self):
    """Set up platform flags, script names, and hook/config locations.

    On native Windows (not WSL) every manager-script constant is overridden
    on the instance with its PowerShell counterpart, and hooks are read from
    ``hooks-windows`` instead of ``hooks``.
    """
    # WSL_DISTRO_NAME being set means we are inside WSL, which is treated
    # like any other non-Windows environment.
    inside_wsl = bool(os.environ.get("WSL_DISTRO_NAME"))
    self.is_windows = platform.system() == "Windows" and not inside_wsl

    if self.is_windows:
        # Instance attributes shadow the class-level .sh defaults.
        powershell_scripts = {
            "VOICE_MANAGER_SCRIPT": "voice-manager-windows.ps1",
            "PERSONALITY_MANAGER_SCRIPT": "personality-manager.ps1",
            "LANGUAGE_MANAGER_SCRIPT": "language-manager.ps1",
            "BACKGROUND_MUSIC_MANAGER_SCRIPT": "background-music-manager.ps1",
            "EFFECTS_MANAGER_SCRIPT": "effects-manager.ps1",
        }
        for attr_name, script_name in powershell_scripts.items():
            setattr(self, attr_name, script_name)

    # Locate the .claude directory (package-local or global).
    self.claude_dir = self._find_claude_dir()
    hooks_subdir = "hooks-windows" if self.is_windows else "hooks"
    self.hooks_dir = self.claude_dir / hooks_subdir
    # The AgentVibes root is the directory that contains .claude.
    self.agentvibes_root = self.claude_dir.parent
88
-
89
def _find_claude_dir(self) -> Path:
    """Locate the ``.claude`` directory that holds the hook scripts.

    Returns:
        The ``.claude`` directory next to this script's parent directory
        when it exists as a directory, otherwise ``~/.claude``.
    """
    # This file lives one level below the package root, so the root is
    # the parent of this script's directory.
    package_root = Path(__file__).resolve().parent.parent
    bundled = package_root / self.CLAUDE_DIR_NAME

    # is_dir() is already False for a path that does not exist.
    if not bundled.is_dir():
        # Fallback to the global directory in the user's home.
        return Path.home() / self.CLAUDE_DIR_NAME
    return bundled
103
-
104
def _resolve_friendly_name(self, voice_name: str) -> str:
    """
    Resolve friendly name to Piper voice ID using voice-metadata.json.

    The lookup first tries ``voice_name`` (lowercased) as a key of the
    ``voices`` mapping, then compares it case-insensitively against each
    entry's ``displayName``.

    Args:
        voice_name: Friendly name (e.g., "ryan") or Piper ID

    Returns:
        Resolved Piper voice ID, or original voice_name if the metadata file
        is missing, untrusted, malformed, or has no safe match.
    """
    import re

    metadata_path = self.agentvibes_root / ".agentvibes" / "config" / "voice-metadata.json"

    # SECURITY: Verify file exists and is not a symlink
    if not metadata_path.exists() or metadata_path.is_symlink():
        return voice_name

    # SECURITY: Verify file ownership matches current user (Unix only)
    try:
        if hasattr(os, 'getuid') and metadata_path.stat().st_uid != os.getuid():
            return voice_name
    except (OSError, AttributeError):
        pass

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    except (ValueError, OSError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return voice_name

    # Tolerate unexpected JSON shapes instead of raising AttributeError.
    voices = metadata.get('voices', {}) if isinstance(metadata, dict) else None
    if not isinstance(voices, dict):
        return voice_name

    voice_lower = voice_name.lower()
    resolved_id = None

    # Check if it's a friendly name key
    direct = voices.get(voice_lower)
    if isinstance(direct, dict):
        resolved_id = direct.get('id')

    # Check if it matches a displayName
    if not resolved_id:
        for voice_data in voices.values():
            if not isinstance(voice_data, dict):
                continue
            display = voice_data.get('displayName', '')
            if isinstance(display, str) and display.lower() == voice_lower:
                resolved_id = voice_data.get('id')
                break

    # SECURITY: Validate resolved ID matches safe pattern. fullmatch is
    # required here: match() with a trailing "$" also accepts an ID that
    # ends in a newline.
    if isinstance(resolved_id, str) and re.fullmatch(r'[a-zA-Z0-9_-]+', resolved_id):
        return resolved_id

    return voice_name
159
-
160
- # ── LibriTTS display-name resolution ──────────────────────────────────────
161
-
162
# Surnames appended to raw voice names; indexed by the numeric suffix.
_SURNAME_POOL = [
    'Bell', 'Carter', 'Davis', 'Ellis',
    'Foster', 'Gray', 'Hayes', 'Irving',
    'Jones', 'Knox', 'Lane', 'Mason',
    'Nash', 'Owens', 'Pierce', 'Quinn',
]

@classmethod
def _uniquify_voice_name(cls, raw_name: str) -> str:
    """Turn a raw voice name into a display name with a surname.

    Python port of uniquifyVoiceName from src/utils/voice-names.js.

    * ``"<base>-<n>"`` with ``n >= 2`` becomes ``"<base> <surname>"``, where
      the surname is picked from the pool by ``(n - 1)`` modulo pool size.
    * A name that already contains a space is returned unchanged.
    * Anything else gets the first surname of the pool appended.
    * Empty input is returned as-is.
    """
    import re as _re
    if not raw_name:
        return raw_name

    numbered = _re.match(r'^(.+)-(\d+)$', raw_name)
    # Suffixes below 2 (or no suffix at all) take the generic path below.
    suffix_no = int(numbered.group(2)) if numbered else 0
    if suffix_no >= 2:
        surnames = cls._SURNAME_POOL
        picked = surnames[(suffix_no - 1) % len(surnames)]
        return f"{numbered.group(1)} {picked}"

    if ' ' in raw_name:
        return raw_name
    return f"{raw_name} {cls._SURNAME_POOL[0]}"
181
-
182
def _build_libritts_catalog(self) -> dict:
    """
    Build a case-insensitive display-name → entry map from voice-assignments.json.

    Each usable speaker is registered under three lowercased keys: its
    display name, its raw name, and its raw name with spaces replaced by
    underscores. The first speaker to claim a key keeps it.

    Malformed input never raises: an unreadable or non-JSON file, or a
    document of an unexpected shape, yields an empty catalog, and an
    individual entry with a non-integer id or a missing/non-string
    ``voice_name`` is skipped without discarding the remaining speakers.

    Returns:
        dict keyed by lowercased display name / raw name / speaker name.
    """
    catalog: dict = {}
    va_path = self.agentvibes_root / "voice-assignments.json"
    if not va_path.exists():
        return catalog

    try:
        data = json.loads(va_path.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers JSONDecodeError and UnicodeDecodeError.
        return catalog

    speakers = data.get("libritts_speakers", {}) if isinstance(data, dict) else None
    if not isinstance(speakers, dict):
        return catalog

    for id_str, entry in speakers.items():
        if not isinstance(entry, dict):
            continue
        try:
            speaker_id = int(id_str)
        except (TypeError, ValueError):
            continue  # skip this speaker only, keep the rest
        raw_name = entry.get("voice_name", "")
        if not isinstance(raw_name, str) or not raw_name:
            continue
        gender = entry.get("gender", "")
        display_name = self._uniquify_voice_name(raw_name)
        info = {
            "voice_id": f"en_US-libritts-high::{raw_name}",
            "model": "en_US-libritts-high",
            "speaker_name": raw_name,
            "speaker_id": speaker_id,
            "display_name": display_name,
            # Normalise so consumers can safely call str methods on it.
            "gender": gender if isinstance(gender, str) else "",
        }
        for key in (display_name.lower(), raw_name.lower(),
                    raw_name.replace(" ", "_").lower()):
            catalog.setdefault(key, info)
    return catalog
212
-
213
def _resolve_voice_input(self, voice_input: str) -> Optional[dict]:
    """
    Resolve a voice display name or ID to a dict describing the voice.

    Accepts: "Bella Bell", "Bella-2", "en_US-libritts-high::Bella",
             "Kristin_Hughes", "en_US-amy-medium"

    Args:
        voice_input: Display name, raw speaker name, ``model::speaker`` id,
            or a plain ``en_XX-name-quality`` Piper model id.

    Returns:
        A dict with at least ``voice_id``, ``model``, ``speaker_name``,
        ``speaker_id`` and ``display_name``, or None if unresolvable
        (empty input, an unsafe model part, or no catalog match).
    """
    import re as _re
    if not voice_input:
        return None
    MS_SEP = "::"

    # Already a full voiceId with MS_SEP
    if MS_SEP in voice_input:
        model, speaker_name = voice_input.split(MS_SEP, 1)
        # fullmatch, not match(...$): "$" would also accept a model that
        # ends in a newline, defeating this validation.
        if not _re.fullmatch(r'[a-zA-Z0-9_-]+', model):
            return None
        entry = self._build_libritts_catalog().get(speaker_name.lower())
        return {
            "voice_id": voice_input,
            "model": model,
            "speaker_name": speaker_name,
            "speaker_id": entry["speaker_id"] if entry else None,
            "display_name": entry["display_name"] if entry else speaker_name,
        }

    # Plain piper model ID (e.g. en_US-amy-medium)
    if _re.fullmatch(r'en_[A-Z]{2}-[a-zA-Z0-9_]+-[a-z]+', voice_input):
        return {
            "voice_id": voice_input,
            "model": voice_input,
            "speaker_name": None,
            "speaker_id": None,
            "display_name": voice_input,
        }

    # LibriTTS display name / raw name lookup; underscores are treated as
    # spaces first, then the input is tried verbatim.
    catalog = self._build_libritts_catalog()
    normalised = voice_input.replace("_", " ")
    entry = catalog.get(normalised.lower()) or catalog.get(voice_input.lower())
    return entry or None
253
-
254
def _get_config_dir(self) -> Path:
    """Pick the ``.claude`` directory that voice config files are written to.

    Returns:
        ``<cwd>/.claude`` when that directory exists and the current
        working directory is not the AgentVibes root; otherwise
        ``self.claude_dir``.
    """
    here = Path.cwd()
    project_claude = here / ".claude"
    running_from_package = here == self.agentvibes_root
    if running_from_package or not project_claude.is_dir():
        return self.claude_dir
    return project_claude
260
-
261
async def text_to_speech(
    self,
    text: str,
    voice: Optional[str] = None,
    personality: Optional[str] = None,
    language: Optional[str] = None,
) -> str:
    """
    Convert text to speech using AgentVibes.

    A personality or language passed here is applied only for this call:
    the previous value is read first and set again afterwards, even when
    the TTS script fails or times out.

    Args:
        text: The text to speak
        voice: Optional voice name (e.g., "Aria", "Northern Terry")
        personality: Optional personality style (e.g., "flirty", "sarcastic")
        language: Optional language (e.g., "spanish", "french")

    Returns:
        A success message (with the audio file path when the script reports
        one via a "Saved to:" line), or an error message on failure/timeout.
    """
    # Store original settings to restore later
    original_personality = None
    original_language = None

    try:
        # Temporarily set personality if specified
        if personality:
            original_personality = await self._get_personality()
            await self._run_script(
                self.PERSONALITY_MANAGER_SCRIPT, ["set", personality]
            )

        # Temporarily set language if specified
        if language:
            original_language = await self._get_language()
            await self._run_script(self.LANGUAGE_MANAGER_SCRIPT, ["set", language])

        # Resolve LLM key: AGENTVIBES_LLM > CLAUDECODE=1 > AGENTVIBES_MCP_FALLBACK.
        # When none applies the key stays empty and no llm flag is passed.
        llm_key = os.environ.get("AGENTVIBES_LLM", "").strip()
        if llm_key and not _re.match(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$", llm_key):
            llm_key = ""
        if not llm_key and os.environ.get("CLAUDECODE", "").strip() == "1":
            llm_key = "claude-code"
        if not llm_key:
            fallback = os.environ.get("AGENTVIBES_MCP_FALLBACK", "").strip()
            if fallback and _re.match(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$", fallback):
                llm_key = fallback

        # Call the TTS script via appropriate shell
        tts_script = "play-tts.ps1" if self.is_windows else "play-tts.sh"
        play_tts = self.hooks_dir / tts_script
        if self.is_windows:
            args = ["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", str(play_tts), text]
            if voice:
                args.extend(["-VoiceOverride", voice])
            if llm_key:
                args.extend(["-llm", llm_key])
        else:
            args = ["bash", str(play_tts)]
            if llm_key:
                args.extend(["--llm", llm_key])
            args.append(text)
            if voice:
                args.append(voice)

        env = self._build_script_env()

        result = await asyncio.create_subprocess_exec(
            *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
        )
        try:
            try:
                stdout, stderr = await asyncio.wait_for(result.communicate(), timeout=60.0)
            except asyncio.TimeoutError:
                result.kill()
                await result.wait()
                return "❌ TTS timed out after 60 seconds"

            # errors="replace": script output is not guaranteed to be valid
            # UTF-8, and a decode error here would hide the real result.
            out_text = stdout.decode(errors="replace").strip()

            if result.returncode != 0:
                error = stderr.decode(errors="replace").strip()
                full_error = f"{error}\nStdout: {out_text}" if out_text else error
                return f"❌ TTS failed: {full_error}"

            # Extract file path from output (first "Saved to:" line wins)
            audio_file_path = None
            for line in out_text.split("\n"):
                if "Saved to:" in line:
                    audio_file_path = line.split("Saved to:")[1].strip()
                    break

            # Long texts are shortened to 50 characters in the reply.
            spoken = f"{text[:50]}..." if len(text) > 50 else text
            if audio_file_path:
                # Same success marker as the no-file reply below.
                return f"✅ Spoke: {spoken}\n📁 Audio saved: {audio_file_path}"
            return f"✅ Spoke: {spoken}"
        finally:
            # Ensure process cleanup (e.g. when this coroutine is cancelled)
            if result.returncode is None:
                result.kill()
                await result.wait()

    finally:
        # Restore original settings
        if original_personality:
            await self._run_script(
                self.PERSONALITY_MANAGER_SCRIPT, ["set", original_personality]
            )
        if original_language:
            await self._run_script(
                self.LANGUAGE_MANAGER_SCRIPT, ["set", original_language]
            )
378
-
379
async def list_voices(self) -> str:
    """
    List all available TTS voices for the active provider.

    The voice names come from the voice manager script's ``list-simple``
    command. When the ``en_US-libritts-high`` model file is present under
    ``~/.local/share/piper-voices``, the named LibriTTS speakers from the
    catalog are appended, one line per distinct display name.

    Returns:
        Formatted list of available voices, a "no voices" hint when the
        script lists none, or an error message when the script returns
        nothing.
    """
    # Get active provider for display purposes
    provider = await self._get_provider()
    current_voice = await self._get_current_voice()

    # voice-manager list-simple is provider-aware
    result = await self._run_script(self.VOICE_MANAGER_SCRIPT, ["list-simple"])
    if not result:
        return "❌ Failed to list voices"

    voices = [v for v in result.strip().split("\n") if v]  # drop empty lines
    if not voices:
        return (
            f"📦 No voices available\n"
            f"{self.SEPARATOR}\n"
            f"For Piper: Download voices using /agent-vibes:provider download <voice-name>\n"
            f"Example: en_US-lessac-medium, en_GB-alba-medium"
        )

    # Determine provider label and alternative provider
    if "Piper" in provider:
        provider_label, alternative_provider = "Piper TTS", "macOS"
    elif "macOS" in provider:
        provider_label, alternative_provider = "macOS TTS", "Piper"
    elif "Termux" in provider or "Android" in provider:
        provider_label, alternative_provider = "Termux SSH (Android)", "Piper"
    else:
        provider_label, alternative_provider = "TTS", None

    parts = [f"🎤 Available {provider_label} Voices:\n", f"{self.SEPARATOR}\n"]
    for voice in voices:
        marker = " ✓ (current)" if voice == current_voice else ""
        parts.append(f" {voice}{marker}\n")

    # Expand LibriTTS named speakers when en_US-libritts-high is installed
    piper_voices_dir = Path.home() / ".local" / "share" / "piper-voices"
    libritts_onnx = piper_voices_dir / "en_US-libritts-high.onnx"
    if libritts_onnx.exists():
        catalog = self._build_libritts_catalog()
        if catalog:
            parts.append("\n 📖 LibriTTS named speakers (en_US-libritts-high):\n")
            # De-duplicate: the catalog maps several keys to each speaker,
            # so only one line is emitted per display name.
            seen: set = set()
            gender_icons = {"female": "♀", "male": "♂"}
            for entry in catalog.values():
                dn = entry["display_name"]
                if dn in seen:
                    continue
                seen.add(dn)
                # A missing or null gender falls back to the neutral dash.
                gender = str(entry.get("gender") or "").lower()
                g_icon = gender_icons.get(gender, "—")
                marker = " ✓ (current)" if entry["voice_id"] == current_voice else ""
                parts.append(f" • {dn} ({g_icon} speaker {entry['speaker_id']}){marker}\n")

    parts.append(f"{self.SEPARATOR}\n")

    # Add provider switch hint
    if alternative_provider:
        parts.append(
            f"\n💡 Switch to {alternative_provider}? Use: set_provider(provider=\"{alternative_provider.lower()}\")\n"
        )

    return "".join(parts)
453
-
454
async def set_voice(self, voice_name: str) -> str:
    """
    Switch to a different voice (supports friendly names like "ryan" or "katherine").

    Names handled by the display-name resolver are applied by writing the
    voice config files directly. Anything else goes through the legacy
    friendly-name lookup and the voice manager script's ``switch`` command.

    Args:
        voice_name: Friendly name (e.g., "ryan") or Piper voice ID

    Returns:
        Success or error message
    """
    # Display-name resolver first (handles "Bella Bell", "::" ids, etc.)
    match = self._resolve_voice_input(voice_name)

    if match:
        display_name = match["display_name"]
        model = match["model"]
        speaker_id = match["speaker_id"]
        speaker_name = match["speaker_name"]

        config_dir = self._get_config_dir()
        voice_file = config_dir / "tts-voice.txt"
        model_file = config_dir / "tts-piper-model.txt"
        speaker_file = config_dir / "tts-piper-speaker-id.txt"

        try:
            config_dir.mkdir(parents=True, exist_ok=True)
            voice_file.write_text(display_name + "\n")

            stale_files = []
            if speaker_name:
                model_file.write_text(model + "\n")
                if speaker_id is None:
                    # No speaker id known: clear it so piper uses its default.
                    stale_files.append(speaker_file)
                else:
                    speaker_file.write_text(str(speaker_id) + "\n")
            else:
                # Single-speaker model: clear both multi-speaker files.
                stale_files.extend([model_file, speaker_file])

            for leftover in stale_files:
                try:
                    leftover.unlink()
                except FileNotFoundError:
                    pass
        except OSError as e:
            return f"❌ Failed to write voice config: {e}"

        detail = ""
        if speaker_id is not None:
            detail = f" (speaker {speaker_id}, model {model})"
        return f"✅ Voice set to: {display_name}{detail}"

    # Legacy friendly-name resolver (voice-metadata.json) + manager script
    legacy_id = self._resolve_friendly_name(voice_name)
    script_output = await self._run_script(
        self.VOICE_MANAGER_SCRIPT, ["switch", legacy_id, "--silent"]
    )
    if not (script_output and "✅" in script_output):
        return f"❌ Failed to switch voice could not resolve '{voice_name}'. Try 'list_voices' to see available names."

    if voice_name.lower() != legacy_id.lower():
        # Show both the name the caller used and the id it resolved to.
        return f"✅ Voice switched to: {voice_name} ({legacy_id})"
    return f"✅ Voice switched to: {voice_name}"
509
-
510
async def list_personalities(self) -> str:
    """
    List all available personalities.

    Returns:
        The output of the personality manager's ``list`` command, or an
        error message when that output is empty.
    """
    listing = await self._run_script(self.PERSONALITY_MANAGER_SCRIPT, ["list"])
    if not listing:
        return "❌ Failed to list personalities"
    return listing
519
-
520
async def set_personality(self, personality: str) -> str:
    """
    Set the personality style for TTS messages.

    Args:
        personality: Personality name (e.g., "flirty", "sarcastic", "pirate")

    Returns:
        The script output when it contains the 🎭 marker, otherwise an
        error message that includes whatever the script returned.
    """
    outcome = await self._run_script(
        self.PERSONALITY_MANAGER_SCRIPT, ["set", personality]
    )
    succeeded = bool(outcome) and "🎭" in outcome
    if not succeeded:
        return f"❌ Failed to set personality: {outcome}"
    return outcome
536
-
537
async def get_config(self) -> str:
    """
    Get current AgentVibes configuration.

    Returns:
        A formatted block with the LLM key, provider, voice, personality
        and language currently in effect.
    """
    voice = await self._get_current_voice()
    personality = await self._get_personality()
    language = await self._get_language()
    provider = await self._get_provider()

    def _safe_env_key(var_name: str) -> str:
        # An unset, blank, or malformed value counts as "not provided".
        candidate = os.environ.get(var_name, "").strip()
        if candidate and _re.match(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$", candidate):
            return candidate
        return ""

    # Priority: AGENTVIBES_LLM, then CLAUDECODE=1, then
    # AGENTVIBES_MCP_FALLBACK, then the literal "default".
    claude_code_key = (
        "claude-code" if os.environ.get("CLAUDECODE", "").strip() == "1" else ""
    )
    llm_key = (
        _safe_env_key("AGENTVIBES_LLM")
        or claude_code_key
        or _safe_env_key("AGENTVIBES_MCP_FALLBACK")
        or "default"
    )

    report_lines = [
        "🎤 Current AgentVibes Configuration\n",
        f"{self.SEPARATOR}\n",
        f"LLM: {llm_key}\n",
        f"Provider: {provider}\n",
        f"Voice: {voice}\n",
        f"Personality: {personality}\n",
        f"Language: {language}\n",
        f"{self.SEPARATOR}\n",
    ]
    return "".join(report_lines)
572
-
573
async def set_language(self, language: str) -> str:
    """
    Set the language for TTS speech.

    Args:
        language: Language name (e.g., "spanish", "french", "german")

    Returns:
        The script output on success, otherwise an error message that
        includes whatever the script returned.
    """
    outcome = await self._run_script(self.LANGUAGE_MANAGER_SCRIPT, ["set", language])
    # NOTE(review): the marker literal below is empty as seen here, so the
    # membership test holds for every non-empty result - confirm which
    # success marker the language manager script actually prints.
    succeeded = bool(outcome) and "" in outcome
    if not succeeded:
        return f"❌ Failed to set language: {outcome}"
    return outcome
587
-
588
async def replay_audio(self, n: int = 1) -> str:
    """
    Replay recently generated TTS audio.

    Args:
        n: Which audio to replay (1 = most recent, 2 = second most recent, etc.)

    Returns:
        The script output when it contains the 🔊 marker, otherwise an
        error message that includes whatever the script returned.
    """
    replay_args = ["replay", str(n)]
    outcome = await self._run_script(self.VOICE_MANAGER_SCRIPT, replay_args)
    succeeded = bool(outcome) and "🔊" in outcome
    if not succeeded:
        return f"❌ Failed to replay audio: {outcome}"
    return outcome
602
-
603
async def set_provider(self, provider: str) -> str:
    """
    Switch the active TTS provider and speak a short confirmation.

    Args:
        provider: Provider name, case-insensitive. On Windows one of
            "windows-piper", "windows-sapi", "soprano"; elsewhere one of
            "piper", "macos", "termux-ssh", "soprano".

    Returns:
        The switch output plus a note about the spoken confirmation, or an
        error message for an unknown provider or a failed switch.
    """
    provider = provider.lower()
    valid_providers = (
        ["windows-piper", "windows-sapi", "soprano"]
        if self.is_windows
        else ["piper", "macos", "termux-ssh", "soprano"]
    )
    if provider not in valid_providers:
        return f"❌ Invalid provider: {provider}. Choose from: {', '.join(valid_providers)}"

    # NOTE(review): the script name is fixed to the .sh variant on every
    # platform - confirm how _run_script handles it on Windows.
    result = await self._run_script("provider-manager.sh", ["switch", provider])

    # NOTE(review): the first marker literal is empty as seen here, so this
    # test holds for every non-empty result - confirm the intended marker.
    switched = bool(result) and ("" in result or "[OK]" in result)
    if not switched:
        return f"❌ Failed to switch provider: {result}"

    # Human-readable provider names for the spoken confirmation.
    provider_names = {
        "macos": "macOS",
        "termux-ssh": "Termux SSH",
        "piper": "Piper",
        "windows-piper": "Windows Piper",
        "windows-sapi": "Windows SAPI",
        "soprano": "Soprano",
    }
    spoken_name = provider_names.get(provider, provider.title())
    confirmation_text = f"Successfully switched to {spoken_name} provider"

    try:
        # Cap the confirmation at 5 seconds so a provider that still needs
        # setup cannot hang the switch.
        await asyncio.wait_for(self.text_to_speech(confirmation_text), timeout=5.0)
    except asyncio.TimeoutError:
        return f"{result}\n⚠️ Provider switched (TTS confirmation timed out - provider may need setup)"
    except Exception as e:
        # The switch itself succeeded; report the TTS problem alongside it.
        return f"{result}\n⚠️ Provider switched but TTS confirmation failed: {e}"
    return f"{result}\n🔊 Spoken confirmation: {confirmation_text}"
651
-
652
async def set_learn_mode(self, enabled: bool) -> str:
    """
    Turn language learning mode on or off.

    When enabled, TTS speaks in both your main language and target language.

    Args:
        enabled: True to enable, False to disable

    Returns:
        The script output when it contains a check mark, otherwise an
        error message embedding the script output
    """
    script_action = "disable"
    if enabled:
        script_action = "enable"
    output = await self._run_script("learn-manager.sh", [script_action])
    if not output or "✓" not in output:
        return f"❌ Failed to set learn mode: {output}"
    return output
669
-
670
async def set_speed(self, speed: str, target: bool = False) -> str:
    """
    Change the speech speed of the main or the target voice.

    Works with both Piper and macOS providers. After a successful change a
    short random phrase is spoken to demonstrate the new speed.

    Args:
        speed: Speed value (e.g., "0.5x", "1x", "2x", "normal", "fast", "slow")
        target: If True, sets target language speed; if False, sets main voice speed

    Returns:
        Success or error message
    """
    # secrets (rather than random) is used for the phrase pick purely to keep
    # security scanners quiet; the choice only adds UI variety.
    import secrets

    script_args = [speed]
    if target:
        script_args = ["target", speed]

    outcome = await self._run_script("speed-manager.sh", script_args)
    if not (outcome and "✓" in outcome):
        return f"❌ Failed to set speed: {outcome}"

    # Short phrases used to demonstrate the new speed
    demo_phrases = [
        "Testing speed change",
        "Speed test in progress",
        "Checking audio speed",
        "Speed configuration test",
        "Audio speed test",
    ]
    spoken_phrase = secrets.choice(demo_phrases)

    try:
        await self.text_to_speech(spoken_phrase)
    except Exception as error:
        # The speed change itself succeeded; only the demo failed.
        return f"{outcome}\n⚠️ Speed changed but demo failed: {error}"
    return f"{outcome}\n🔊 Testing new speed: \"{spoken_phrase}\""
711
-
712
async def get_speed(self) -> str:
    """
    Report the current speech speed settings.

    Returns:
        Output of the speed manager script's ``get`` command, or an error
        message when the script produced no output
    """
    speed_report = await self._run_script("speed-manager.sh", ["get"])
    if speed_report:
        return speed_report
    return "❌ Failed to get speed settings"
721
-
722
async def download_extra_voices(self, auto_yes: bool = False) -> str:
    """
    Download extra high-quality Piper voices from HuggingFace.

    Downloads custom voices: Kristin, Jenny, and Tracy/16Speakers.

    Args:
        auto_yes: If True, passes ``--yes`` to the script so it skips its
                  confirmation prompt

    Returns:
        The script output when it reports success, otherwise an error
        message embedding the script output
    """
    script_args = []
    if auto_yes:
        script_args.append("--yes")
    output = await self._run_script("download-extra-voices.sh", script_args)
    # Any one of these fragments in the output counts as success.
    success_fragments = ("✅", "Successfully downloaded", "already downloaded")
    if output and any(fragment in output for fragment in success_fragments):
        return output
    return f"❌ Failed to download extra voices: {output}"
739
-
740
async def get_verbosity(self) -> str:
    """
    Get current verbosity level.

    Returns:
        Current verbosity level with description, or an error message when
        the verbosity manager script produced no output
    """
    result = await self._run_script("verbosity-manager.sh", ["get"])
    if not result:
        return "❌ Failed to get verbosity level"

    level = result.strip()
    # "caveman" is included because the set_verbosity tool schema accepts it;
    # without an entry the raw level name was shown with no description.
    descriptions = {
        "low": "LOW - Acknowledgments + Completions only (minimal)",
        "medium": "MEDIUM - + Major decisions and findings (balanced)",
        "high": "HIGH - All reasoning (maximum transparency)",
        "caveman": "CAVEMAN - Ultra-terse fragments (maximum token savings)",
    }
    # Unknown output (e.g. an error message) is passed through unchanged.
    desc = descriptions.get(level.lower(), level)
    return f"🎙️ Current Verbosity: {desc}\n\n💡 Change with: set_verbosity(level=\"low|medium|high|caveman\")"
758
-
759
async def set_verbosity(self, level: str) -> str:
    """
    Set verbosity level to control how much Claude speaks.

    Args:
        level: Verbosity level, passed unchanged to the verbosity manager
               script's ``set`` command

    Returns:
        The script output plus a restart reminder on success, otherwise a
        "❌ Failed to set verbosity" message embedding the script output
    """
    result = await self._run_script("verbosity-manager.sh", ["set", level])
    # The previous check was `"" in result`, which is true for every string,
    # so a rejected level was still reported as applied.
    # NOTE(review): the exact marker printed by the script is not visible
    # here; these are the markers the other methods of this class test for.
    success_markers = ("✓", "✅", "[OK]")
    if result and any(marker in result for marker in success_markers):
        return f"{result}\n\n⚠️ Restart Claude Code for changes to take effect"
    return f"❌ Failed to set verbosity: {result}"
773
-
774
def _get_mute_files(self) -> list:
    """Return every mute-flag path that applies on the current platform"""
    home_dir = Path.home()
    mute_paths = [
        home_dir / self.MUTE_FILE_NAME,
        Path.cwd() / self.CLAUDE_DIR_NAME / "agentvibes-muted",
    ]
    if not self.is_windows:
        return mute_paths
    # Windows PowerShell scripts check tts-muted.txt in .claude dir
    return mute_paths + [home_dir / self.CLAUDE_DIR_NAME / "tts-muted.txt"]
784
-
785
async def mute(self) -> str:
    """
    Silence all TTS output by creating a persistent mute flag file.

    Returns:
        Confirmation that mute is active, or an error message when a flag
        file could not be written
    """
    try:
        (Path.home() / self.MUTE_FILE_NAME).touch()
        if self.is_windows:
            # The PowerShell scripts read tts-muted.txt, so mirror the flag there.
            windows_flag = Path.home() / self.CLAUDE_DIR_NAME / "tts-muted.txt"
            windows_flag.parent.mkdir(parents=True, exist_ok=True)
            windows_flag.write_text("true")
    except Exception as error:
        return f"❌ Failed to mute: {error}"
    return "🔇 AgentVibes TTS muted. All voice output is now silenced.\n\n💡 To unmute, use: unmute()"
803
-
804
async def unmute(self) -> str:
    """
    Restore TTS output by clearing every active mute flag.

    Returns:
        Message naming the flags that were cleared, a notice that nothing
        was muted, or an error message
    """
    cleared = []
    try:
        for flag_path in self._get_mute_files():
            if not flag_path.exists():
                continue
            if flag_path.name != "tts-muted.txt":
                # Plain flag files mute by existing, so delete them.
                flag_path.unlink()
                cleared.append(str(flag_path.name))
                continue
            # tts-muted.txt mutes through its content ("true"/"false").
            if flag_path.read_text().strip() == "true":
                flag_path.write_text("false")
                cleared.append(str(flag_path.name))
    except Exception as error:
        return f"❌ Failed to unmute: {error}"

    if not cleared:
        return "🔊 AgentVibes TTS was not muted. Voice output is active."
    return f"🔊 AgentVibes TTS unmuted. Voice output is now restored.\n (Removed: {', '.join(cleared)} mute flag)"
831
-
832
async def is_muted(self) -> str:
    """
    Report whether TTS is currently muted.

    Returns:
        Current mute status
    """
    for flag_path in self._get_mute_files():
        if not flag_path.exists():
            continue
        # Plain flag files mute by existing; tts-muted.txt only when it
        # contains "true".
        if flag_path.name != "tts-muted.txt" or flag_path.read_text().strip() == "true":
            return "🔇 TTS is currently MUTED\n\n💡 To unmute, use: unmute()"
    return "🔊 TTS is currently ACTIVE\n\n💡 To mute, use: mute()"
849
-
850
async def list_background_music(self) -> str:
    """
    List the available background music tracks.

    Returns:
        Output of the background music manager's ``list`` command, or an
        error message when the script produced no output
    """
    track_listing = await self._run_script(self.BACKGROUND_MUSIC_MANAGER_SCRIPT, ["list"])
    if track_listing:
        return track_listing
    return "❌ Failed to list background music"
859
-
860
async def set_background_music(self, track_name: str, agent_name: Optional[str] = None) -> str:
    """
    Assign a background music track to one agent, all agents, or the default.

    The requested name is resolved against the script's track listing:
    first by case-insensitive exact match, then by case-insensitive
    substring match (first hit wins).

    Args:
        track_name: Track filename or partial name for fuzzy matching
        agent_name: Agent name ('all' for all agents, None for default)

    Returns:
        Success or error message
    """
    import re

    script = self.BACKGROUND_MUSIC_MANAGER_SCRIPT

    # Fetch the listing that fuzzy matching runs against
    listing = await self._run_script(script, ["list"])
    if not listing or "❌" in listing:
        return "❌ Failed to list background music tracks"

    # Listing rows look like "<number>. <track name>"
    numbered_entry = re.compile(r'\s*\d+\.\s+(.+)')
    parsed_rows = (numbered_entry.match(row.strip()) for row in listing.split("\n"))
    tracks = [hit.group(1).strip() for hit in parsed_rows if hit]

    wanted = track_name.lower()
    matched_track = next((t for t in tracks if t.lower() == wanted), None)
    if not matched_track:
        matched_track = next((t for t in tracks if wanted in t.lower()), None)

    if not matched_track:
        # Show available tracks to help user
        available = "\n".join(f" • {t}" for t in tracks)
        return f"❌ No track matching '{track_name}' found.\n\nAvailable tracks:\n{available}\n\n💡 Try a partial match like 'celtic' or 'chillwave'"

    # Pick the script sub-command from the requested scope
    if not agent_name:
        command = ["set-default", matched_track]
    elif agent_name.lower() == "all":
        command = ["set-all", matched_track]
    else:
        command = ["set-agent", agent_name, matched_track]
    result = await self._run_script(script, command)

    if not result or not ("✅" in result or "[OK]" in result):
        return f"❌ Failed to set background music: {result}"
    if matched_track.lower() != track_name.lower():
        return f"{result}\n\n🔍 Matched '{track_name}' to '{matched_track}'"
    return result
923
-
924
async def enable_background_music(self, enabled: bool) -> str:
    """
    Enable or disable background music globally.

    Runs the background music manager with ``on``/``off`` and then mirrors
    the flag into ``<agentvibes_root>/.agentvibes/config.json`` under
    ``backgroundMusic.enabled``. The mirror step is best-effort: a failure
    is reported on stderr and never changes the returned message.

    Args:
        enabled: True to enable, False to disable

    Returns:
        The script output, or an error message when the script produced
        no output
    """
    import json
    import sys

    command = "on" if enabled else "off"
    result = await self._run_script(self.BACKGROUND_MUSIC_MANAGER_SCRIPT, [command])

    # Sync to .agentvibes/config.json (TUI source of truth)
    try:
        cfg_path = self.agentvibes_root / ".agentvibes" / "config.json"
        cfg = {}
        if cfg_path.exists():
            cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
        cfg.setdefault("backgroundMusic", {})["enabled"] = enabled
        cfg_path.parent.mkdir(parents=True, exist_ok=True)
        cfg_path.write_text(json.dumps(cfg, indent=2) + "\n", encoding="utf-8")
    except Exception as e:
        # Still best-effort, but no longer silent: an unreadable or malformed
        # config is left untouched and the reason is logged.
        print(f"Warning: Could not sync background music setting to config: {e}", file=sys.stderr)

    return result if result else f"❌ Failed to {'enable' if enabled else 'disable'} background music"
951
-
952
async def set_background_music_volume(self, volume: float) -> str:
    """
    Change the background music volume.

    Args:
        volume: Volume level (0.0-1.0); forwarded to the script as a string

    Returns:
        The script output, or an error message when the script produced
        no output
    """
    volume_args = ["volume", str(volume)]
    output = await self._run_script(self.BACKGROUND_MUSIC_MANAGER_SCRIPT, volume_args)
    if output:
        return output
    return "❌ Failed to set background music volume"
964
-
965
async def get_background_music_status(self) -> str:
    """
    Report the current background music configuration.

    Returns:
        Output of the background music manager's ``status`` command, or an
        error message when the script produced no output
    """
    status_report = await self._run_script(self.BACKGROUND_MUSIC_MANAGER_SCRIPT, ["status"])
    if status_report:
        return status_report
    return "❌ Failed to get background music status"
974
-
975
async def set_reverb(self, level: str, agent: str = "default", apply_all: bool = False) -> str:
    """
    Set the reverb level for one agent or for all agents.

    Args:
        level: Reverb level (off, light, medium, heavy, cathedral)
        agent: Agent name (default: "default")
        apply_all: Apply to all agents (default: False); adds ``--all`` to
                   the script arguments

    Returns:
        The script output, or a generic success message when the script
        produced no output
    """
    extra_flags = ["--all"] if apply_all else []
    script_args = ["set-reverb", level, agent] + extra_flags
    output = await self._run_script(self.EFFECTS_MANAGER_SCRIPT, script_args)
    if output:
        return output
    return f"✅ Set reverb to {level}"
992
-
993
async def get_reverb(self, agent: str = "default") -> str:
    """
    Look up the reverb level configured for an agent.

    Args:
        agent: Agent name (default: "default")

    Returns:
        Current reverb level, or an error message when the script produced
        no output
    """
    level_output = await self._run_script(self.EFFECTS_MANAGER_SCRIPT, ["get-reverb", agent])
    if not level_output:
        return f"❌ Failed to get reverb for {agent}"
    return f"Current reverb level for {agent}: {level_output.strip()}"
1007
-
1008
async def list_audio_effects(self) -> str:
    """
    List the audio effects configured for all agents.

    Returns:
        Output of the effects manager's ``list`` command, or an error
        message when the script produced no output
    """
    effects_listing = await self._run_script(self.EFFECTS_MANAGER_SCRIPT, ["list"])
    if effects_listing:
        return effects_listing
    return "❌ Failed to list audio effects"
1017
-
1018
async def clean_audio_cache(self) -> str:
    """
    Clean all TTS audio cache files and report space freed.

    Non-interactive cleanup suitable for MCP tool usage. Delegates to the
    clean-audio-cache script, which deletes TTS-generated audio files
    (wav, mp3, aiff) while preserving background music tracks.

    Returns:
        The script output, or an error message when the script produced
        no output
    """
    cleanup_report = await self._run_script("clean-audio-cache.sh", [])
    if cleanup_report:
        return cleanup_report
    return "❌ Failed to clean audio cache"
1031
-
1032
- # ── Hermes config helpers ────────────────────────────────────────────────
1033
-
1034
def _hermes_cfg_path(self) -> Path:
    """Return the path of the Hermes AgentVibes SSH config file.

    The base directory is $HERMES_HOME when set, otherwise ~/.hermes.
    """
    fallback_home = Path.home() / ".hermes"
    hermes_home = Path(os.environ.get("HERMES_HOME", fallback_home))
    return hermes_home.joinpath("hooks", "agentvibes-tts", "agentvibes-ssh-config.json")
1037
-
1038
async def get_hermes_config(self) -> str:
    """
    Get current Hermes AgentVibes SSH configuration.

    Values missing from the config file fall back to built-in placeholder
    defaults. An unreadable, malformed, or non-object config file is
    treated as empty.

    Returns:
        Formatted report with status, mode and voice, plus SSH key, host
        and port when the mode is not "local"
    """
    # Imported locally so the method does not rely on a module-level import
    # that is not visible from this part of the file.
    import json

    cfg_path = self._hermes_cfg_path()
    defaults = {
        "mode": "local",
        "sshKey": "/absolute/path/to/id_ed25519_agentvibes",
        "host": "your-receiver-tailscale-ip",
        "port": "2222",
        "voice": "en_US-libritts-high::Leo-8",
    }
    try:
        cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing file, unreadable file, bad encoding or invalid JSON
        cfg = {}
    if not isinstance(cfg, dict):
        # Valid JSON that is not an object (e.g. a list) cannot be merged
        cfg = {}
    merged = {**defaults, **cfg}
    installed = cfg_path.exists()
    is_local = merged.get("mode", "local") == "local"

    lines = [
        "🔌 Hermes AgentVibes Configuration",
        # Was `"" * 40`, which rendered an empty line; uses the same rule
        # character as the set_hermes_config report.
        "─" * 40,
        f"Status: {'✅ Configured' if installed else '⚠️ Not yet installed (run: agentvibes install)'}",
        f"Mode: {'🏠 Local (Hermes & speakers on same machine)' if is_local else '🌐 Remote (SSH to receiver)'}",
        f"Voice: {merged['voice']}",
    ]
    if not is_local:
        lines += [
            f"SSH Key: {merged['sshKey']}",
            f"Host: {merged['host']}",
            f"Port: {merged['port']}",
        ]
    if installed:
        lines += [
            "",
            f"Config file: {cfg_path}",
            "After changes, run: hermes gateway restart",
        ]
    return "\n".join(lines) + "\n"
1073
-
1074
async def set_hermes_config(
    self,
    mode: Optional[str] = None,
    ssh_key: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[str] = None,
    voice: Optional[str] = None,
) -> str:
    """
    Save Hermes AgentVibes SSH configuration.

    Only the arguments that are not None are changed; everything else keeps
    its stored value (or the built-in placeholder default). Each supplied
    value is validated before anything is written.

    Args:
        mode: "local" or "remote" (case-insensitive)
        ssh_key: Path starting with "/" or "~", limited to letters, digits
                 and "_ . / - space"
        host: Hostname or IP address
        port: Port number from 1 to 65535, as a string
        voice: Voice identifier; truncated to 200 characters

    Returns:
        Success message with saved values, or an error message describing
        the first invalid argument or the write failure
    """
    # json is imported locally so the method does not rely on a module-level
    # import that is not visible from this part of the file.
    import json
    import re as _re

    cfg_path = self._hermes_cfg_path()
    defaults = {
        "mode": "local",
        "sshKey": "/absolute/path/to/id_ed25519_agentvibes",
        "host": "your-receiver-tailscale-ip",
        "port": "2222",
        "voice": "en_US-libritts-high::Leo-8",
    }
    try:
        existing = json.loads(cfg_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing file, unreadable file, bad encoding or invalid JSON
        existing = {}
    if not isinstance(existing, dict):
        # Valid JSON that is not an object (e.g. a list) cannot be merged
        existing = {}
    merged = {**defaults, **existing}

    if mode is not None:
        m = str(mode).lower().strip()
        if m not in ("local", "remote"):
            return "❌ Invalid mode: must be 'local' or 'remote'"
        merged["mode"] = m
    if ssh_key is not None:
        sk = str(ssh_key).strip()
        if not _re.match(r'^[/~][a-zA-Z0-9_./ -]{0,511}$', sk):
            return "❌ Invalid ssh_key: must be an absolute path (no special chars)"
        merged["sshKey"] = sk
    if host is not None:
        h = str(host).strip()
        if not _re.match(r'^[a-zA-Z0-9._\[\]:-]{1,253}$', h):
            return "❌ Invalid host: must be a hostname or IP address"
        merged["host"] = h
    if port is not None:
        p = str(port).strip()
        # Digits alone are not enough: 0 and 65536-99999 are not valid ports.
        if not _re.fullmatch(r"[0-9]{1,5}", p) or not 1 <= int(p) <= 65535:
            return "❌ Invalid port: must be a number from 1 to 65535 (e.g. '2222')"
        merged["port"] = p
    if voice is not None:
        merged["voice"] = str(voice)[:200]

    try:
        cfg_path.parent.mkdir(parents=True, exist_ok=True)
        # The file names an SSH key, so keep directory and file owner-only.
        cfg_path.parent.chmod(0o700)
        cfg_path.write_text(json.dumps(merged, indent=2), encoding="utf-8")
        cfg_path.chmod(0o600)
    except (OSError, TypeError, ValueError) as e:
        # The "❌" prefix was missing, unlike every other error in this class.
        return f"❌ Failed to save config: {e}"

    is_local = merged.get("mode", "local") == "local"
    out = "✅ Hermes config saved!\n"
    out += "─" * 40 + "\n"
    out += f"Mode: {'🏠 Local' if is_local else '🌐 Remote (SSH)'}\n"
    out += f"Voice: {merged['voice']}\n"
    if not is_local:
        out += f"SSH Key: {merged['sshKey']}\n"
        out += f"Host: {merged['host']}\n"
        out += f"Port: {merged['port']}\n"
    out += f"\nConfig file: {cfg_path}\n"
    out += "Run: hermes gateway restart\n"
    return out
1146
-
1147
- # Helper methods
1148
def _build_script_env(self) -> dict:
    """Build environment dict for script execution (shared by all script runners)

    Returns:
        A copy of ``os.environ`` with CLAUDE_PROJECT_DIR set when the
        current directory is a Claude project, and with ``~/.local/bin``
        on PATH when not on Windows
    """
    env = os.environ.copy()

    # Determine where to save settings based on context:
    # 1. If cwd has .claude/ → Use cwd (real Claude Code project)
    # 2. Otherwise → Use global ~/.claude/ (Claude Desktop, Warp, etc.)
    # Note: Hooks are ALWAYS from package .claude/ (self.claude_dir)
    cwd = Path.cwd()
    if (cwd / ".claude").is_dir() and cwd != self.agentvibes_root:
        env["CLAUDE_PROJECT_DIR"] = str(cwd)

    # Add common locations for piper to PATH (Unix only)
    if not self.is_windows:
        local_bin = str(Path.home() / ".local" / "bin")
        current_path = env.get("PATH", "")
        if not current_path:
            # Missing or empty PATH: avoid producing "<dir>:" whose empty
            # trailing entry would mean "current directory".
            env["PATH"] = local_bin
        elif local_bin not in current_path.split(os.pathsep):
            # Compare whole entries; a substring test treated e.g.
            # "~/.local/bin2" as if "~/.local/bin" were already present.
            env["PATH"] = f"{local_bin}{os.pathsep}{current_path}"

    return env
1171
-
1172
async def _run_script(self, script_name: str, args: list[str]) -> str:
    """Run a script and return output (bash on Unix, PowerShell on Windows)

    Args:
        script_name: File name of the script inside ``self.hooks_dir``; on
                     Windows a ``.sh`` suffix is swapped for ``.ps1``
        args: Command-line arguments passed to the script

    Returns:
        Stripped stdout when the script exits with code 0; otherwise
        stripped stderr (or the return code plus stdout when stderr is
        empty). Launch failures and a missing script are also reported as
        a message string rather than raised.
    """
    # Auto-resolve .sh → .ps1 on Windows (class constants handle special cases)
    if self.is_windows and script_name.endswith('.sh'):
        script_name = script_name[:-3] + '.ps1'
    script_path = self.hooks_dir / script_name
    if not script_path.exists():
        return f"Script not found: {script_path}"

    # Build command — PowerShell on Windows, bash on Unix
    if self.is_windows:
        cmd = [
            "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
            "-File", str(script_path),
        ] + list(args)
    else:
        cmd = ["bash", str(script_path)] + list(args)

    env = self._build_script_env()

    try:
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
        )
        try:
            stdout, stderr = await process.communicate()
        finally:
            # Ensure process cleanup if communicate() was interrupted
            if process.returncode is None:
                process.kill()
                await process.wait()
    except Exception as e:
        return f"Error running script: {e}"

    # Decode leniently: a strict decode raised on any non-UTF-8 byte (e.g.
    # console-codepage output), discarding the script's real output.
    out_text = stdout.decode("utf-8", errors="replace").strip()
    if process.returncode == 0:
        return out_text

    error_msg = stderr.decode("utf-8", errors="replace").strip()
    if not error_msg:  # If stderr is empty, include stdout for debugging
        error_msg = f"Return code {process.returncode}. Stdout: {out_text}"
    return error_msg
1215
-
1216
async def _get_current_voice(self) -> str:
    """Return the active voice name, or "Unknown" when the script gives no output"""
    voice_output = await self._run_script(self.VOICE_MANAGER_SCRIPT, ["get"])
    if not voice_output:
        return "Unknown"
    return voice_output.strip()
1220
-
1221
async def _get_personality(self) -> str:
    """Return the stored personality name, or "normal" when none can be read"""
    filename = "tts-personality.txt"
    project_file = self.claude_dir / filename
    # Prefer the file under self.claude_dir; fall back to the global one
    # under the home directory.
    if project_file.exists():
        personality_file = project_file
    else:
        personality_file = Path.home() / self.CLAUDE_DIR_NAME / filename

    try:
        if personality_file.exists():
            return personality_file.read_text().strip()
    except (PermissionError, UnicodeDecodeError, OSError) as error:
        # Report the problem but keep going with the default
        import sys
        print(f"Warning: Could not read personality file: {error}", file=sys.stderr)
    return "normal"
1236
-
1237
async def _get_language(self) -> str:
    """Return the language manager's ``code`` output, or "english" when it is empty"""
    language_output = await self._run_script(self.LANGUAGE_MANAGER_SCRIPT, ["code"])
    if not language_output:
        return "english"
    return language_output.strip()
1241
-
1242
async def _get_provider(self) -> str:
    """Return a display label for the active TTS provider.

    Reads tts-provider.txt from self.claude_dir, falling back to the copy
    under the home directory. Unknown provider names are returned as-is;
    when no file can be read a platform default label is returned.
    """
    filename = "tts-provider.txt"
    provider_file = self.claude_dir / filename
    if not provider_file.exists():
        provider_file = Path.home() / self.CLAUDE_DIR_NAME / filename

    labels = {
        "macos": "macOS TTS",
        "piper": "Piper TTS (Free, Offline)",
        "termux-ssh": "Termux SSH (Android)",
        "windows-piper": "Windows Piper TTS (Free, Offline)",
        "windows-sapi": "Windows SAPI (Built-in)",
        "soprano": "Soprano TTS (Ultra-fast Neural)",
    }
    try:
        if provider_file.exists():
            # lstrip drops the BOM that PowerShell-written files start with
            provider_key = provider_file.read_text().strip().lstrip('\ufeff')
            return labels.get(provider_key, provider_key)
    except (PermissionError, UnicodeDecodeError, OSError) as error:
        # Report the problem but keep going with the default
        import sys
        print(f"Warning: Could not read provider file: {error}", file=sys.stderr)

    # Default based on platform
    return "Windows SAPI (Built-in)" if self.is_windows else "Piper TTS (Free, Offline)"
1270
-
1271
-
1272
- # Create the MCP server and the shared AgentVibesServer instance used by the tool definitions below
1273
- app = Server("agentvibes")
1274
- agent_vibes = AgentVibesServer()
1275
-
1276
-
1277
- @app.list_tools()
1278
- async def list_tools() -> list[Tool]:
1279
- """List all available AgentVibes tools"""
1280
- return [
1281
- Tool(
1282
- name="text_to_speech",
1283
- description="""Convert text to speech using AgentVibes TTS.
1284
-
1285
- Supports both macOS TTS and Piper (free, offline) providers.
1286
- Can use different voices, personalities, and languages.
1287
-
1288
- Perfect for:
1289
- - Speaking acknowledgments and confirmations
1290
- - Adding voice to Claude responses
1291
- - Multi-language communication
1292
- - Personality-driven interactions
1293
-
1294
- Examples:
1295
- - text_to_speech(text="Hello, I'm ready to help!")
1296
- - text_to_speech(text="Task completed!", personality="flirty")
1297
- - text_to_speech(text="Hola, ¿cómo estás?", language="spanish")
1298
- """,
1299
- inputSchema={
1300
- "type": "object",
1301
- "properties": {
1302
- "text": {
1303
- "type": "string",
1304
- "description": "Text to convert to speech (max 500 characters)",
1305
- },
1306
- "voice": {
1307
- "type": "string",
1308
- "description": "Voice name (optional). Use list_voices to see options.",
1309
- },
1310
- "personality": {
1311
- "type": "string",
1312
- "description": "Personality style (optional). Examples: flirty, sarcastic, pirate, robot, zen",
1313
- },
1314
- "language": {
1315
- "type": "string",
1316
- "description": "Language to speak in (optional). Examples: spanish, french, german, italian",
1317
- },
1318
- },
1319
- "required": ["text"],
1320
- },
1321
- ),
1322
- Tool(
1323
- name="list_voices",
1324
- description="List all available TTS voices with current selection",
1325
- inputSchema={"type": "object", "properties": {}},
1326
- ),
1327
- Tool(
1328
- name="set_voice",
1329
- description="Switch to a different TTS voice",
1330
- inputSchema={
1331
- "type": "object",
1332
- "properties": {
1333
- "voice_name": {
1334
- "type": "string",
1335
- "description": "Name of the voice to switch to",
1336
- }
1337
- },
1338
- "required": ["voice_name"],
1339
- },
1340
- ),
1341
- Tool(
1342
- name="list_personalities",
1343
- description="List all available personality styles with descriptions",
1344
- inputSchema={"type": "object", "properties": {}},
1345
- ),
1346
- Tool(
1347
- name="set_personality",
1348
- description="Set the personality style for TTS messages",
1349
- inputSchema={
1350
- "type": "object",
1351
- "properties": {
1352
- "personality": {
1353
- "type": "string",
1354
- "description": "Personality name (e.g., flirty, sarcastic, pirate)",
1355
- }
1356
- },
1357
- "required": ["personality"],
1358
- },
1359
- ),
1360
- Tool(
1361
- name="set_language",
1362
- description="Set the language for TTS speech (supports 25+ languages)",
1363
- inputSchema={
1364
- "type": "object",
1365
- "properties": {
1366
- "language": {
1367
- "type": "string",
1368
- "description": "Language name (e.g., spanish, french, german)",
1369
- }
1370
- },
1371
- "required": ["language"],
1372
- },
1373
- ),
1374
- Tool(
1375
- name="get_config",
1376
- description="Get current voice, personality, language, and provider configuration",
1377
- inputSchema={"type": "object", "properties": {}},
1378
- ),
1379
- Tool(
1380
- name="replay_audio",
1381
- description="Replay recently generated TTS audio",
1382
- inputSchema={
1383
- "type": "object",
1384
- "properties": {
1385
- "n": {
1386
- "type": "integer",
1387
- "description": "Which audio to replay (1 = most recent, default: 1)",
1388
- "minimum": 1,
1389
- "maximum": 10,
1390
- }
1391
- },
1392
- },
1393
- ),
1394
- Tool(
1395
- name="set_provider",
1396
- description="Switch between TTS providers" + (
1397
- ": Windows Piper, Windows SAPI, or Soprano" if agent_vibes.is_windows
1398
- else ": macOS TTS, Piper (free, offline), Soprano, or Termux SSH (Android)"
1399
- ),
1400
- inputSchema={
1401
- "type": "object",
1402
- "properties": {
1403
- "provider": {
1404
- "type": "string",
1405
- "description": (
1406
- "Provider name: 'windows-piper', 'windows-sapi', or 'soprano'"
1407
- if agent_vibes.is_windows
1408
- else "Provider name: 'piper', 'macos', 'soprano', or 'termux-ssh'"
1409
- ),
1410
- "enum": (
1411
- ["windows-piper", "windows-sapi", "soprano"]
1412
- if agent_vibes.is_windows
1413
- else ["piper", "macos", "soprano", "termux-ssh"]
1414
- ),
1415
- }
1416
- },
1417
- "required": ["provider"],
1418
- },
1419
- ),
1420
- Tool(
1421
- name="set_learn_mode",
1422
- description="Enable or disable language learning mode. When ON, TTS speaks in both your main language and target language for bilingual learning.",
1423
- inputSchema={
1424
- "type": "object",
1425
- "properties": {
1426
- "enabled": {
1427
- "type": "boolean",
1428
- "description": "True to enable learning mode, False to disable"
1429
- }
1430
- },
1431
- "required": ["enabled"],
1432
- },
1433
- ),
1434
- Tool(
1435
- name="set_speed",
1436
- description="Set speech speed for main or target voice. Works with both Piper and macOS providers. Use this to make voices faster or slower.",
1437
- inputSchema={
1438
- "type": "object",
1439
- "properties": {
1440
- "speed": {
1441
- "type": "string",
1442
- "description": "Speed value: '0.5x' or 'slow/slower' (half speed, slower), '1x' or 'normal' (normal speed), '2x' or 'fast' (double speed, faster), '3x' or 'faster' (triple speed, very fast)"
1443
- },
1444
- "target": {
1445
- "type": "boolean",
1446
- "description": "If true, sets target language speed (for learning mode); if false or omitted, sets main voice speed",
1447
- "default": False
1448
- }
1449
- },
1450
- "required": ["speed"],
1451
- },
1452
- ),
1453
- Tool(
1454
- name="get_speed",
1455
- description="Get current speech speed settings for main and target voices",
1456
- inputSchema={"type": "object", "properties": {}},
1457
- ),
1458
- Tool(
1459
- name="download_extra_voices",
1460
- description="Download extra high-quality custom Piper voices from HuggingFace. Includes: Kristin (US female), Jenny (UK female with Irish accent), and Tracy/16Speakers (multi-speaker). Perfect for adding variety to your TTS voices.",
1461
- inputSchema={
1462
- "type": "object",
1463
- "properties": {
1464
- "auto_yes": {
1465
- "type": "boolean",
1466
- "description": "Skip confirmation prompt and download automatically (default: False)",
1467
- "default": False
1468
- }
1469
- },
1470
- },
1471
- ),
1472
- Tool(
1473
- name="get_verbosity",
1474
- description="Get current AgentVibes verbosity level (low/medium/high/caveman). Verbosity controls how much Claude speaks while working - from minimal (acknowledgments only) to maximum transparency (all reasoning spoken) to caveman (ultra-terse fragments, max token savings).",
1475
- inputSchema={"type": "object", "properties": {}},
1476
- ),
1477
- Tool(
1478
- name="set_verbosity",
1479
- description="""Set AgentVibes verbosity level to control how much Claude speaks while working.
1480
-
1481
- Verbosity Levels:
1482
- - LOW: Only acknowledgments (start) and completions (end). Minimal interruption.
1483
- - MEDIUM: + Major decisions and key findings. Balanced transparency.
1484
- - HIGH: All reasoning, decisions, and findings. Maximum transparency.
1485
- - CAVEMAN: Ultra-terse fragments. Drops articles, filler, hedging. Abbreviates heavily. 65-75% fewer output tokens.
1486
-
1487
- Perfect for:
1488
- - LOW: Quiet work sessions, minimal distraction
1489
- - MEDIUM: Understanding major decisions without full narration
1490
- - HIGH: Full transparency, learning mode, debugging complex tasks
1491
- - CAVEMAN: Maximum token savings, minimal prose
1492
-
1493
- Note: Changes take effect on next Claude Code session restart.""",
1494
- inputSchema={
1495
- "type": "object",
1496
- "properties": {
1497
- "level": {
1498
- "type": "string",
1499
- "description": "Verbosity level to set",
1500
- "enum": ["low", "medium", "high", "caveman"]
1501
- }
1502
- },
1503
- "required": ["level"],
1504
- },
1505
- ),
1506
- Tool(
1507
- name="mute",
1508
- description="Mute all AgentVibes TTS output. Creates a persistent mute flag that silences all voice output until unmuted. Persists across sessions.",
1509
- inputSchema={"type": "object", "properties": {}},
1510
- ),
1511
- Tool(
1512
- name="unmute",
1513
- description="Unmute AgentVibes TTS output. Removes the mute flag and restores voice output.",
1514
- inputSchema={"type": "object", "properties": {}},
1515
- ),
1516
- Tool(
1517
- name="is_muted",
1518
- description="Check if TTS is currently muted.",
1519
- inputSchema={"type": "object", "properties": {}},
1520
- ),
1521
- Tool(
1522
- name="list_background_music",
1523
- description="List all available pre-packaged background music tracks. Shows all audio files that can be used as background music for TTS.",
1524
- inputSchema={"type": "object", "properties": {}},
1525
- ),
1526
- Tool(
1527
- name="set_background_music",
1528
- description="""Set background music track for a specific agent, all agents, or as default. Supports smart fuzzy matching.
1529
-
1530
- Perfect for:
1531
- - "change background music to flamenco" - Sets for all agents
1532
- - "set John's background music to celtic harp" - Agent-specific
1533
- - "use chillwave as default background" - Default for new agents
1534
-
1535
- Fuzzy matching examples:
1536
- - "flamenco" matches "agentvibes_soft_flamenco_loop.mp3"
1537
- - "celtic" matches "agent_vibes_celtic_harp_v1_loop.mp3"
1538
- - "bossa" matches "agent_vibes_bossa_nova_v2_loop.mp3"
1539
- """,
1540
- inputSchema={
1541
- "type": "object",
1542
- "properties": {
1543
- "track_name": {
1544
- "type": "string",
1545
- "description": "Track filename or partial name for fuzzy matching (e.g., 'celtic', 'flamenco', 'bossa nova')",
1546
- },
1547
- "agent_name": {
1548
- "type": "string",
1549
- "description": "Agent name to configure (optional). Use 'all' for all agents, omit for default",
1550
- },
1551
- },
1552
- "required": ["track_name"],
1553
- },
1554
- ),
1555
- Tool(
1556
- name="enable_background_music",
1557
- description="Enable or disable background music globally. When enabled, TTS audio will be mixed with background music at configured volume (default 30%).",
1558
- inputSchema={
1559
- "type": "object",
1560
- "properties": {
1561
- "enabled": {
1562
- "type": "boolean",
1563
- "description": "True to enable background music, False to disable",
1564
- }
1565
- },
1566
- "required": ["enabled"],
1567
- },
1568
- ),
1569
- Tool(
1570
- name="set_background_music_volume",
1571
- description="Set the volume level for background music (0.0-1.0). Recommended: 0.20-0.40 for subtle background ambiance.",
1572
- inputSchema={
1573
- "type": "object",
1574
- "properties": {
1575
- "volume": {
1576
- "type": "number",
1577
- "description": "Volume level (0.0 = silent, 0.30 = default, 1.0 = full volume)",
1578
- "minimum": 0.0,
1579
- "maximum": 1.0,
1580
- }
1581
- },
1582
- "required": ["volume"],
1583
- },
1584
- ),
1585
- Tool(
1586
- name="get_background_music_status",
1587
- description="Get current background music configuration including enabled status, volume, default track, and number of available tracks.",
1588
- inputSchema={"type": "object", "properties": {}},
1589
- ),
1590
- Tool(
1591
- name="set_reverb",
1592
- description="""Set reverb level for TTS audio. Can apply globally (default agent), to a specific agent, or to all agents.
1593
-
1594
- Reverb adds room/space ambiance to the voice, making it sound like it's in a small room, conference room, or large hall.
1595
-
1596
- Examples:
1597
- - set_reverb(level="medium") - Set reverb for default agent
1598
- - set_reverb(level="cathedral", agent="Winston") - Set cathedral reverb for Winston
1599
- - set_reverb(level="light", apply_all=True) - Set light reverb for all agents
1600
- - set_reverb(level="off") - Turn off reverb for default agent
1601
- """,
1602
- inputSchema={
1603
- "type": "object",
1604
- "properties": {
1605
- "level": {
1606
- "type": "string",
1607
- "description": "Reverb level",
1608
- "enum": ["off", "light", "medium", "heavy", "cathedral"]
1609
- },
1610
- "agent": {
1611
- "type": "string",
1612
- "description": "Agent name (optional, defaults to 'default'). Examples: Winston, John, Mary, Amelia",
1613
- },
1614
- "apply_all": {
1615
- "type": "boolean",
1616
- "description": "Apply to all agents (optional, default: false)",
1617
- }
1618
- },
1619
- "required": ["level"],
1620
- },
1621
- ),
1622
- Tool(
1623
- name="get_reverb",
1624
- description="Get current reverb level for a specific agent or default",
1625
- inputSchema={
1626
- "type": "object",
1627
- "properties": {
1628
- "agent": {
1629
- "type": "string",
1630
- "description": "Agent name (optional, defaults to 'default')",
1631
- }
1632
- },
1633
- },
1634
- ),
1635
- Tool(
1636
- name="list_audio_effects",
1637
- description="List current audio effects configuration for all agents, including reverb levels and other effects",
1638
- inputSchema={"type": "object", "properties": {}},
1639
- ),
1640
- Tool(
1641
- name="clean_audio_cache",
1642
- description="Clean all TTS audio cache files and report space freed. Non-interactive cleanup that removes all wav/mp3/aiff files while preserving background music tracks.",
1643
- inputSchema={"type": "object", "properties": {}},
1644
- ),
1645
- Tool(
1646
- name="get_hermes_config",
1647
- description="Get current Hermes AgentVibes SSH configuration (SSH key path, host, port, voice). Use this to check what's currently set before changing it.",
1648
- inputSchema={"type": "object", "properties": {}},
1649
- ),
1650
- Tool(
1651
- name="set_hermes_config",
1652
- description="Configure Hermes AgentVibes TTS settings. Choose 'local' mode when Hermes runs on the same machine as your speakers (no SSH needed), or 'remote' mode to send audio over SSH to a receiver. Omit any field to keep its current value.",
1653
- inputSchema={
1654
- "type": "object",
1655
- "properties": {
1656
- "mode": {
1657
- "type": "string",
1658
- "enum": ["local", "remote"],
1659
- "description": "'local' = Hermes and speakers on same machine (no SSH). 'remote' = send audio over SSH to a receiver machine.",
1660
- },
1661
- "ssh_key": {
1662
- "type": "string",
1663
- "description": "Absolute path to SSH private key (e.g. /home/user/.ssh/id_ed25519_agentvibes) — only used in remote mode",
1664
- },
1665
- "host": {
1666
- "type": "string",
1667
- "description": "Tailscale IP or hostname of the machine with speakers — only used in remote mode",
1668
- },
1669
- "port": {
1670
- "type": "string",
1671
- "description": "AgentVibes receiver SSH port (e.g. '2222') — only used in remote mode",
1672
- },
1673
- "voice": {
1674
- "type": "string",
1675
- "description": "Piper voice model (e.g. 'en_US-libritts-high::Leo-8')",
1676
- },
1677
- },
1678
- },
1679
- ),
1680
- ]
1681
-
1682
-
1683
@app.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    """Route an MCP tool invocation to the matching AgentVibes operation.

    Args:
        name: Name of the tool being invoked.
        arguments: Arguments supplied with the invocation.

    Returns:
        A one-element list carrying the textual result. An unrecognised
        tool name produces an "Unknown tool" message, and any exception
        raised while handling the call is reported as "Error: <message>".
    """
    try:
        # Every handler is a zero-argument callable, so argument lookups (and
        # any KeyError they raise) happen only for the tool actually requested.
        handlers = {
            "text_to_speech": lambda: agent_vibes.text_to_speech(
                text=arguments["text"],
                voice=arguments.get("voice"),
                personality=arguments.get("personality"),
                language=arguments.get("language"),
            ),
            "list_voices": lambda: agent_vibes.list_voices(),
            "set_voice": lambda: agent_vibes.set_voice(arguments["voice_name"]),
            "list_personalities": lambda: agent_vibes.list_personalities(),
            "set_personality": lambda: agent_vibes.set_personality(
                arguments["personality"]
            ),
            "set_language": lambda: agent_vibes.set_language(arguments["language"]),
            "get_config": lambda: agent_vibes.get_config(),
            "replay_audio": lambda: agent_vibes.replay_audio(arguments.get("n", 1)),
            "set_provider": lambda: agent_vibes.set_provider(arguments["provider"]),
            "set_learn_mode": lambda: agent_vibes.set_learn_mode(arguments["enabled"]),
            "set_speed": lambda: agent_vibes.set_speed(
                arguments["speed"], arguments.get("target", False)
            ),
            "get_speed": lambda: agent_vibes.get_speed(),
            "download_extra_voices": lambda: agent_vibes.download_extra_voices(
                arguments.get("auto_yes", False)
            ),
            "get_verbosity": lambda: agent_vibes.get_verbosity(),
            "set_verbosity": lambda: agent_vibes.set_verbosity(arguments["level"]),
            "mute": lambda: agent_vibes.mute(),
            "unmute": lambda: agent_vibes.unmute(),
            "is_muted": lambda: agent_vibes.is_muted(),
            "list_background_music": lambda: agent_vibes.list_background_music(),
            "set_background_music": lambda: agent_vibes.set_background_music(
                arguments.get("track_name"), arguments.get("agent_name")
            ),
            "enable_background_music": lambda: agent_vibes.enable_background_music(
                arguments.get("enabled")
            ),
            "set_background_music_volume": lambda: agent_vibes.set_background_music_volume(
                arguments.get("volume")
            ),
            "get_background_music_status": lambda: agent_vibes.get_background_music_status(),
            "set_reverb": lambda: agent_vibes.set_reverb(
                arguments["level"],
                arguments.get("agent", "default"),
                arguments.get("apply_all", False),
            ),
            "get_reverb": lambda: agent_vibes.get_reverb(
                arguments.get("agent", "default")
            ),
            "list_audio_effects": lambda: agent_vibes.list_audio_effects(),
            "clean_audio_cache": lambda: agent_vibes.clean_audio_cache(),
            "get_hermes_config": lambda: agent_vibes.get_hermes_config(),
            "set_hermes_config": lambda: agent_vibes.set_hermes_config(
                mode=arguments.get("mode"),
                ssh_key=arguments.get("ssh_key"),
                host=arguments.get("host"),
                port=arguments.get("port"),
                voice=arguments.get("voice"),
            ),
        }

        handler = handlers.get(name)
        if handler is None:
            result = f"Unknown tool: {name}"
        else:
            result = await handler()

        return [TextContent(type="text", text=result)]

    except Exception as e:
        # Report the failure as tool output rather than letting it propagate.
        return [TextContent(type="text", text=f"Error: {str(e)}")]
1774
-
1775
-
1776
async def main():
    """Run the MCP server on the stdio transport."""
    async with mcp.server.stdio.stdio_server() as streams:
        incoming, outgoing = streams
        init_options = app.create_initialization_options()
        await app.run(incoming, outgoing, init_options)


if __name__ == "__main__":
    asyncio.run(main())
1
+ #!/usr/bin/env python3
2
+ """
3
+ File: mcp-server/server.py
4
+
5
+ AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
6
+ Website: https://agentvibes.org
7
+ Repository: https://github.com/paulpreibisch/AgentVibes
8
+
9
+ Co-created by Paul Preibisch with Claude AI
10
+ Copyright (c) 2025 Paul Preibisch
11
+
12
+ Licensed under the Apache License, Version 2.0 (the "License");
13
+ you may not use this file except in compliance with the License.
14
+ You may obtain a copy of the License at
15
+
16
+ http://www.apache.org/licenses/LICENSE-2.0
17
+
18
+ Unless required by applicable law or agreed to in writing, software
19
+ distributed under the License is distributed on an "AS IS" BASIS,
20
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ See the License for the specific language governing permissions and
22
+ limitations under the License.
23
+
24
+ DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
+ express or implied, including but not limited to the warranties of
26
+ merchantability, fitness for a particular purpose and noninfringement.
27
+ In no event shall the authors or copyright holders be liable for any claim,
28
+ damages or other liability, whether in an action of contract, tort or
29
+ otherwise, arising from, out of or in connection with the software or the
30
+ use or other dealings in the software.
31
+
32
+ ---
33
+
34
+ @fileoverview MCP Server exposing AgentVibes TTS capabilities via Model Context Protocol
35
+ @context Provides natural language control of TTS features for Claude Desktop, Warp, and other MCP clients
36
+ @architecture MCP Server implementation wrapping bash scripts, async subprocess execution for non-blocking I/O
37
+ @dependencies .claude/hooks/*.sh scripts, MCP SDK, Python asyncio, subprocess
38
+ @entrypoints Called by Claude Desktop/Warp via MCP protocol (stdio transport)
39
+ @patterns Tool registry pattern, async subprocess wrapping, provider abstraction, state file management
40
+ @related GitHub repo, mcp-server/test_server.py, .claude/hooks/play-tts.sh, docs/ai-optimized-documentation-standards.md
41
+ """
42
+
43
+ import asyncio
44
+ import json
45
+ import os
46
+ import platform
47
+ import re as _re
48
+ import subprocess
49
+ from pathlib import Path
50
+ from typing import Optional
51
+
52
+ from mcp.server import Server
53
+ from mcp.types import Tool, TextContent, ImageContent, EmbeddedResource
54
+ import mcp.server.stdio
55
+ class AgentVibesServer:
56
+ """MCP Server for AgentVibes TTS functionality"""
57
+
58
+ # Script name constants (addresses SonarCloud S1192)
59
+ VOICE_MANAGER_SCRIPT = "voice-manager.sh"
60
+ PERSONALITY_MANAGER_SCRIPT = "personality-manager.sh"
61
+ LANGUAGE_MANAGER_SCRIPT = "language-manager.sh"
62
+ BACKGROUND_MUSIC_MANAGER_SCRIPT = "background-music-manager.sh"
63
+ EFFECTS_MANAGER_SCRIPT = "effects-manager.sh"
64
+
65
+ # Path constants (addresses SonarCloud S1192)
66
+ CLAUDE_DIR_NAME = ".claude"
67
+ MUTE_FILE_NAME = ".agentvibes-muted"
68
+ SEPARATOR = "━" * 39
69
+
70
def __init__(self):
    """Detect the host platform and record the script and directory locations."""
    # Count as native Windows only when WSL_DISTRO_NAME is not set.
    wsl_distro = os.environ.get("WSL_DISTRO_NAME")
    self.is_windows = platform.system() == "Windows" and not wsl_distro

    # On Windows the manager scripts are PowerShell files; override the
    # class-level .sh names on this instance.
    if self.is_windows:
        windows_scripts = {
            "VOICE_MANAGER_SCRIPT": "voice-manager-windows.ps1",
            "PERSONALITY_MANAGER_SCRIPT": "personality-manager.ps1",
            "LANGUAGE_MANAGER_SCRIPT": "language-manager.ps1",
            "BACKGROUND_MUSIC_MANAGER_SCRIPT": "background-music-manager.ps1",
            "EFFECTS_MANAGER_SCRIPT": "effects-manager.ps1",
        }
        for attribute, script_name in windows_scripts.items():
            setattr(self, attribute, script_name)

    # Locate .claude, then derive the hooks directory and the package root.
    self.claude_dir = self._find_claude_dir()
    hooks_subdir = "hooks-windows" if self.is_windows else "hooks"
    self.hooks_dir = self.claude_dir / hooks_subdir
    self.agentvibes_root = self.claude_dir.parent
88
+
89
def _find_claude_dir(self) -> Path:
    """Return the ``.claude`` directory that sits beside this script's parent folder.

    Falls back to ``~/.claude`` when the packaged directory is not present.
    """
    # Two levels up from this file: <root>/mcp-server/server.py -> <root>.
    package_root = Path(__file__).resolve().parent.parent
    bundled = package_root / self.CLAUDE_DIR_NAME

    # is_dir() is already False for a missing path, so one check covers both
    # "exists" and "is a directory".
    if bundled.is_dir():
        return bundled

    # Global fallback under the user's home directory.
    return Path.home() / self.CLAUDE_DIR_NAME
103
+
104
def _resolve_friendly_name(self, voice_name: str) -> str:
    """Resolve a friendly voice name to a voice id using voice-metadata.json.

    The metadata file is read from
    ``<agentvibes_root>/.agentvibes/config/voice-metadata.json``. A voice is
    matched first by its key under ``voices`` and then by ``displayName``,
    both case-insensitively.

    Args:
        voice_name: Friendly name (e.g., "ryan") or a voice id.

    Returns:
        The resolved voice id, or ``voice_name`` unchanged when the file is
        missing, untrusted, malformed, or holds no safe match.
    """
    import re

    if not isinstance(voice_name, str):
        return voice_name

    metadata_path = self.agentvibes_root / ".agentvibes" / "config" / "voice-metadata.json"

    # SECURITY: the file must exist and must not be a symlink.
    if not metadata_path.exists() or metadata_path.is_symlink():
        return voice_name

    # SECURITY: where getuid exists, refuse a file owned by another user.
    try:
        if hasattr(os, "getuid") and metadata_path.stat().st_uid != os.getuid():
            return voice_name
    except (OSError, AttributeError):
        pass  # best effort: a failed stat falls through to the read below

    try:
        with open(metadata_path, "r", encoding="utf-8") as f:
            metadata = json.load(f)
    except (OSError, ValueError):
        # Unreadable, undecodable or invalid JSON: keep the caller's name.
        return voice_name

    voices = metadata.get("voices", {}) if isinstance(metadata, dict) else None
    if not isinstance(voices, dict):
        return voice_name

    wanted = voice_name.lower()
    resolved_id = None

    # First choice: the friendly name is itself a key.
    keyed = voices.get(wanted)
    if isinstance(keyed, dict):
        resolved_id = keyed.get("id")

    # Second choice: a displayName match. Malformed entries are skipped
    # instead of raising.
    if not resolved_id:
        for voice_data in voices.values():
            if not isinstance(voice_data, dict):
                continue
            display = voice_data.get("displayName", "")
            if isinstance(display, str) and display.lower() == wanted:
                resolved_id = voice_data.get("id")
                break

    # SECURITY: fullmatch (not match + "$") so an id with a trailing newline
    # is rejected as well.
    if isinstance(resolved_id, str) and re.fullmatch(r"[a-zA-Z0-9_-]+", resolved_id):
        return resolved_id

    return voice_name
159
+
160
+ # ── LibriTTS display-name resolution ──────────────────────────────────────
161
+
162
+ _SURNAME_POOL = [
163
+ 'Bell', 'Carter', 'Davis', 'Ellis', 'Foster', 'Gray', 'Hayes', 'Irving',
164
+ 'Jones', 'Knox', 'Lane', 'Mason', 'Nash', 'Owens', 'Pierce', 'Quinn',
165
+ ]
166
+
167
@classmethod
def _uniquify_voice_name(cls, raw_name: str) -> str:
    """Turn a raw speaker name into a display name by appending a pooled surname.

    Described in the original as a Python port of ``uniquifyVoiceName`` from
    ``src/utils/voice-names.js``.

    * ``"<base>-<n>"`` with ``n >= 2`` becomes ``"<base> <surname>"``, where the
      surname is picked from ``_SURNAME_POOL`` by ``(n - 1)`` modulo pool size.
    * A name that already contains a space is returned unchanged.
    * Anything else gets the first pooled surname appended.
    * An empty or ``None`` name is returned as-is.
    """
    import re as _re

    if not raw_name:
        return raw_name

    numbered = _re.match(r'^(.+)-(\d+)$', raw_name)
    if numbered is not None:
        ordinal = int(numbered.group(2))
        if ordinal >= 2:
            surname = cls._SURNAME_POOL[(ordinal - 1) % len(cls._SURNAME_POOL)]
            return f"{numbered.group(1)} {surname}"

    # Suffixes below 2 fall through and are treated like any other raw name.
    if ' ' in raw_name:
        return raw_name
    return f"{raw_name} {cls._SURNAME_POOL[0]}"
181
+
182
def _build_libritts_catalog(self) -> dict:
    """Build a lookup of LibriTTS speakers from ``voice-assignments.json``.

    The file is read from ``<agentvibes_root>/voice-assignments.json`` and its
    ``libritts_speakers`` mapping (speaker id -> entry) is indexed under three
    lowercased keys per speaker: the display name, the raw voice name, and the
    raw voice name with spaces replaced by underscores. The first speaker to
    claim a key keeps it.

    Entries that cannot be used (non-dict entry, missing or empty
    ``voice_name``, non-integer speaker id) are skipped individually, so one
    bad record does not hide the speakers that follow it.

    Returns:
        Dict mapping lookup key -> info dict with ``voice_id``, ``model``,
        ``speaker_name``, ``speaker_id``, ``display_name`` and ``gender``.
        Empty when the file is missing, unreadable, or not valid JSON.
    """
    catalog: dict = {}
    va_path = self.agentvibes_root / "voice-assignments.json"

    try:
        data = json.loads(va_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing or unreadable file, bad encoding, or invalid JSON.
        return catalog

    speakers = data.get("libritts_speakers", {}) if isinstance(data, dict) else None
    if not isinstance(speakers, dict):
        return catalog

    for id_str, entry in speakers.items():
        if not isinstance(entry, dict):
            continue
        raw_name = entry.get("voice_name")
        if not isinstance(raw_name, str) or not raw_name:
            continue
        try:
            speaker_id = int(id_str)
        except ValueError:
            continue

        display_name = self._uniquify_voice_name(raw_name)
        info = {
            "voice_id": f"en_US-libritts-high::{raw_name}",
            "model": "en_US-libritts-high",
            "speaker_name": raw_name,
            "speaker_id": speaker_id,
            "display_name": display_name,
            # A JSON null is normalised to "" so consumers always see a string.
            "gender": entry.get("gender") or "",
        }
        for key in (
            display_name.lower(),
            raw_name.lower(),
            raw_name.replace(" ", "_").lower(),
        ):
            catalog.setdefault(key, info)

    return catalog
212
+
213
def _resolve_voice_input(self, voice_input: str) -> Optional[dict]:
    """Resolve a voice display name or id to a descriptor dict.

    Accepted forms, tried in this order:

    1. ``"<model>::<speaker>"``, e.g. ``"en_US-libritts-high::Bella"``.
    2. A plain ``en_XX-<name>-<quality>`` model id, e.g. ``"en_US-amy-medium"``.
    3. A LibriTTS display or raw name, e.g. ``"Bella Bell"`` or
       ``"Kristin_Hughes"`` (underscores are also tried as spaces).

    Args:
        voice_input: Voice name or id as typed by the caller.

    Returns:
        A dict with ``voice_id``, ``model``, ``speaker_name``, ``speaker_id``
        and ``display_name`` (catalog hits also carry ``gender``), or ``None``
        when the input cannot be resolved.
    """
    import re as _re

    if not voice_input:
        return None
    MS_SEP = "::"

    # Form 1: explicit "<model>::<speaker>".
    if MS_SEP in voice_input:
        model, speaker_name = voice_input.split(MS_SEP, 1)
        # fullmatch also rejects a model ending in a newline, which
        # match() + "$" would accept. An empty speaker is unresolvable.
        if not speaker_name or not _re.fullmatch(r'[a-zA-Z0-9_-]+', model):
            return None
        # NOTE(review): the speaker id comes from the LibriTTS catalog whatever
        # model was named - confirm this is intended for other models.
        entry = self._build_libritts_catalog().get(speaker_name.lower())
        return {
            "voice_id": voice_input,
            "model": model,
            "speaker_name": speaker_name,
            "speaker_id": entry["speaker_id"] if entry else None,
            "display_name": entry["display_name"] if entry else speaker_name,
        }

    # Form 2: plain single-speaker model id (e.g. en_US-amy-medium).
    if _re.fullmatch(r'en_[A-Z]{2}-[a-zA-Z0-9_]+-[a-z]+', voice_input):
        return {
            "voice_id": voice_input, "model": voice_input,
            "speaker_name": None, "speaker_id": None, "display_name": voice_input,
        }

    # Form 3: LibriTTS display name / raw name lookup.
    catalog = self._build_libritts_catalog()
    normalised = voice_input.replace("_", " ")
    return catalog.get(normalised.lower()) or catalog.get(voice_input.lower()) or None
253
+
254
def _get_config_dir(self) -> Path:
    """Pick the ``.claude`` directory that voice config files are written to.

    The working directory's own ``.claude`` folder is used when it exists and
    the working directory is not the AgentVibes root; otherwise the directory
    stored in ``self.claude_dir`` is returned.
    """
    candidate = Path.cwd() / ".claude"
    if not candidate.is_dir():
        return self.claude_dir
    # Running from the package root itself: use the already-resolved directory.
    if candidate.parent == self.agentvibes_root:
        return self.claude_dir
    return candidate
260
+
261
async def text_to_speech(
    self,
    text: str,
    voice: Optional[str] = None,
    personality: Optional[str] = None,
    language: Optional[str] = None,
) -> str:
    """Speak ``text`` by running the play-tts hook script.

    A personality or language passed here is applied only for this call: the
    previous value is read first and set again in a ``finally`` block.

    Args:
        text: The text to speak.
        voice: Optional voice name (e.g., "Aria", "Northern Terry").
        personality: Optional personality style (e.g., "flirty", "sarcastic").
        language: Optional language (e.g., "spanish", "french").

    Returns:
        A status message. On success it includes the saved audio path and the
        voice line when the script printed them; on failure it carries the
        script's stderr (and stdout, if any); after 60 seconds without a
        result it reports a timeout.
    """
    # Previous settings, captured only when we override them.
    original_personality = None
    original_language = None

    try:
        # Temporarily set personality if specified
        if personality:
            original_personality = await self._get_personality()
            await self._run_script(
                self.PERSONALITY_MANAGER_SCRIPT, ["set", personality]
            )

        # Temporarily set language if specified
        if language:
            original_language = await self._get_language()
            await self._run_script(self.LANGUAGE_MANAGER_SCRIPT, ["set", language])

        # Resolve LLM key: AGENTVIBES_LLM > CLAUDECODE=1 > AGENTVIBES_MCP_FALLBACK.
        # Values that fail the pattern are ignored instead of being passed on.
        llm_key = os.environ.get("AGENTVIBES_LLM", "").strip()
        if llm_key and not _re.match(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$", llm_key):
            llm_key = ""
        if not llm_key and os.environ.get("CLAUDECODE", "").strip() == "1":
            llm_key = "claude-code"
        if not llm_key:
            fallback = os.environ.get("AGENTVIBES_MCP_FALLBACK", "").strip()
            if fallback and _re.match(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$", fallback):
                llm_key = fallback

        # Build the command line for the platform's TTS script.
        tts_script = "play-tts.ps1" if self.is_windows else "play-tts.sh"
        play_tts = self.hooks_dir / tts_script
        if self.is_windows:
            args = ["powershell", "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", str(play_tts), text]
            if voice:
                args.extend(["-VoiceOverride", voice])
            if llm_key:
                args.extend(["-llm", llm_key])
        else:
            args = ["bash", str(play_tts)]
            if llm_key:
                args.extend(["--llm", llm_key])
            args.append(text)
            if voice:
                args.append(voice)

        env = self._build_script_env()

        process = await asyncio.create_subprocess_exec(
            *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=env,
        )
        try:
            try:
                stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=60.0)
            except asyncio.TimeoutError:
                process.kill()
                await process.wait()
                return "❌ TTS timed out after 60 seconds"

            # errors="replace": stray non-UTF-8 bytes from the script must not
            # turn a finished TTS run into an exception.
            stdout_text = stdout.decode(errors="replace").strip()

            if process.returncode != 0:
                error = stderr.decode(errors="replace").strip()
                full_error = f"{error}\nStdout: {stdout_text}" if stdout_text else error
                return f"❌ TTS failed: {full_error}"

            # Strip ANSI colour codes before scanning the script's report.
            ansi_strip = _re.compile(r'\x1b\[[0-9;]*m')
            audio_file_path = None
            voice_info = None
            for line in stdout_text.split("\n"):
                clean = ansi_strip.sub('', line).strip()
                if "Saved to:" in clean and audio_file_path is None:
                    raw_path = clean.split("Saved to:")[1].strip()
                    # Path ends at .wav (strip trailing size/count info)
                    wav_end = raw_path.find(".wav")
                    if wav_end != -1:
                        audio_file_path = raw_path[:wav_end + 4]
                    elif raw_path:
                        # Guarded: a bare "Saved to:" line has no token to take.
                        audio_file_path = raw_path.split()[0]
                if ("Voice used:" in clean or ("Voice:" in clean and "Background" not in clean)) and voice_info is None:
                    voice_info = clean

            if audio_file_path:
                truncated = f"{text[:50]}..." if len(text) > 50 else text
                # NOTE(review): the leading space may be a lost status icon -
                # confirm against the upstream file.
                result_msg = f" Spoke: {truncated}\n📁 Audio saved: {audio_file_path}"
                if voice_info:
                    result_msg += f"\n{voice_info}"
                return result_msg

            return f"✅ Spoke: {text[:50]}..." if len(text) > 50 else f"✅ Spoke: {text}"
        finally:
            # Ensure process cleanup
            if process.returncode is None:
                process.kill()
                await process.wait()

    finally:
        # Restore original settings
        if original_personality:
            await self._run_script(
                self.PERSONALITY_MANAGER_SCRIPT, ["set", original_personality]
            )
        if original_language:
            await self._run_script(
                self.LANGUAGE_MANAGER_SCRIPT, ["set", original_language]
            )
388
+
389
async def list_voices(self) -> str:
    """List the voices reported by the voice-manager script for the active provider.

    When ``en_US-libritts-high.onnx`` is present under
    ``~/.local/share/piper-voices`` and the LibriTTS catalog is non-empty, the
    named LibriTTS speakers are appended, one line per display name.

    Returns:
        A formatted listing with the current voice marked, a "no voices"
        notice when the script printed only blank lines, or an error message
        when the script produced no output at all.
    """
    # The provider is used only to label the listing and suggest an alternative.
    provider = await self._get_provider()
    current_voice = await self._get_current_voice()

    result = await self._run_script(self.VOICE_MANAGER_SCRIPT, ["list-simple"])
    if not result:
        return "❌ Failed to list voices"

    voices = [v for v in result.strip().split("\n") if v]
    if not voices:
        return (
            f"📦 No voices available\n"
            f"{self.SEPARATOR}\n"
            f"For Piper: Download voices using /agent-vibes:provider download <voice-name>\n"
            f"Example: en_US-lessac-medium, en_GB-alba-medium"
        )

    # Determine provider label and alternative provider
    if "Piper" in provider:
        provider_label, alternative_provider = "Piper TTS", "macOS"
    elif "macOS" in provider:
        provider_label, alternative_provider = "macOS TTS", "Piper"
    elif "Termux" in provider or "Android" in provider:
        provider_label, alternative_provider = "Termux SSH (Android)", "Piper"
    else:
        provider_label, alternative_provider = "TTS", None

    # Collect lines and join once at the end.
    lines = [f"🎤 Available {provider_label} Voices:", f"{self.SEPARATOR}"]
    for voice in voices:
        marker = " (current)" if voice == current_voice else ""
        lines.append(f" • {voice}{marker}")

    # Expand LibriTTS named speakers when en_US-libritts-high is installed
    piper_voices_dir = Path.home() / ".local" / "share" / "piper-voices"
    libritts_onnx = piper_voices_dir / "en_US-libritts-high.onnx"
    if libritts_onnx.exists():
        catalog = self._build_libritts_catalog()
        if catalog:
            lines.append("")
            lines.append(" 📖 LibriTTS named speakers (en_US-libritts-high):")
            # The catalog holds several keys per speaker; print each display
            # name once.
            seen: set = set()
            for entry in catalog.values():
                dn = entry["display_name"]
                if dn in seen:
                    continue
                seen.add(dn)
                # gender may be missing or a JSON null; treat both as unknown.
                gender = str(entry.get("gender") or "").lower()
                g_icon = {"female": "♀", "male": "♂"}.get(gender, "—")
                marker = " (current)" if entry["voice_id"] == current_voice else ""
                lines.append(f" • {dn} ({g_icon} speaker {entry['speaker_id']}){marker}")

    lines.append(f"{self.SEPARATOR}")

    # Add provider switch hint
    if alternative_provider:
        lines.append("")
        lines.append(
            f"💡 Switch to {alternative_provider}? Use: set_provider(provider=\"{alternative_provider.lower()}\")"
        )

    return "\n".join(lines) + "\n"
463
+
464
async def set_voice(self, voice_name: str) -> str:
    """Switch to a different voice.

    The name is first passed to ``_resolve_voice_input``. When that resolves,
    the voice config files are written directly into the directory returned
    by ``_get_config_dir``:

    * ``tts-voice.txt`` holds the display name.
    * ``tts-piper-model.txt`` holds the model (multi-speaker voices only).
    * ``tts-piper-speaker-id.txt`` holds the speaker id (when one is known).

    Files that do not apply to the chosen voice are removed. When the name
    does not resolve, it goes through ``_resolve_friendly_name`` and the
    voice-manager script's ``switch`` command instead.

    Args:
        voice_name: Display name, voice id, or friendly name (e.g., "ryan").

    Returns:
        Success or error message.
    """
    resolved = self._resolve_voice_input(voice_name)

    if resolved:
        display_name = resolved["display_name"]
        model = resolved["model"]
        speaker_id = resolved["speaker_id"]
        speaker_name = resolved["speaker_name"]

        config_dir = self._get_config_dir()
        voice_file = config_dir / "tts-voice.txt"
        model_file = config_dir / "tts-piper-model.txt"
        speaker_file = config_dir / "tts-piper-speaker-id.txt"
        try:
            config_dir.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8 so non-ASCII display names are written the same
            # way whatever the platform's default encoding is.
            voice_file.write_text(display_name + "\n", encoding="utf-8")
            if speaker_name:
                model_file.write_text(model + "\n", encoding="utf-8")
                if speaker_id is not None:
                    speaker_file.write_text(f"{speaker_id}\n", encoding="utf-8")
                else:
                    # No known speaker id: drop any stale one.
                    speaker_file.unlink(missing_ok=True)
            else:
                # Single-speaker model: clear the multi-speaker files.
                model_file.unlink(missing_ok=True)
                speaker_file.unlink(missing_ok=True)
        except OSError as e:
            return f"❌ Failed to write voice config: {e}"

        detail = f" (speaker {speaker_id}, model {model})" if speaker_id is not None else ""
        return f"✅ Voice set to: {display_name}{detail}"

    # Fall back to legacy friendly-name resolver (voice-metadata.json)
    original_name = voice_name
    resolved_name = self._resolve_friendly_name(voice_name)
    result = await self._run_script(
        self.VOICE_MANAGER_SCRIPT, ["switch", resolved_name, "--silent"]
    )
    if result and "✅" in result:
        if original_name.lower() != resolved_name.lower():
            return f"✅ Voice switched to: {original_name} ({resolved_name})"
        return f"✅ Voice switched to: {voice_name}"
    return f"❌ Failed to switch voice could not resolve '{voice_name}'. Try 'list_voices' to see available names."
519
+
520
async def list_personalities(self) -> str:
    """Return the personality manager script's ``list`` output.

    Returns:
        The script output, or an error message when the script returned
        nothing.
    """
    listing = await self._run_script(self.PERSONALITY_MANAGER_SCRIPT, ["list"])
    if listing:
        return listing
    return "❌ Failed to list personalities"
529
+
530
async def set_personality(self, personality: str) -> str:
    """Set the personality style through the personality manager script.

    Args:
        personality: Personality name (e.g., "flirty", "sarcastic", "pirate").

    Returns:
        The script output when it contains the 🎭 marker, otherwise an error
        message that embeds whatever the script returned.
    """
    command = ["set", personality]
    outcome = await self._run_script(self.PERSONALITY_MANAGER_SCRIPT, command)
    if not outcome or "🎭" not in outcome:
        return f"❌ Failed to set personality: {outcome}"
    return outcome
546
+
547
async def get_config(self) -> str:
    """Report the current AgentVibes configuration.

    Returns:
        A formatted block listing the LLM key, provider, voice, personality
        and language, framed by separator lines.
    """
    voice = await self._get_current_voice()
    personality = await self._get_personality()
    language = await self._get_language()
    provider = await self._get_provider()

    # LLM key priority:
    #   1. AGENTVIBES_LLM  2. CLAUDECODE=1  3. AGENTVIBES_MCP_FALLBACK  4. "default"
    # Keys that fail the pattern are treated as if they were unset.
    key_pattern = _re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_-]*$")
    llm_key = os.environ.get("AGENTVIBES_LLM", "").strip()
    if llm_key and key_pattern.match(llm_key) is None:
        llm_key = ""
    if not llm_key:
        if os.environ.get("CLAUDECODE", "").strip() == "1":
            llm_key = "claude-code"
        else:
            fallback = os.environ.get("AGENTVIBES_MCP_FALLBACK", "").strip()
            if fallback and key_pattern.match(fallback):
                llm_key = fallback
            else:
                llm_key = "default"

    report_lines = [
        "🎤 Current AgentVibes Configuration",
        f"{self.SEPARATOR}",
        f"LLM: {llm_key}",
        f"Provider: {provider}",
        f"Voice: {voice}",
        f"Personality: {personality}",
        f"Language: {language}",
        f"{self.SEPARATOR}",
    ]
    return "\n".join(report_lines) + "\n"
582
+
583
async def set_language(self, language: str) -> str:
    """
    Choose the language used for TTS speech.

    Args:
        language: Language name (e.g., "spanish", "french", "german")

    Returns:
        The script output when it carries the ✓ marker, otherwise an error message
    """
    outcome = await self._run_script(self.LANGUAGE_MANAGER_SCRIPT, ["set", language])
    accepted = bool(outcome) and "✓" in outcome
    if not accepted:
        return f"❌ Failed to set language: {outcome}"
    return outcome
597
+
598
async def replay_audio(self, n: int = 1) -> str:
    """
    Play back a previously generated TTS clip.

    Args:
        n: Which audio to replay (1 = most recent, 2 = second most recent, etc.)

    Returns:
        The script output when it carries the 🔊 marker, otherwise an error message
    """
    outcome = await self._run_script(self.VOICE_MANAGER_SCRIPT, ["replay", str(n)])
    replayed = bool(outcome) and "🔊" in outcome
    if not replayed:
        return f"❌ Failed to replay audio: {outcome}"
    return outcome
612
+
613
async def set_provider(self, provider: str) -> str:
    """
    Switch the active TTS provider and speak a short confirmation.

    The accepted names depend on the platform: "windows-piper", "windows-sapi"
    and "soprano" on Windows; "piper", "macos", "termux-ssh" and "soprano"
    elsewhere.

    Args:
        provider: Provider name (case-insensitive)

    Returns:
        Success or error message
    """
    provider = provider.lower()
    valid_providers = (
        ["windows-piper", "windows-sapi", "soprano"]
        if self.is_windows
        else ["piper", "macos", "termux-ssh", "soprano"]
    )
    if provider not in valid_providers:
        return f"❌ Invalid provider: {provider}. Choose from: {', '.join(valid_providers)}"

    switch_output = await self._run_script("provider-manager.sh", ["switch", provider])
    switched = bool(switch_output) and ("✓" in switch_output or "[OK]" in switch_output)
    if not switched:
        return f"❌ Failed to switch provider: {switch_output}"

    # Human-readable names used in the spoken confirmation
    display_names = {
        "macos": "macOS",
        "termux-ssh": "Termux SSH",
        "piper": "Piper",
        "windows-piper": "Windows Piper",
        "windows-sapi": "Windows SAPI",
        "soprano": "Soprano",
    }
    provider_name = display_names.get(provider, provider.title())
    confirmation_text = f"Successfully switched to {provider_name} provider"

    try:
        # Bound the confirmation to 5 seconds so an unconfigured provider cannot hang the call
        await asyncio.wait_for(self.text_to_speech(confirmation_text), timeout=5.0)
    except asyncio.TimeoutError:
        # The switch itself succeeded; only the spoken confirmation timed out
        return f"{switch_output}\n⚠️ Provider switched (TTS confirmation timed out - provider may need setup)"
    except Exception as e:
        # A TTS failure does not undo the provider switch
        return f"{switch_output}\n⚠️ Provider switched but TTS confirmation failed: {e}"
    return f"{switch_output}\n🔊 Spoken confirmation: {confirmation_text}"
661
+
662
async def set_learn_mode(self, enabled: bool) -> str:
    """
    Turn language learning mode on or off.

    In learning mode, TTS speaks in both the main language and the target language.

    Args:
        enabled: True to enable, False to disable

    Returns:
        The script output when it carries the ✓ marker, otherwise an error message
    """
    if enabled:
        action = "enable"
    else:
        action = "disable"
    outcome = await self._run_script("learn-manager.sh", [action])
    accepted = bool(outcome) and "✓" in outcome
    if not accepted:
        return f"❌ Failed to set learn mode: {outcome}"
    return outcome
679
+
680
async def set_speed(self, speed: str, target: bool = False) -> str:
    """
    Change the speech speed of the main or target voice, then speak a sample.

    Args:
        speed: Speed value (e.g., "0.5x", "1x", "2x", "normal", "fast", "slow")
        target: If True, sets target language speed; if False, sets main voice speed

    Returns:
        Success or error message
    """
    # secrets is used instead of random purely to keep security scanners quiet;
    # the choice only varies the demo sentence.
    import secrets

    script_args = [speed]
    if target:
        script_args.insert(0, "target")

    outcome = await self._run_script("speed-manager.sh", script_args)
    if not (outcome and "✓" in outcome):
        return f"❌ Failed to set speed: {outcome}"

    # Short sentences used to demonstrate the new speed
    samples = [
        "Testing speed change",
        "Speed test in progress",
        "Checking audio speed",
        "Speed configuration test",
        "Audio speed test",
    ]
    test_message = secrets.choice(samples)

    try:
        await self.text_to_speech(test_message)
    except Exception as e:
        # The speed change already succeeded; only the demo failed
        return f"{outcome}\n⚠️ Speed changed but demo failed: {e}"
    return f"{outcome}\n🔊 Testing new speed: \"{test_message}\""
721
+
722
async def get_speed(self) -> str:
    """
    Read the current speech speed settings.

    Returns:
        The speed manager's "get" output, or an error message when it is empty
    """
    speeds = await self._run_script("speed-manager.sh", ["get"])
    if not speeds:
        return "❌ Failed to get speed settings"
    return speeds
731
+
732
async def download_extra_voices(self, auto_yes: bool = False) -> str:
    """
    Download extra high-quality Piper voices from HuggingFace.

    Downloads custom voices: Kristin, Jenny, and Tracy/16Speakers.

    Args:
        auto_yes: If True, passes "--yes" so the script skips its confirmation prompt

    Returns:
        The script output when it contains a success marker, otherwise an
        error message wrapping whatever the script printed
    """
    args = ["--yes"] if auto_yes else []
    result = await self._run_script("download-extra-voices.sh", args)

    # The previous check included `"" in result`, which is true for every
    # string, so any non-empty output (including script errors) was returned
    # as success. Only real markers count now.
    # NOTE(review): "✅" is assumed to be the script's success glyph, matching
    # the other manager scripts - confirm against download-extra-voices.sh.
    success_markers = ("✅", "Successfully downloaded", "already downloaded")
    if result and any(marker in result for marker in success_markers):
        return result
    return f"❌ Failed to download extra voices: {result}"
749
+
750
async def get_verbosity(self) -> str:
    """
    Get current verbosity level.

    Returns:
        The current level with a short description, or an error message when
        the verbosity manager prints nothing
    """
    result = await self._run_script("verbosity-manager.sh", ["get"])
    if not result:
        return "❌ Failed to get verbosity level"

    level = result.strip()
    descriptions = {
        "low": "LOW - Acknowledgments + Completions only (minimal)",
        "medium": "MEDIUM - + Major decisions and findings (balanced)",
        "high": "HIGH - All reasoning (maximum transparency)",
        # The get_verbosity tool description advertises a "caveman" level;
        # without this entry it was echoed back as the bare word.
        "caveman": "CAVEMAN - Ultra-terse fragments (max token savings)",
    }
    # Unknown levels fall back to the raw script output
    desc = descriptions.get(level, level)
    return f"🎙️ Current Verbosity: {desc}\n\n💡 Change with: set_verbosity(level=\"low|medium|high\")"
768
+
769
async def set_verbosity(self, level: str) -> str:
    """
    Change how much Claude speaks while working.

    Args:
        level: Verbosity level (low, medium, or high)

    Returns:
        The script output plus a restart reminder when it carries the ✅
        marker, otherwise an error message
    """
    outcome = await self._run_script("verbosity-manager.sh", ["set", level])
    accepted = bool(outcome) and "✅" in outcome
    if not accepted:
        return f"❌ Failed to set verbosity: {outcome}"
    return f"{outcome}\n\n⚠️ Restart Claude Code for changes to take effect"
783
+
784
+ def _get_mute_files(self) -> list:
785
+ """Get all mute file paths for current platform"""
786
+ files = [
787
+ Path.home() / self.MUTE_FILE_NAME,
788
+ Path.cwd() / self.CLAUDE_DIR_NAME / "agentvibes-muted",
789
+ ]
790
+ # Windows PowerShell scripts check tts-muted.txt in .claude dir
791
+ if self.is_windows:
792
+ files.append(Path.home() / self.CLAUDE_DIR_NAME / "tts-muted.txt")
793
+ return files
794
+
795
async def mute(self) -> str:
    """
    Silence all TTS output by creating a persistent mute flag.

    Returns:
        Success message confirming mute is active, or an error message
    """
    home = Path.home()
    try:
        (home / self.MUTE_FILE_NAME).touch()
        if self.is_windows:
            # The PowerShell scripts read tts-muted.txt, so mirror the flag there
            powershell_flag = home / self.CLAUDE_DIR_NAME / "tts-muted.txt"
            powershell_flag.parent.mkdir(parents=True, exist_ok=True)
            powershell_flag.write_text("true")
    except Exception as e:
        return f"❌ Failed to mute: {e}"
    return "🔇 AgentVibes TTS muted. All voice output is now silenced.\n\n💡 To unmute, use: unmute()"
813
+
814
async def unmute(self) -> str:
    """
    Restore TTS output by clearing every active mute flag.

    Returns:
        A message naming the cleared flags, a notice that TTS was not muted,
        or an error message
    """
    cleared = []
    try:
        for flag in self._get_mute_files():
            if not flag.exists():
                continue
            if flag.name != "tts-muted.txt":
                # Presence-based flag: deleting the file unmutes
                flag.unlink()
                cleared.append(str(flag.name))
            elif flag.read_text().strip() == "true":
                # Content-based flag: the file stays, its value flips to "false"
                flag.write_text("false")
                cleared.append(str(flag.name))
    except Exception as e:
        return f"❌ Failed to unmute: {e}"

    if not cleared:
        return "🔊 AgentVibes TTS was not muted. Voice output is active."
    return f"🔊 AgentVibes TTS unmuted. Voice output is now restored.\n (Removed: {', '.join(cleared)} mute flag)"
841
+
842
async def is_muted(self) -> str:
    """
    Report whether TTS is currently muted.

    Returns:
        Current mute status
    """
    muted_message = "🔇 TTS is currently MUTED\n\n💡 To unmute, use: unmute()"
    for flag in self._get_mute_files():
        if not flag.exists():
            continue
        # tts-muted.txt only counts when its content is "true"; every other
        # flag file mutes simply by existing.
        if flag.name == "tts-muted.txt" and flag.read_text().strip() != "true":
            continue
        return muted_message
    return "🔊 TTS is currently ACTIVE\n\n💡 To mute, use: mute()"
859
+
860
async def list_background_music(self) -> str:
    """
    Fetch the list of background music tracks from the music manager script.

    Returns:
        The script's "list" output, or an error message when it is empty
    """
    listing = await self._run_script(self.BACKGROUND_MUSIC_MANAGER_SCRIPT, ["list"])
    if not listing:
        return "❌ Failed to list background music"
    return listing
869
+
870
async def set_background_music(self, track_name: str, agent_name: Optional[str] = None) -> str:
    """
    Assign a background music track to one agent, all agents, or the default slot.

    The requested name is resolved against the manager script's numbered
    track list: first a case-insensitive exact match, then the first track
    containing the name as a substring.

    Args:
        track_name: Track filename or partial name for fuzzy matching
        agent_name: Agent name ('all' for all agents, None for default)

    Returns:
        Success or error message
    """
    import re

    script = self.BACKGROUND_MUSIC_MANAGER_SCRIPT

    listing = await self._run_script(script, ["list"])
    if not listing or "❌" in listing:
        return "❌ Failed to list background music tracks"

    # Track names are taken from lines shaped like "<number>. <name>"
    numbered = (re.match(r'\s*\d+\.\s+(.+)', row.strip()) for row in listing.split("\n"))
    tracks = [hit.group(1).strip() for hit in numbered if hit]

    wanted = track_name.lower()
    matched_track = next((t for t in tracks if t.lower() == wanted), None)
    if not matched_track:
        matched_track = next((t for t in tracks if wanted in t.lower()), None)

    if not matched_track:
        # List what exists so the caller can retry with a usable name
        available = "\n".join([f" • {t}" for t in tracks])
        return f"❌ No track matching '{track_name}' found.\n\nAvailable tracks:\n{available}\n\n💡 Try a partial match like 'celtic' or 'chillwave'"

    # Pick the sub-command from the agent_name scope
    if not agent_name:
        command = ["set-default", matched_track]
    elif agent_name.lower() == "all":
        command = ["set-all", matched_track]
    else:
        command = ["set-agent", agent_name, matched_track]
    result = await self._run_script(script, command)

    if not (result and ("✅" in result or "[OK]" in result)):
        return f"❌ Failed to set background music: {result}"
    if matched_track.lower() != track_name.lower():
        return f"{result}\n\n🔍 Matched '{track_name}' to '{matched_track}'"
    return result
933
+
934
async def enable_background_music(self, enabled: bool) -> str:
    """
    Switch background music on or off globally.

    Runs the music manager's "on"/"off" command, then mirrors the flag into
    .agentvibes/config.json under the agentvibes root.

    Args:
        enabled: True to enable, False to disable

    Returns:
        The script output, or an error message when it is empty
    """
    result = await self._run_script(
        self.BACKGROUND_MUSIC_MANAGER_SCRIPT, ["on" if enabled else "off"]
    )

    # Mirror the flag into .agentvibes/config.json (TUI source of truth).
    # Best-effort: any failure here is deliberately ignored.
    try:
        import json
        cfg_path = self.agentvibes_root / ".agentvibes" / "config.json"
        cfg = json.loads(cfg_path.read_text(encoding="utf-8")) if cfg_path.exists() else {}
        cfg.setdefault("backgroundMusic", {})["enabled"] = enabled
        cfg_path.parent.mkdir(parents=True, exist_ok=True)
        cfg_path.write_text(json.dumps(cfg, indent=2) + "\n", encoding="utf-8")
    except Exception:
        pass  # best-effort sync

    if result:
        return result
    return f"❌ Failed to {'enable' if enabled else 'disable'} background music"
961
+
962
async def set_background_music_volume(self, volume: float) -> str:
    """
    Change the background music volume.

    Args:
        volume: Volume level (0.0-1.0), passed to the script as text

    Returns:
        The script output, or an error message when it is empty
    """
    outcome = await self._run_script(
        self.BACKGROUND_MUSIC_MANAGER_SCRIPT, ["volume", str(volume)]
    )
    if not outcome:
        return "❌ Failed to set background music volume"
    return outcome
974
+
975
async def get_background_music_status(self) -> str:
    """
    Read the current background music configuration.

    Returns:
        The script's "status" output, or an error message when it is empty
    """
    status = await self._run_script(self.BACKGROUND_MUSIC_MANAGER_SCRIPT, ["status"])
    if not status:
        return "❌ Failed to get background music status"
    return status
984
+
985
async def set_reverb(self, level: str, agent: str = "default", apply_all: bool = False) -> str:
    """
    Apply a reverb level to one agent or to all agents.

    Args:
        level: Reverb level (off, light, medium, heavy, cathedral)
        agent: Agent name (default: "default")
        apply_all: Apply to all agents (default: False)

    Returns:
        The script output, or a generic success message when it is empty
    """
    script_args = ["set-reverb", level, agent] + (["--all"] if apply_all else [])
    outcome = await self._run_script(self.EFFECTS_MANAGER_SCRIPT, script_args)
    if outcome:
        return outcome
    return f"✅ Set reverb to {level}"
1002
+
1003
async def get_reverb(self, agent: str = "default") -> str:
    """
    Look up the reverb level configured for an agent.

    Args:
        agent: Agent name (default: "default")

    Returns:
        Current reverb level, or an error message when the script prints nothing
    """
    outcome = await self._run_script(self.EFFECTS_MANAGER_SCRIPT, ["get-reverb", agent])
    if not outcome:
        return f"❌ Failed to get reverb for {agent}"
    return f"Current reverb level for {agent}: {outcome.strip()}"
1017
+
1018
async def list_audio_effects(self) -> str:
    """
    Fetch the audio effects configuration for all agents.

    Returns:
        The effects manager's "list" output, or an error message when it is empty
    """
    effects = await self._run_script(self.EFFECTS_MANAGER_SCRIPT, ["list"])
    if not effects:
        return "❌ Failed to list audio effects"
    return effects
1027
+
1028
async def clean_audio_cache(self) -> str:
    """
    Run the audio cache cleanup script and relay its report.

    The cleanup is non-interactive, which makes it suitable for MCP tool
    usage. Per the original description, it removes TTS-generated audio
    (wav, mp3, aiff) and keeps the background music tracks.

    Returns:
        The script output, or an error message when it is empty
    """
    report = await self._run_script("clean-audio-cache.sh", [])
    if not report:
        return "❌ Failed to clean audio cache"
    return report
1041
+
1042
+ # ── Hermes config helpers ────────────────────────────────────────────────
1043
+
1044
+ def _hermes_cfg_path(self) -> Path:
1045
+ hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
1046
+ return hermes_home / "hooks" / "agentvibes-tts" / "agentvibes-ssh-config.json"
1047
+
1048
async def get_hermes_config(self) -> str:
    """
    Get current Hermes AgentVibes SSH configuration.

    Values missing from the config file are filled from built-in
    placeholders. A missing, unreadable, or malformed file (including valid
    JSON that is not an object) is treated as an empty configuration.

    Returns:
        Current mode and voice, plus SSH key, host and port in remote mode
    """
    import json

    cfg_path = self._hermes_cfg_path()
    defaults = {
        "mode": "local",
        "sshKey": "/absolute/path/to/id_ed25519_agentvibes",
        "host": "your-receiver-tailscale-ip",
        "port": "2222",
        "voice": "en_US-libritts-high::Leo-8",
    }
    try:
        cfg = json.loads(cfg_path.read_text(encoding="utf-8"))
    except Exception:
        cfg = {}
    # Valid JSON that is not an object (e.g. a list) cannot be merged and
    # used to raise TypeError out of this method.
    if not isinstance(cfg, dict):
        cfg = {}
    merged = {**defaults, **cfg}
    installed = cfg_path.exists()
    is_local = merged.get("mode", "local") == "local"

    out = "🔌 Hermes AgentVibes Configuration\n"
    out += "─" * 40 + "\n"
    out += f"Status: {'✅ Configured' if installed else '⚠️ Not yet installed (run: agentvibes install)'}\n"
    out += f"Mode: {'🏠 Local (Hermes & speakers on same machine)' if is_local else '🌐 Remote (SSH to receiver)'}\n"
    out += f"Voice: {merged['voice']}\n"
    if not is_local:
        # SSH details are only shown in remote mode
        out += f"SSH Key: {merged['sshKey']}\n"
        out += f"Host: {merged['host']}\n"
        out += f"Port: {merged['port']}\n"
    if installed:
        out += f"\nConfig file: {cfg_path}\n"
        out += "After changes, run: hermes gateway restart\n"
    return out
1083
+
1084
async def set_hermes_config(
    self,
    mode: Optional[str] = None,
    ssh_key: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[str] = None,
    voice: Optional[str] = None,
) -> str:
    """
    Save Hermes AgentVibes SSH configuration.

    Only the arguments that are not None are changed; all other values are
    kept from the existing config file, or from built-in placeholders when
    the file is missing or malformed. Each supplied value is validated
    before anything is written.

    Args:
        mode: "local" or "remote" (case-insensitive)
        ssh_key: Path starting with "/" or "~", limited to letters, digits,
            "_", ".", "/", "-" and spaces
        host: Hostname or IP address
        port: TCP port as text, 1-65535
        voice: Voice identifier (truncated to 200 characters)

    Returns:
        Success message with saved values, or an error message
    """
    import json
    import re as _re

    cfg_path = self._hermes_cfg_path()
    defaults = {
        "mode": "local",
        "sshKey": "/absolute/path/to/id_ed25519_agentvibes",
        "host": "your-receiver-tailscale-ip",
        "port": "2222",
        "voice": "en_US-libritts-high::Leo-8",
    }
    try:
        existing = json.loads(cfg_path.read_text(encoding="utf-8"))
    except Exception:
        existing = {}
    # Valid JSON that is not an object (e.g. a list) cannot be merged and
    # used to raise TypeError out of this method.
    if not isinstance(existing, dict):
        existing = {}
    merged = {**defaults, **existing}

    if mode is not None:
        m = str(mode).lower().strip()
        if m not in ("local", "remote"):
            return "❌ Invalid mode: must be 'local' or 'remote'"
        merged["mode"] = m
    if ssh_key is not None:
        sk = str(ssh_key).strip()
        if not _re.match(r'^[/~][a-zA-Z0-9_./ -]{0,511}$', sk):
            return "❌ Invalid ssh_key: must be an absolute path (no special chars)"
        merged["sshKey"] = sk
    if host is not None:
        h = str(host).strip()
        if not _re.match(r'^[a-zA-Z0-9._\[\]:-]{1,253}$', h):
            return "❌ Invalid host: must be a hostname or IP address"
        merged["host"] = h
    if port is not None:
        p = str(port).strip()
        if not _re.match(r"^\d{1,5}$", p):
            return "❌ Invalid port: must be a number (e.g. '2222')"
        # Five digits alone also admits 0 and 65536-99999, which are not valid ports
        if not 1 <= int(p) <= 65535:
            return "❌ Invalid port: must be between 1 and 65535"
        merged["port"] = p
    if voice is not None:
        merged["voice"] = str(voice)[:200]

    try:
        cfg_path.parent.mkdir(parents=True, exist_ok=True)
        # Restrict the directory and file to the owner
        cfg_path.parent.chmod(0o700)
        cfg_path.write_text(json.dumps(merged, indent=2), encoding="utf-8")
        cfg_path.chmod(0o600)
    except Exception as e:
        return f"❌ Failed to save config: {e}"

    is_local = merged.get("mode", "local") == "local"
    out = "✅ Hermes config saved!\n"
    out += "─" * 40 + "\n"
    out += f"Mode: {'🏠 Local' if is_local else '🌐 Remote (SSH)'}\n"
    out += f"Voice: {merged['voice']}\n"
    if not is_local:
        out += f"SSH Key: {merged['sshKey']}\n"
        out += f"Host: {merged['host']}\n"
        out += f"Port: {merged['port']}\n"
    out += f"\nConfig file: {cfg_path}\n"
    out += "Run: hermes gateway restart\n"
    return out
1156
+
1157
+ # Helper methods
1158
+ def _build_script_env(self) -> dict:
1159
+ """Build environment dict for script execution (shared by all script runners)"""
1160
+ env = os.environ.copy()
1161
+
1162
+ # Determine where to save settings based on context:
1163
+ # 1. If cwd has .claude/ → Use cwd (real Claude Code project)
1164
+ # 2. Otherwise → Use global ~/.claude/ (Claude Desktop, Warp, etc.)
1165
+ # Note: Hooks are ALWAYS from package .claude/ (self.claude_dir)
1166
+ cwd = Path.cwd()
1167
+ if (cwd / ".claude").is_dir() and cwd != self.agentvibes_root:
1168
+ env["CLAUDE_PROJECT_DIR"] = str(cwd)
1169
+
1170
+ # Add common locations for piper to PATH (Unix only)
1171
+ if not self.is_windows:
1172
+ home_dir = Path.home()
1173
+ local_bin = str(home_dir / ".local" / "bin")
1174
+ if "PATH" in env:
1175
+ if local_bin not in env["PATH"]:
1176
+ env["PATH"] = f"{local_bin}:{env['PATH']}"
1177
+ else:
1178
+ env["PATH"] = local_bin
1179
+
1180
+ return env
1181
+
1182
+ async def _run_script(self, script_name: str, args: list[str]) -> str:
1183
+ """Run a script and return output (bash on Unix, PowerShell on Windows)"""
1184
+ # Auto-resolve .sh → .ps1 on Windows (class constants handle special cases)
1185
+ if self.is_windows and script_name.endswith('.sh'):
1186
+ script_name = script_name[:-3] + '.ps1'
1187
+ script_path = self.hooks_dir / script_name
1188
+ if not script_path.exists():
1189
+ return f"Script not found: {script_path}"
1190
+
1191
+ # Build command — PowerShell on Windows, bash on Unix
1192
+ if self.is_windows:
1193
+ cmd = [
1194
+ "powershell", "-NoProfile", "-ExecutionPolicy", "Bypass",
1195
+ "-File", str(script_path)
1196
+ ] + args
1197
+ else:
1198
+ cmd = ["bash", str(script_path)] + args
1199
+
1200
+ env = self._build_script_env()
1201
+
1202
+ try:
1203
+ result = await asyncio.create_subprocess_exec(
1204
+ *cmd,
1205
+ stdout=asyncio.subprocess.PIPE,
1206
+ stderr=asyncio.subprocess.PIPE,
1207
+ env=env,
1208
+ )
1209
+ try:
1210
+ stdout, stderr = await result.communicate()
1211
+ if result.returncode == 0:
1212
+ return stdout.decode().strip()
1213
+ else:
1214
+ error_msg = stderr.decode().strip()
1215
+ if not error_msg: # If stderr is empty, include stdout for debugging
1216
+ error_msg = f"Return code {result.returncode}. Stdout: {stdout.decode().strip()}"
1217
+ return error_msg
1218
+ finally:
1219
+ # Ensure process cleanup
1220
+ if result.returncode is None:
1221
+ result.kill()
1222
+ await result.wait()
1223
+ except Exception as e:
1224
+ return f"Error running script: {e}"
1225
+
1226
+ async def _get_current_voice(self) -> str:
1227
+ """Get the currently active voice"""
1228
+ result = await self._run_script(self.VOICE_MANAGER_SCRIPT, ["get"])
1229
+ return result.strip() if result else "Unknown"
1230
+
1231
+ async def _get_personality(self) -> str:
1232
+ """Get the current personality setting"""
1233
+ personality_file = self.claude_dir / "tts-personality.txt"
1234
+ if not personality_file.exists():
1235
+ # Try global
1236
+ personality_file = Path.home() / self.CLAUDE_DIR_NAME / "tts-personality.txt"
1237
+
1238
+ try:
1239
+ if personality_file.exists():
1240
+ return personality_file.read_text().strip()
1241
+ except (PermissionError, UnicodeDecodeError, OSError) as e:
1242
+ # Log error but don't crash - return default
1243
+ import sys
1244
+ print(f"Warning: Could not read personality file: {e}", file=sys.stderr)
1245
+ return "normal"
1246
+
1247
+ async def _get_language(self) -> str:
1248
+ """Get the current language setting"""
1249
+ result = await self._run_script(self.LANGUAGE_MANAGER_SCRIPT, ["code"])
1250
+ return result.strip() if result else "english"
1251
+
1252
+ async def _get_provider(self) -> str:
1253
+ """Get the active TTS provider"""
1254
+ provider_file = self.claude_dir / "tts-provider.txt"
1255
+ if not provider_file.exists():
1256
+ provider_file = Path.home() / self.CLAUDE_DIR_NAME / "tts-provider.txt"
1257
+
1258
+ provider_labels = {
1259
+ "macos": "macOS TTS",
1260
+ "piper": "Piper TTS (Free, Offline)",
1261
+ "termux-ssh": "Termux SSH (Android)",
1262
+ "windows-piper": "Windows Piper TTS (Free, Offline)",
1263
+ "windows-sapi": "Windows SAPI (Built-in)",
1264
+ "soprano": "Soprano TTS (Ultra-fast Neural)",
1265
+ }
1266
+ try:
1267
+ if provider_file.exists():
1268
+ provider = provider_file.read_text().strip()
1269
+ # Strip BOM from PowerShell-written files
1270
+ provider = provider.lstrip('\ufeff')
1271
+ return provider_labels.get(provider, provider)
1272
+ except (PermissionError, UnicodeDecodeError, OSError) as e:
1273
+ # Log error but don't crash - return default
1274
+ import sys
1275
+ print(f"Warning: Could not read provider file: {e}", file=sys.stderr)
1276
+ # Default based on platform
1277
+ if self.is_windows:
1278
+ return "Windows SAPI (Built-in)"
1279
+ return "Piper TTS (Free, Offline)"
1280
+
1281
+
1282
+ # Create the MCP server
1283
+ app = Server("agentvibes")
1284
+ agent_vibes = AgentVibesServer()
1285
+
1286
+
1287
+ @app.list_tools()
1288
+ async def list_tools() -> list[Tool]:
1289
+ """List all available AgentVibes tools"""
1290
+ return [
1291
+ Tool(
1292
+ name="text_to_speech",
1293
+ description="""Convert text to speech using AgentVibes TTS.
1294
+
1295
+ Supports both macOS TTS and Piper (free, offline) providers.
1296
+ Can use different voices, personalities, and languages.
1297
+
1298
+ Perfect for:
1299
+ - Speaking acknowledgments and confirmations
1300
+ - Adding voice to Claude responses
1301
+ - Multi-language communication
1302
+ - Personality-driven interactions
1303
+
1304
+ Examples:
1305
+ - text_to_speech(text="Hello, I'm ready to help!")
1306
+ - text_to_speech(text="Task completed!", personality="flirty")
1307
+ - text_to_speech(text="Hola, ¿cómo estás?", language="spanish")
1308
+ """,
1309
+ inputSchema={
1310
+ "type": "object",
1311
+ "properties": {
1312
+ "text": {
1313
+ "type": "string",
1314
+ "description": "Text to convert to speech (max 500 characters)",
1315
+ },
1316
+ "voice": {
1317
+ "type": "string",
1318
+ "description": "Voice name (optional). Use list_voices to see options.",
1319
+ },
1320
+ "personality": {
1321
+ "type": "string",
1322
+ "description": "Personality style (optional). Examples: flirty, sarcastic, pirate, robot, zen",
1323
+ },
1324
+ "language": {
1325
+ "type": "string",
1326
+ "description": "Language to speak in (optional). Examples: spanish, french, german, italian",
1327
+ },
1328
+ },
1329
+ "required": ["text"],
1330
+ },
1331
+ ),
1332
+ Tool(
1333
+ name="list_voices",
1334
+ description="List all available TTS voices with current selection",
1335
+ inputSchema={"type": "object", "properties": {}},
1336
+ ),
1337
+ Tool(
1338
+ name="set_voice",
1339
+ description="Switch to a different TTS voice",
1340
+ inputSchema={
1341
+ "type": "object",
1342
+ "properties": {
1343
+ "voice_name": {
1344
+ "type": "string",
1345
+ "description": "Name of the voice to switch to",
1346
+ }
1347
+ },
1348
+ "required": ["voice_name"],
1349
+ },
1350
+ ),
1351
+ Tool(
1352
+ name="list_personalities",
1353
+ description="List all available personality styles with descriptions",
1354
+ inputSchema={"type": "object", "properties": {}},
1355
+ ),
1356
+ Tool(
1357
+ name="set_personality",
1358
+ description="Set the personality style for TTS messages",
1359
+ inputSchema={
1360
+ "type": "object",
1361
+ "properties": {
1362
+ "personality": {
1363
+ "type": "string",
1364
+ "description": "Personality name (e.g., flirty, sarcastic, pirate)",
1365
+ }
1366
+ },
1367
+ "required": ["personality"],
1368
+ },
1369
+ ),
1370
+ Tool(
1371
+ name="set_language",
1372
+ description="Set the language for TTS speech (supports 25+ languages)",
1373
+ inputSchema={
1374
+ "type": "object",
1375
+ "properties": {
1376
+ "language": {
1377
+ "type": "string",
1378
+ "description": "Language name (e.g., spanish, french, german)",
1379
+ }
1380
+ },
1381
+ "required": ["language"],
1382
+ },
1383
+ ),
1384
+ Tool(
1385
+ name="get_config",
1386
+ description="Get current voice, personality, language, and provider configuration",
1387
+ inputSchema={"type": "object", "properties": {}},
1388
+ ),
1389
+ Tool(
1390
+ name="replay_audio",
1391
+ description="Replay recently generated TTS audio",
1392
+ inputSchema={
1393
+ "type": "object",
1394
+ "properties": {
1395
+ "n": {
1396
+ "type": "integer",
1397
+ "description": "Which audio to replay (1 = most recent, default: 1)",
1398
+ "minimum": 1,
1399
+ "maximum": 10,
1400
+ }
1401
+ },
1402
+ },
1403
+ ),
1404
+ Tool(
1405
+ name="set_provider",
1406
+ description="Switch between TTS providers" + (
1407
+ ": Windows Piper, Windows SAPI, or Soprano" if agent_vibes.is_windows
1408
+ else ": macOS TTS, Piper (free, offline), Soprano, or Termux SSH (Android)"
1409
+ ),
1410
+ inputSchema={
1411
+ "type": "object",
1412
+ "properties": {
1413
+ "provider": {
1414
+ "type": "string",
1415
+ "description": (
1416
+ "Provider name: 'windows-piper', 'windows-sapi', or 'soprano'"
1417
+ if agent_vibes.is_windows
1418
+ else "Provider name: 'piper', 'macos', 'soprano', or 'termux-ssh'"
1419
+ ),
1420
+ "enum": (
1421
+ ["windows-piper", "windows-sapi", "soprano"]
1422
+ if agent_vibes.is_windows
1423
+ else ["piper", "macos", "soprano", "termux-ssh"]
1424
+ ),
1425
+ }
1426
+ },
1427
+ "required": ["provider"],
1428
+ },
1429
+ ),
1430
+ Tool(
1431
+ name="set_learn_mode",
1432
+ description="Enable or disable language learning mode. When ON, TTS speaks in both your main language and target language for bilingual learning.",
1433
+ inputSchema={
1434
+ "type": "object",
1435
+ "properties": {
1436
+ "enabled": {
1437
+ "type": "boolean",
1438
+ "description": "True to enable learning mode, False to disable"
1439
+ }
1440
+ },
1441
+ "required": ["enabled"],
1442
+ },
1443
+ ),
1444
+ Tool(
1445
+ name="set_speed",
1446
+ description="Set speech speed for main or target voice. Works with both Piper and macOS providers. Use this to make voices faster or slower.",
1447
+ inputSchema={
1448
+ "type": "object",
1449
+ "properties": {
1450
+ "speed": {
1451
+ "type": "string",
1452
+ "description": "Speed value: '0.5x' or 'slow/slower' (half speed, slower), '1x' or 'normal' (normal speed), '2x' or 'fast' (double speed, faster), '3x' or 'faster' (triple speed, very fast)"
1453
+ },
1454
+ "target": {
1455
+ "type": "boolean",
1456
+ "description": "If true, sets target language speed (for learning mode); if false or omitted, sets main voice speed",
1457
+ "default": False
1458
+ }
1459
+ },
1460
+ "required": ["speed"],
1461
+ },
1462
+ ),
1463
+ Tool(
1464
+ name="get_speed",
1465
+ description="Get current speech speed settings for main and target voices",
1466
+ inputSchema={"type": "object", "properties": {}},
1467
+ ),
1468
+ Tool(
1469
+ name="download_extra_voices",
1470
+ description="Download extra high-quality custom Piper voices from HuggingFace. Includes: Kristin (US female), Jenny (UK female with Irish accent), and Tracy/16Speakers (multi-speaker). Perfect for adding variety to your TTS voices.",
1471
+ inputSchema={
1472
+ "type": "object",
1473
+ "properties": {
1474
+ "auto_yes": {
1475
+ "type": "boolean",
1476
+ "description": "Skip confirmation prompt and download automatically (default: False)",
1477
+ "default": False
1478
+ }
1479
+ },
1480
+ },
1481
+ ),
1482
+ Tool(
1483
+ name="get_verbosity",
1484
+ description="Get current AgentVibes verbosity level (low/medium/high/caveman). Verbosity controls how much Claude speaks while working - from minimal (acknowledgments only) to maximum transparency (all reasoning spoken) to caveman (ultra-terse fragments, max token savings).",
1485
+ inputSchema={"type": "object", "properties": {}},
1486
+ ),
1487
+ Tool(
1488
+ name="set_verbosity",
1489
+ description="""Set AgentVibes verbosity level to control how much Claude speaks while working.
1490
+
1491
+ Verbosity Levels:
1492
+ - LOW: Only acknowledgments (start) and completions (end). Minimal interruption.
1493
+ - MEDIUM: + Major decisions and key findings. Balanced transparency.
1494
+ - HIGH: All reasoning, decisions, and findings. Maximum transparency.
1495
+ - CAVEMAN: Ultra-terse fragments. Drops articles, filler, hedging. Abbreviates heavily. 65-75% fewer output tokens.
1496
+
1497
+ Perfect for:
1498
+ - LOW: Quiet work sessions, minimal distraction
1499
+ - MEDIUM: Understanding major decisions without full narration
1500
+ - HIGH: Full transparency, learning mode, debugging complex tasks
1501
+ - CAVEMAN: Maximum token savings, minimal prose
1502
+
1503
+ Note: Changes take effect on next Claude Code session restart.""",
1504
+ inputSchema={
1505
+ "type": "object",
1506
+ "properties": {
1507
+ "level": {
1508
+ "type": "string",
1509
+ "description": "Verbosity level to set",
1510
+ "enum": ["low", "medium", "high", "caveman"]
1511
+ }
1512
+ },
1513
+ "required": ["level"],
1514
+ },
1515
+ ),
1516
+ Tool(
1517
+ name="mute",
1518
+ description="Mute all AgentVibes TTS output. Creates a persistent mute flag that silences all voice output until unmuted. Persists across sessions.",
1519
+ inputSchema={"type": "object", "properties": {}},
1520
+ ),
1521
+ Tool(
1522
+ name="unmute",
1523
+ description="Unmute AgentVibes TTS output. Removes the mute flag and restores voice output.",
1524
+ inputSchema={"type": "object", "properties": {}},
1525
+ ),
1526
+ Tool(
1527
+ name="is_muted",
1528
+ description="Check if TTS is currently muted.",
1529
+ inputSchema={"type": "object", "properties": {}},
1530
+ ),
1531
+ Tool(
1532
+ name="list_background_music",
1533
+ description="List all available pre-packaged background music tracks. Shows all audio files that can be used as background music for TTS.",
1534
+ inputSchema={"type": "object", "properties": {}},
1535
+ ),
1536
+ Tool(
1537
+ name="set_background_music",
1538
+ description="""Set background music track for a specific agent, all agents, or as default. Supports smart fuzzy matching.
1539
+
1540
+ Perfect for:
1541
+ - "change background music to flamenco" - Sets for all agents
1542
+ - "set John's background music to celtic harp" - Agent-specific
1543
+ - "use chillwave as default background" - Default for new agents
1544
+
1545
+ Fuzzy matching examples:
1546
+ - "flamenco" matches "agentvibes_soft_flamenco_loop.mp3"
1547
+ - "celtic" matches "agent_vibes_celtic_harp_v1_loop.mp3"
1548
+ - "bossa" matches "agent_vibes_bossa_nova_v2_loop.mp3"
1549
+ """,
1550
+ inputSchema={
1551
+ "type": "object",
1552
+ "properties": {
1553
+ "track_name": {
1554
+ "type": "string",
1555
+ "description": "Track filename or partial name for fuzzy matching (e.g., 'celtic', 'flamenco', 'bossa nova')",
1556
+ },
1557
+ "agent_name": {
1558
+ "type": "string",
1559
+ "description": "Agent name to configure (optional). Use 'all' for all agents, omit for default",
1560
+ },
1561
+ },
1562
+ "required": ["track_name"],
1563
+ },
1564
+ ),
1565
+ Tool(
1566
+ name="enable_background_music",
1567
+ description="Enable or disable background music globally. When enabled, TTS audio will be mixed with background music at configured volume (default 30%).",
1568
+ inputSchema={
1569
+ "type": "object",
1570
+ "properties": {
1571
+ "enabled": {
1572
+ "type": "boolean",
1573
+ "description": "True to enable background music, False to disable",
1574
+ }
1575
+ },
1576
+ "required": ["enabled"],
1577
+ },
1578
+ ),
1579
+ Tool(
1580
+ name="set_background_music_volume",
1581
+ description="Set the volume level for background music (0.0-1.0). Recommended: 0.20-0.40 for subtle background ambiance.",
1582
+ inputSchema={
1583
+ "type": "object",
1584
+ "properties": {
1585
+ "volume": {
1586
+ "type": "number",
1587
+ "description": "Volume level (0.0 = silent, 0.30 = default, 1.0 = full volume)",
1588
+ "minimum": 0.0,
1589
+ "maximum": 1.0,
1590
+ }
1591
+ },
1592
+ "required": ["volume"],
1593
+ },
1594
+ ),
1595
+ Tool(
1596
+ name="get_background_music_status",
1597
+ description="Get current background music configuration including enabled status, volume, default track, and number of available tracks.",
1598
+ inputSchema={"type": "object", "properties": {}},
1599
+ ),
1600
+ Tool(
1601
+ name="set_reverb",
1602
+ description="""Set reverb level for TTS audio. Can apply globally (default agent), to a specific agent, or to all agents.
1603
+
1604
+ Reverb adds room/space ambiance to the voice, making it sound like it's in a small room, conference room, or large hall.
1605
+
1606
+ Examples:
1607
+ - set_reverb(level="medium") - Set reverb for default agent
1608
+ - set_reverb(level="cathedral", agent="Winston") - Set cathedral reverb for Winston
1609
+ - set_reverb(level="light", apply_all=True) - Set light reverb for all agents
1610
+ - set_reverb(level="off") - Turn off reverb for default agent
1611
+ """,
1612
+ inputSchema={
1613
+ "type": "object",
1614
+ "properties": {
1615
+ "level": {
1616
+ "type": "string",
1617
+ "description": "Reverb level",
1618
+ "enum": ["off", "light", "medium", "heavy", "cathedral"]
1619
+ },
1620
+ "agent": {
1621
+ "type": "string",
1622
+ "description": "Agent name (optional, defaults to 'default'). Examples: Winston, John, Mary, Amelia",
1623
+ },
1624
+ "apply_all": {
1625
+ "type": "boolean",
1626
+ "description": "Apply to all agents (optional, default: false)",
1627
+ }
1628
+ },
1629
+ "required": ["level"],
1630
+ },
1631
+ ),
1632
+ Tool(
1633
+ name="get_reverb",
1634
+ description="Get current reverb level for a specific agent or default",
1635
+ inputSchema={
1636
+ "type": "object",
1637
+ "properties": {
1638
+ "agent": {
1639
+ "type": "string",
1640
+ "description": "Agent name (optional, defaults to 'default')",
1641
+ }
1642
+ },
1643
+ },
1644
+ ),
1645
+ Tool(
1646
+ name="list_audio_effects",
1647
+ description="List current audio effects configuration for all agents, including reverb levels and other effects",
1648
+ inputSchema={"type": "object", "properties": {}},
1649
+ ),
1650
+ Tool(
1651
+ name="clean_audio_cache",
1652
+ description="Clean all TTS audio cache files and report space freed. Non-interactive cleanup that removes all wav/mp3/aiff files while preserving background music tracks.",
1653
+ inputSchema={"type": "object", "properties": {}},
1654
+ ),
1655
+ Tool(
1656
+ name="get_hermes_config",
1657
+ description="Get current Hermes AgentVibes SSH configuration (SSH key path, host, port, voice). Use this to check what's currently set before changing it.",
1658
+ inputSchema={"type": "object", "properties": {}},
1659
+ ),
1660
+ Tool(
1661
+ name="set_hermes_config",
1662
+ description="Configure Hermes AgentVibes TTS settings. Choose 'local' mode when Hermes runs on the same machine as your speakers (no SSH needed), or 'remote' mode to send audio over SSH to a receiver. Omit any field to keep its current value.",
1663
+ inputSchema={
1664
+ "type": "object",
1665
+ "properties": {
1666
+ "mode": {
1667
+ "type": "string",
1668
+ "enum": ["local", "remote"],
1669
+ "description": "'local' = Hermes and speakers on same machine (no SSH). 'remote' = send audio over SSH to a receiver machine.",
1670
+ },
1671
+ "ssh_key": {
1672
+ "type": "string",
1673
+ "description": "Absolute path to SSH private key (e.g. /home/user/.ssh/id_ed25519_agentvibes) — only used in remote mode",
1674
+ },
1675
+ "host": {
1676
+ "type": "string",
1677
+ "description": "Tailscale IP or hostname of the machine with speakers — only used in remote mode",
1678
+ },
1679
+ "port": {
1680
+ "type": "string",
1681
+ "description": "AgentVibes receiver SSH port (e.g. '2222') — only used in remote mode",
1682
+ },
1683
+ "voice": {
1684
+ "type": "string",
1685
+ "description": "Piper voice model (e.g. 'en_US-libritts-high::Leo-8')",
1686
+ },
1687
+ },
1688
+ },
1689
+ ),
1690
+ ]
1691
+
1692
+
1693
@app.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    """Dispatch an MCP tool invocation to the matching AgentVibes operation.

    Args:
        name: Name of the requested tool.
        arguments: Arguments supplied by the caller for that tool.

    Returns:
        A single-item list holding a text payload: the awaited result of the
        operation, an "Unknown tool: ..." notice when the name is not
        recognised, or an "Error: ..." message when anything raised while
        the call was being handled.
    """
    try:
        # Every entry is a zero-argument callable so that argument extraction
        # and the attribute lookup on agent_vibes only happen for the tool
        # that was actually requested.
        handlers = {
            "text_to_speech": lambda: agent_vibes.text_to_speech(
                text=arguments["text"],
                voice=arguments.get("voice"),
                personality=arguments.get("personality"),
                language=arguments.get("language"),
            ),
            "list_voices": lambda: agent_vibes.list_voices(),
            "set_voice": lambda: agent_vibes.set_voice(arguments["voice_name"]),
            "list_personalities": lambda: agent_vibes.list_personalities(),
            "set_personality": lambda: agent_vibes.set_personality(
                arguments["personality"]
            ),
            "set_language": lambda: agent_vibes.set_language(arguments["language"]),
            "get_config": lambda: agent_vibes.get_config(),
            "replay_audio": lambda: agent_vibes.replay_audio(arguments.get("n", 1)),
            "set_provider": lambda: agent_vibes.set_provider(arguments["provider"]),
            "set_learn_mode": lambda: agent_vibes.set_learn_mode(
                arguments["enabled"]
            ),
            "set_speed": lambda: agent_vibes.set_speed(
                arguments["speed"], arguments.get("target", False)
            ),
            "get_speed": lambda: agent_vibes.get_speed(),
            "download_extra_voices": lambda: agent_vibes.download_extra_voices(
                arguments.get("auto_yes", False)
            ),
            "get_verbosity": lambda: agent_vibes.get_verbosity(),
            "set_verbosity": lambda: agent_vibes.set_verbosity(arguments["level"]),
            "mute": lambda: agent_vibes.mute(),
            "unmute": lambda: agent_vibes.unmute(),
            "is_muted": lambda: agent_vibes.is_muted(),
            "list_background_music": lambda: agent_vibes.list_background_music(),
            "set_background_music": lambda: agent_vibes.set_background_music(
                arguments.get("track_name"), arguments.get("agent_name")
            ),
            "enable_background_music": lambda: agent_vibes.enable_background_music(
                arguments.get("enabled")
            ),
            "set_background_music_volume": (
                lambda: agent_vibes.set_background_music_volume(
                    arguments.get("volume")
                )
            ),
            "get_background_music_status": (
                lambda: agent_vibes.get_background_music_status()
            ),
            "set_reverb": lambda: agent_vibes.set_reverb(
                arguments["level"],
                arguments.get("agent", "default"),
                arguments.get("apply_all", False),
            ),
            "get_reverb": lambda: agent_vibes.get_reverb(
                arguments.get("agent", "default")
            ),
            "list_audio_effects": lambda: agent_vibes.list_audio_effects(),
            "clean_audio_cache": lambda: agent_vibes.clean_audio_cache(),
            "get_hermes_config": lambda: agent_vibes.get_hermes_config(),
            "set_hermes_config": lambda: agent_vibes.set_hermes_config(
                mode=arguments.get("mode"),
                ssh_key=arguments.get("ssh_key"),
                host=arguments.get("host"),
                port=arguments.get("port"),
                voice=arguments.get("voice"),
            ),
        }

        handler = handlers.get(name)
        if handler is None:
            result = f"Unknown tool: {name}"
        else:
            result = await handler()

        return [TextContent(type="text", text=result)]

    except Exception as exc:
        # Boundary of the tool call: failures are reported back to the caller
        # as text rather than propagated.
        return [TextContent(type="text", text="Error: " + str(exc))]
1784
+
1785
+
1786
async def main():
    """Serve the MCP application over the stdio transport.

    Opens the stdio server, then runs the application on the resulting pair
    of streams using the application's own initialization options.
    """
    async with mcp.server.stdio.stdio_server() as streams:
        incoming, outgoing = streams
        run_server = app.run
        init_options = app.create_initialization_options()
        await run_server(incoming, outgoing, init_options)
1794
+
1795
+
1796
if __name__ == "__main__":
    # Script entry point: drive the server coroutine on a fresh event loop.
    run = asyncio.run
    server_coroutine = main()
    run(server_coroutine)