agentvibes 4.2.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/.agentvibes/bmad/bmad-voices.md +69 -69
  2. package/.agentvibes/config.json +12 -0
  3. package/.claude/activation-instructions +54 -54
  4. package/.claude/audio/tracks/README.md +52 -52
  5. package/.claude/commands/agent-vibes/add.md +21 -21
  6. package/.claude/commands/agent-vibes/agent-vibes.md +101 -101
  7. package/.claude/commands/agent-vibes/agent.md +79 -79
  8. package/.claude/commands/agent-vibes/background-music.md +111 -111
  9. package/.claude/commands/agent-vibes/bmad.md +198 -198
  10. package/.claude/commands/agent-vibes/clean.md +18 -18
  11. package/.claude/commands/agent-vibes/cleanup.md +18 -18
  12. package/.claude/commands/agent-vibes/commands.json +145 -145
  13. package/.claude/commands/agent-vibes/effects.md +97 -97
  14. package/.claude/commands/agent-vibes/get.md +9 -9
  15. package/.claude/commands/agent-vibes/hide.md +91 -91
  16. package/.claude/commands/agent-vibes/language.md +23 -23
  17. package/.claude/commands/agent-vibes/learn.md +67 -67
  18. package/.claude/commands/agent-vibes/list.md +13 -13
  19. package/.claude/commands/agent-vibes/mute.md +37 -37
  20. package/.claude/commands/agent-vibes/preview.md +17 -17
  21. package/.claude/commands/agent-vibes/provider.md +68 -68
  22. package/.claude/commands/agent-vibes/replay-target.md +14 -14
  23. package/.claude/commands/agent-vibes/sample.md +12 -12
  24. package/.claude/commands/agent-vibes/set-favorite-voice.md +84 -84
  25. package/.claude/commands/agent-vibes/set-pretext.md +65 -65
  26. package/.claude/commands/agent-vibes/set-speed.md +41 -41
  27. package/.claude/commands/agent-vibes/show.md +84 -84
  28. package/.claude/commands/agent-vibes/switch.md +87 -87
  29. package/.claude/commands/agent-vibes/target-voice.md +26 -26
  30. package/.claude/commands/agent-vibes/target.md +30 -30
  31. package/.claude/commands/agent-vibes/translate.md +68 -68
  32. package/.claude/commands/agent-vibes/unmute.md +45 -45
  33. package/.claude/commands/agent-vibes/verbosity.md +89 -89
  34. package/.claude/commands/agent-vibes/whoami.md +7 -7
  35. package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
  36. package/.claude/commands/agent-vibes-rdp.md +24 -24
  37. package/.claude/config/agentvibes.json +1 -0
  38. package/.claude/config/audio-effects.cfg +2 -2
  39. package/.claude/config/audio-effects.cfg.sample +52 -52
  40. package/.claude/config/background-music-volume.txt +1 -0
  41. package/.claude/config/intro-text.txt +1 -0
  42. package/.claude/config/piper-speech-rate.txt +4 -0
  43. package/.claude/config/piper-target-speech-rate.txt +1 -0
  44. package/.claude/config/reverb-level.txt +1 -0
  45. package/.claude/config/tts-speech-rate.txt +4 -0
  46. package/.claude/config/tts-target-speech-rate.txt +1 -0
  47. package/.claude/docs/TERMUX_SETUP.md +408 -408
  48. package/.claude/github-star-reminder.txt +1 -1
  49. package/.claude/hooks/README-TTS-QUEUE.md +135 -135
  50. package/.claude/hooks/audio-cache-utils.sh +246 -246
  51. package/.claude/hooks/audio-processor.sh +433 -433
  52. package/.claude/hooks/background-music-manager.sh +404 -404
  53. package/.claude/hooks/bmad-speak-enhanced.sh +165 -165
  54. package/.claude/hooks/bmad-speak.sh +269 -269
  55. package/.claude/hooks/bmad-tts-injector.sh +568 -568
  56. package/.claude/hooks/bmad-voice-manager.sh +928 -928
  57. package/.claude/hooks/clawdbot-receiver-SECURE.sh +129 -129
  58. package/.claude/hooks/clawdbot-receiver.sh +107 -107
  59. package/.claude/hooks/clean-audio-cache.sh +22 -22
  60. package/.claude/hooks/cleanup-cache.sh +106 -106
  61. package/.claude/hooks/configure-rdp-mode.sh +137 -137
  62. package/.claude/hooks/download-extra-voices.sh +244 -244
  63. package/.claude/hooks/effects-manager.sh +268 -268
  64. package/.claude/hooks/github-star-reminder.sh +154 -154
  65. package/.claude/hooks/language-manager.sh +362 -362
  66. package/.claude/hooks/learn-manager.sh +492 -492
  67. package/.claude/hooks/macos-voice-manager.sh +205 -205
  68. package/.claude/hooks/migrate-background-music.sh +125 -125
  69. package/.claude/hooks/migrate-to-agentvibes.sh +161 -161
  70. package/.claude/hooks/optimize-background-music.sh +87 -87
  71. package/.claude/hooks/path-resolver.sh +60 -60
  72. package/.claude/hooks/personality-manager.sh +448 -448
  73. package/.claude/hooks/piper-download-voices.sh +225 -225
  74. package/.claude/hooks/piper-installer.sh +292 -292
  75. package/.claude/hooks/piper-multispeaker-registry.sh +171 -171
  76. package/.claude/hooks/piper-voice-manager.sh +24 -3
  77. package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +90 -90
  78. package/.claude/hooks/play-tts-enhanced.sh +105 -105
  79. package/.claude/hooks/play-tts-macos.sh +368 -368
  80. package/.claude/hooks/play-tts-piper.sh +679 -679
  81. package/.claude/hooks/play-tts-soprano.sh +356 -356
  82. package/.claude/hooks/play-tts-ssh-remote.sh +167 -167
  83. package/.claude/hooks/play-tts-termux-ssh.sh +169 -169
  84. package/.claude/hooks/play-tts.sh +301 -301
  85. package/.claude/hooks/prepare-release.sh +54 -54
  86. package/.claude/hooks/provider-commands.sh +617 -617
  87. package/.claude/hooks/provider-manager.sh +399 -399
  88. package/.claude/hooks/replay-target-audio.sh +95 -95
  89. package/.claude/hooks/requirements.txt +6 -6
  90. package/.claude/hooks/sentiment-manager.sh +201 -201
  91. package/.claude/hooks/session-start-tts.sh +81 -81
  92. package/.claude/hooks/soprano-gradio-synth.py +139 -139
  93. package/.claude/hooks/speed-manager.sh +291 -291
  94. package/.claude/hooks/stop-tts.sh +84 -84
  95. package/.claude/hooks/termux-installer.sh +261 -261
  96. package/.claude/hooks/translate-manager.sh +341 -341
  97. package/.claude/hooks/translator.py +237 -237
  98. package/.claude/hooks/tts-queue-worker.sh +145 -145
  99. package/.claude/hooks/tts-queue.sh +165 -165
  100. package/.claude/hooks/verbosity-manager.sh +178 -178
  101. package/.claude/hooks/voice-manager.sh +548 -548
  102. package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
  103. package/.claude/hooks-windows/background-music-manager.ps1 +348 -0
  104. package/.claude/hooks-windows/clean-audio-cache.ps1 +53 -0
  105. package/.claude/hooks-windows/download-extra-voices.ps1 +185 -0
  106. package/.claude/hooks-windows/effects-manager.ps1 +294 -0
  107. package/.claude/hooks-windows/language-manager.ps1 +193 -0
  108. package/.claude/hooks-windows/learn-manager.ps1 +241 -0
  109. package/.claude/hooks-windows/personality-manager.ps1 +266 -0
  110. package/.claude/hooks-windows/play-tts-piper.ps1 +209 -0
  111. package/.claude/hooks-windows/play-tts-sapi.ps1 +108 -0
  112. package/.claude/hooks-windows/play-tts-soprano.ps1 +159 -158
  113. package/.claude/hooks-windows/play-tts-windows-piper.ps1 +50 -5
  114. package/.claude/hooks-windows/play-tts-windows-sapi.ps1 +108 -108
  115. package/.claude/hooks-windows/play-tts.ps1 +344 -266
  116. package/.claude/hooks-windows/provider-manager.ps1 +29 -10
  117. package/.claude/hooks-windows/session-start-tts.ps1 +124 -124
  118. package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
  119. package/.claude/hooks-windows/speed-manager.ps1 +166 -0
  120. package/.claude/hooks-windows/verbosity-manager.ps1 +119 -0
  121. package/.claude/hooks-windows/voice-manager-windows.ps1 +92 -8
  122. package/.claude/output-styles/agent-vibes.md +202 -202
  123. package/.claude/personalities/angry.md +14 -14
  124. package/.claude/personalities/annoying.md +14 -14
  125. package/.claude/personalities/crass.md +14 -14
  126. package/.claude/personalities/dramatic.md +14 -14
  127. package/.claude/personalities/dry-humor.md +50 -50
  128. package/.claude/personalities/flirty.md +20 -20
  129. package/.claude/personalities/funny.md +14 -14
  130. package/.claude/personalities/grandpa.md +32 -32
  131. package/.claude/personalities/millennial.md +14 -14
  132. package/.claude/personalities/moody.md +14 -14
  133. package/.claude/personalities/normal.md +16 -16
  134. package/.claude/personalities/pirate.md +14 -14
  135. package/.claude/personalities/poetic.md +14 -14
  136. package/.claude/personalities/professional.md +14 -14
  137. package/.claude/personalities/rapper.md +55 -55
  138. package/.claude/personalities/robot.md +14 -14
  139. package/.claude/personalities/sarcastic.md +38 -38
  140. package/.claude/personalities/sassy.md +14 -14
  141. package/.claude/personalities/surfer-dude.md +14 -14
  142. package/.claude/personalities/zen.md +14 -14
  143. package/.claude/settings.json +15 -15
  144. package/.claude/verbosity.txt +1 -1
  145. package/.clawdbot/README.md +105 -105
  146. package/.clawdbot/skill/SKILL.md +241 -241
  147. package/.mcp.json +12 -0
  148. package/CLAUDE.md +170 -170
  149. package/README.md +2029 -2007
  150. package/RELEASE_NOTES.md +1310 -1203
  151. package/WINDOWS-SETUP.md +208 -208
  152. package/bin/agent-vibes +39 -39
  153. package/bin/agentvibes-voice-browser.js +1840 -1840
  154. package/bin/agentvibes.js +48 -2
  155. package/bin/mcp-server.js +121 -121
  156. package/bin/mcp-server.sh +206 -206
  157. package/bin/test-bmad-pr +78 -78
  158. package/mcp-server/QUICK_START.md +203 -203
  159. package/mcp-server/README.md +345 -345
  160. package/mcp-server/WINDOWS_SETUP.md +260 -260
  161. package/mcp-server/docs/troubleshooting-audio.md +313 -313
  162. package/mcp-server/examples/claude_desktop_config.json +11 -11
  163. package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
  164. package/mcp-server/examples/custom_instructions.md +169 -169
  165. package/mcp-server/install-deps.js +130 -130
  166. package/mcp-server/pyproject.toml +52 -52
  167. package/mcp-server/requirements.txt +2 -2
  168. package/mcp-server/server.py +1465 -1453
  169. package/mcp-server/test_server.py +395 -395
  170. package/mcp-server/test_windows_script_parity.py +336 -0
  171. package/package.json +110 -110
  172. package/setup-windows.ps1 +815 -815
  173. package/src/bmad-detector.js +71 -71
  174. package/src/cli/list-personalities.js +110 -110
  175. package/src/cli/list-voices.js +114 -114
  176. package/src/commands/bmad-voices.js +394 -394
  177. package/src/commands/install-mcp.js +476 -476
  178. package/src/console/app.js +824 -824
  179. package/src/console/audio-env.js +20 -1
  180. package/src/console/brand-colors.js +13 -13
  181. package/src/console/constants/personalities.js +44 -44
  182. package/src/console/footer-config.js +50 -50
  183. package/src/console/modals/modal-overlay.js +247 -247
  184. package/src/console/navigation.js +62 -62
  185. package/src/console/tabs/agents-tab.js +1684 -1516
  186. package/src/console/tabs/help-tab.js +261 -261
  187. package/src/console/tabs/install-tab.js +1007 -991
  188. package/src/console/tabs/music-tab.js +22 -8
  189. package/src/console/tabs/placeholder-tab.js +53 -53
  190. package/src/console/tabs/readme-tab.js +267 -267
  191. package/src/console/tabs/receiver-tab.js +1472 -1212
  192. package/src/console/tabs/settings-tab.js +152 -79
  193. package/src/console/tabs/voices-tab.js +100 -21
  194. package/src/console/widgets/destroy-list.js +25 -25
  195. package/src/console/widgets/format-utils.js +89 -89
  196. package/src/console/widgets/notice.js +55 -55
  197. package/src/console/widgets/personality-picker.js +185 -185
  198. package/src/console/widgets/reverb-picker.js +94 -94
  199. package/src/console/widgets/track-picker.js +285 -285
  200. package/src/installer/music-file-input.js +304 -304
  201. package/src/installer.js +5882 -5829
  202. package/src/services/agent-voice-store.js +423 -423
  203. package/src/services/config-service.js +264 -264
  204. package/src/services/navigation-service.js +123 -123
  205. package/src/services/provider-service.js +132 -132
  206. package/src/services/verbosity-service.js +157 -157
  207. package/src/utils/audio-duration-validator.js +298 -298
  208. package/src/utils/audio-format-validator.js +277 -277
  209. package/src/utils/dependency-checker.js +469 -466
  210. package/src/utils/file-ownership-verifier.js +358 -358
  211. package/src/utils/list-formatter.js +194 -194
  212. package/src/utils/music-file-validator.js +285 -285
  213. package/src/utils/preview-list-prompt.js +136 -136
  214. package/src/utils/provider-validator.js +96 -12
  215. package/src/utils/secure-music-storage.js +412 -412
  216. package/templates/agentvibes-receiver.sh +482 -482
  217. package/templates/audio/welcome-music.mp3 +0 -0
  218. package/voice-assignments.json +8244 -8244
  219. package/.claude/config/background-music-position.txt +0 -1
package/README.md CHANGED
@@ -1,2007 +1,2029 @@
1
- # 🎤 AgentVibes
2
-
3
- > **Finally! Your agents can talk back!**
4
- >
5
- > 🌐 **[agentvibes.org](https://agentvibes.org)**
6
- >
7
- > Professional text-to-speech for **Claude Code**, **Claude Desktop**, **Warp Terminal**, and **OpenClaw** - **Soprano** (Neural), **Piper TTS** (Free!), **macOS Say** (Built-in!), or **Windows SAPI** (Zero Setup!)
8
-
9
- [![npm version](https://img.shields.io/npm/v/agentvibes)](https://www.npmjs.com/package/agentvibes)
10
- [![Test Suite](https://github.com/paulpreibisch/AgentVibes/actions/workflows/test.yml/badge.svg)](https://github.com/paulpreibisch/AgentVibes/actions/workflows/test.yml)
11
- [![Publish](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml/badge.svg)](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml)
12
- [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
13
-
14
- **Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v4.2
15
-
16
- ---
17
-
18
- ## 🚀 Quick Links
19
-
20
- | I want to... | Go here |
21
- |--------------|---------|
22
- | **Install AgentVibes** (just `npx`, no git!) | [Quick Start Guide](docs/quick-start.md) |
23
- | **Run Claude Code on Android** | [Android/Termux Setup](#-android--termux) |
24
- | **Secure OpenClaw on Remote Server** | [Security Hardening Guide](docs/security-hardening-guide.md) ⚠️ |
25
- | **Understand what I need** | [Prerequisites](#-prerequisites) |
26
- | **Set up on Windows (Native)** | [Windows Native Setup](WINDOWS-SETUP.md) |
27
- | **Set up on Windows (Claude Desktop/WSL)** | [Windows WSL Guide](mcp-server/WINDOWS_SETUP.md) |
28
- | **Use with OpenClaw** | [OpenClaw Integration](#-openclaw-integration) |
29
- | **Use natural language** | [MCP Setup](docs/mcp-setup.md) |
30
- | **Switch voices** | [Voice Library](docs/voice-library.md) |
31
- | **Fix issues** (git-lfs? MCP tokens? Read this!) | [Troubleshooting](docs/troubleshooting.md) & [FAQ](#-frequently-asked-questions-faq) |
32
-
33
- ---
34
-
35
- ## ✨ What is AgentVibes?
36
-
37
- **AgentVibes adds lively voice narration to your Claude AI sessions!**
38
-
39
- Whether you're coding in Claude Code, chatting in Claude Desktop, using Warp Terminal, or running OpenClaw - AgentVibes brings AI to life with professional voices and personalities.
40
-
41
- ---
42
-
43
- ## 🌟 NEW IN v4.2 — BMAD Voices, SSH Receiver & More
44
-
45
- ### 🎭 BMAD Party Mode — Every Agent Has Its Own Voice
46
-
47
- The BMad Method (Build More Architect Dreams) is an AI-driven development framework that helps you build software from ideation through agentic implementation with specialized AI agents, guided workflows, and intelligent planning that adapts to your project's complexity.
48
-
49
- **Every BMAD agent now speaks with their own unique voice, music, and personality.**
50
-
51
- When party mode runs a multi-agent discussion, the Architect, PM, Developer, QA, and Analyst each sound completely different — making every role immediately recognizable.
52
-
53
- **Auto-enabled** — if BMAD is installed, party mode activates automatically. Open the BMad Tab to configure each agent:
54
-
55
- ```bash
56
- npx agentvibes # Press B to open the BMad Tab
57
- ```
58
-
59
- **Per-agent configuration:**
60
- - 🎙️ **Voice** — 914 voices to choose from, auto-assigned gender-aware
61
- - 🎵 **Background Music** — Unique ambient track per agent (cinematic, lo-fi, jazz...)
62
- - 🎚️ **Music Volume** — Per-agent level, or set all at once via Bulk Edit
63
- - 🎛️ **Reverb** — none / room / hall / cathedral / studio per agent
64
- - 💬 **Pretext** — Custom intro phrase ("Winston says:..." before every line)
65
- - 🎭 **Personality** — sarcastic, dramatic, pirate, cheerful, and more
66
- - 🔇 **No Overlap** — Speech lock ensures agents never talk over each other
67
- - **Markdown-Clean** — Asterisks and formatting stripped before TTS
68
-
69
- ### 🎛️ BMad Tab — Visual Agent Configurator
70
-
71
- The `npx agentvibes` TUI now includes a full **BMad Tab** for managing every agent visually — inspired by the Voices tab, with the same columns and navigation polish:
72
-
73
- ```bash
74
- npx agentvibes # Press B for BMad Tab
75
- ```
76
-
77
- | Agent | Voice | Gender | Provider | Reverb | Music | Vol | Pretext |
78
- |-------|-------|--------|----------|--------|-------|-----|---------|
79
- | 🏢 Winston | Rose Ibex | Female | Piper (LibriTTS) | studio | jazz | 65% | Winston says |
80
- | 🧠 Larry | Kusal | Male | Piper | hall | cinematic | 80% | Larry says |
81
-
82
- **Highlights:**
83
- - **Beautified voice names** — `16Speakers::Rose_Ibex` shows as `Rose Ibex`; `en_US-kusal-medium` shows as `Kusal`
84
- - **Gender & Provider columns** — see voice metadata at a glance, just like the Voices tab
85
- - **Inline row hints** — navigate to any agent and see `[Space] Preview [Enter] Configure` on the row itself
86
- - **Preview spinner** — animated `⠋⠙⠹⠸` braille spinner while audio plays
87
-
88
- | Key | Action |
89
- |-----|--------|
90
- | `↑↓` / `jk` | Navigate agents |
91
- | `Space` | Preview agent (spinner shows while playing) |
92
- | `Enter` | Configure voice, music, volume, reverb, personality, pretext |
93
- | `A` | Auto-assign unique voices (gender-aware, no repeats) |
94
- | `B` | Bulk Edit — set music / volume / pretext / reverb for all agents |
95
- | `X` | Reset agent to defaults |
96
-
97
- ---
98
-
99
- ### 🖥️ SSH Receiver — Hear Your Headless Server
100
-
101
- **Run Claude on a cloud box and hear the TTS on your local machine.**
102
-
103
- The new **Receiver Tab** streams TTS audio from voiceless remote servers to your local machine over TCP — perfect for AWS/GCP dev boxes, WSL2, and SSH sessions.
104
-
105
- ```bash
106
- # On your local machine — open TUI, go to Receiver tab, click Start
107
- npx agentvibes
108
-
109
- # On the remote server — AgentVibes auto-detects the receiver and streams
110
- ```
111
-
112
- Zero-config forwarding. Works with Piper, macOS Say, and Soprano.
113
-
114
- ---
115
-
116
- ### TTS Latency -~1 Second
117
-
118
- - **Batched Node.js calls** — 6 separate profile reads collapsed into 1 (~900ms saved)
119
- - **inotifywait queue** — file-event-based worker, no polling delay
120
- - **Background cache cleanup** — off the critical path every 10th call
121
-
122
- ---
123
-
124
- ### 🎨 ANSI Banner Colors + Toggle
125
-
126
- Full color in the TTS banner (gold voice, cyan reverb, traffic-light cache). Hide it without muting:
127
-
128
- ```bash
129
- touch ~/.agentvibes/banner-disabled # or say "turn off the TTS banner"
130
- ```
131
-
132
- ---
133
-
134
- ### 💬 Intro Text (Pretext) - Your Personal AI Branding
135
-
136
- **Add custom prefixes to every TTS announcement!**
137
-
138
- Configure via the AgentVibes TUI Settings tab:
139
-
140
- ```bash
141
- npx agentvibes # Navigate to Settings tab
142
- ```
143
-
144
- Transform generic AI responses into your personal brand:
145
-
146
- **Before:**
147
- ```
148
- "Starting analysis of the codebase..."
149
- ```
150
-
151
- **After (with "FireBot: " intro text):**
152
- ```
153
- "FireBot: Starting analysis of the codebase..."
154
- ```
155
-
156
- **Perfect for:**
157
- - 🤖 **Personal AI Branding** - Make Claude sound like your custom assistant
158
- - 🏢 **Team Identity** - Company bots with branded voices
159
- - 🎮 **Character Roleplay** - Gaming assistants with character names
160
- - 🎓 **Teaching Contexts** - Professor Bot, Tutor AI, etc.
161
-
162
- **Features:**
163
- - Up to 50 characters
164
- - UTF-8 and emoji support 🎉
165
- - Set during installation or anytime after
166
- - Works with all TTS providers
167
- - Applies to every single announcement
168
-
169
- **Examples:**
170
- - `"JARVIS: "` - Iron Man style
171
- - `"🤖 Assistant: "` - With emoji
172
- - `"CodeBot: "` - Development assistant
173
- - `"Chef AI: "` - Cooking helper
174
-
175
- Configure via: `npx agentvibes` Settings tab
176
-
177
- ---
178
-
179
- ### 🎵 Custom Background Music - Complete Audio Control
180
-
181
- **Upload your own background music with battle-tested security!**
182
-
183
- Configure via the AgentVibes TUI Music tab:
184
-
185
- ```bash
186
- npx agentvibes # Navigate to Music tab
187
- ```
188
-
189
- Replace the default background tracks with your own audio files.
190
-
191
- **Supported Formats:**
192
- - 🎵 MP3 (.mp3)
193
- - 🎵 WAV (.wav)
194
- - 🎵 OGG (.ogg)
195
- - 🎵 M4A (.m4a)
196
-
197
- **Security First:**
198
- - ✅ **180+ attack variations tested** - Path traversal, symlinks, Unicode tricks
199
- - ✅ **100% attack rejection rate** - Every malicious attempt blocked
200
- - ✅ **OWASP CWE-22 compliant** - Industry-standard security
201
- - **7 validation layers** - Defense-in-depth architecture
202
- - ✅ **File ownership verification** - Only your files accepted
203
- - ✅ **Magic number validation** - Real audio files only
204
- - ✅ **Secure storage** - 600 permissions, restricted directory
205
-
206
- **Smart Validation:**
207
- - Recommended duration: 30-90 seconds (optimal looping)
208
- - Maximum: 300 seconds (5 minutes)
209
- - Maximum size: 50MB
210
- - Automatic format detection
211
- - Duration warnings for non-optimal lengths
212
-
213
- **Perfect for:**
214
- - 🎮 **Making coding fun** - Your favorite beats while you build
215
- - 🎼 **Setting the mood** - Match the music to the task (lo-fi for debugging, epic for shipping)
216
- - 🗂️ **Identifying projects** - Different track per repo so you always know which project Claude is in
217
- - 🎹 **Deep focus** - Ambient or classical to stay in flow
218
-
219
- **Features:**
220
- - Preview before setting
221
- - One-command upload
222
- - Works with all TTS providers
223
- - Loops seamlessly under voice
224
- - Easy restore to defaults
225
-
226
- **Menu Options:**
227
- 1. Change music - Upload new audio file
228
- 2. Remove music - Clear custom music
229
- 3. Reset to default - Restore built-in tracks (16 genres)
230
- 4. Enable/Disable - Toggle background music
231
- 5. Preview current - Sample your music
232
-
233
- Configure via: `npx agentvibes` Music tab
234
-
235
- **Security Certified:** See full audit report at `docs/security/SECURITY-AUDIT.md`
236
-
237
- ---
238
-
239
- ### 🎯 Key Features
240
-
241
- **🌟 NEW IN v4.2 — BMAD Party Mode & SSH Receiver:**
242
- - 🎭 **BMAD Party Mode Voices** — Each agent speaks with their unique voice, music, reverb, personality
243
- - 🖥️ **SSH Receiver Tab** — Stream TTS audio from headless servers to your local machine over TCP
244
- - 🎛️ **BMad Tab (TUI)** — Visual agent configurator with auto-assign and bulk edit
245
- - **TTS Latency -1s** — Batched Node.js calls, inotifywait queue, background cleanup
246
- - 🎨 **ANSI Banner Colors Restored** — Gold/cyan/traffic-light colors in TTS info banner
247
- - 🔕 **Banner Toggle** — Hide TTS banner without muting (`~/.agentvibes/banner-disabled`)
248
- - 🔇 **No Party Mode Overlap** — Agents wait for full audio before next speaks
249
- - 🧹 **Markdown-Clean Speech** — Asterisks/formatting stripped automatically from party mode
250
-
251
- **🌟 NEW IN v3.6.0 — Voice Explorer Release:**
252
- - 🏷️ **Friendly Voice Names** - "Ryan" instead of "en_US-libritts_r-medium-speaker-123"
253
- - 💬 **Intro Text (Pretext)** - Custom prefix for all TTS ("FireBot: Starting...")
254
- - 🎵 **Custom Background Music** - Upload your own audio files with battle-tested security
255
- - 🎨 **Interactive Installer** - Preview voices and music during installation
256
- - 🛡️ **Security Hardening** - 180+ attack variations tested, 100% blocked, OWASP compliant
257
-
258
- **🪟 NEW IN v3.5.5 — Native Windows Support:**
259
- - 🖥️ **Windows Native TTS** - Soprano, Piper, and Windows SAPI providers. No WSL required!
260
- - 🎵 **Background Music** - 16 genre tracks mixed under voice
261
- - 🎛️ **Reverb & Audio Effects** - 5 reverb levels via ffmpeg
262
- - 🔊 **Verbosity Control** - High, Medium, or Low settings
263
- - 🎨 **Beautiful Installer** - `npx agentvibes install` or `.\setup-windows.ps1`
264
-
265
- **⚡ v3.4.0 Highlights:**
266
- - 🎤 **Soprano TTS Provider** - Ultra-fast neural TTS with 20x CPU, 2000x GPU acceleration (thanks [@nathanchase](https://github.com/nathanchase)!)
267
- - 🛡️ **Security Hardening** - 9.5/10 score with comprehensive validation and timeouts
268
- - 🌐 **Environment Intelligence** - PulseAudio tunnel auto-detection for SSH scenarios
269
-
270
- **⚡ Core Features:**
271
- - **One-Command Install** - Get started in 30 seconds (`npx agentvibes install` or `.\setup-windows.ps1` without Node.js)
272
- - 🎭 **Multi-Provider Support** - Soprano (neural), Piper TTS (50+ free voices), macOS Say (100+ built-in), or Windows SAPI
273
- - 🎙️ **27+ Professional AI Voices** - Character voices, accents, and unique personalities
274
- - 🎙️ **Verbosity Control** - Choose how much Claude speaks (LOW, MEDIUM, HIGH)
275
- - 🎙️ **AgentVibes MCP** - Natural language control ("Switch to Aria voice") for Claude Code, Desktop & Warp
276
- - 🔊 **SSH Audio Optimization** - Auto-detects remote sessions and eliminates static (VS Code Remote SSH, cloud dev)
277
-
278
- **🎭 Personalization:**
279
- - 🎭 **19 Built-in Personalities** - From sarcastic to flirty, pirate to dry humor
280
- - 💬 **Advanced Sentiment System** - Apply personality styles to ANY voice without changing it
281
- - 🎵 **Voice Preview & Replay** - Listen before you choose, replay last 10 TTS messages
282
-
283
- **🚀 Integrations & Power Features:**
284
- - 🔌 **Enhanced BMAD Plugin** - Auto voice switching for BMAD agents with multilingual support
285
- - 🔊 **Live Audio Feedback** - Hear task acknowledgments and completions in any language
286
- - 🌍 **30+ Languages** - Multilingual support with native voice quality
287
- - 🆓 **Free & Open** - Use Piper TTS with no API key required
288
-
289
- ### 🤗 Hugging Face AI Voice Models
290
-
291
- **AgentVibes' Piper TTS uses 100% Hugging Face-trained AI voice models** from [rhasspy/piper-voices](https://huggingface.co/rhasspy/piper-voices).
292
-
293
- **What are Hugging Face voice models?**
294
-
295
- Hugging Face voice models are pre-trained artificial intelligence models hosted on the Hugging Face Model Hub platform, designed to convert text into human-like speech (Text-to-Speech or TTS) or perform other speech tasks like voice cloning and speech-to-speech translation. They're accessible via their Transformers library for easy use in applications like voice assistants, audio generation, and more.
296
-
297
- **Key Benefits:**
298
- - 🎯 **Human-like Speech** - VITS-based neural models for natural pronunciation and intonation
299
- - 🌍 **35+ Languages** - Multilingual support with native accents
300
- - 🆓 **100% Open Source** - All Piper voices are free HF models (Tacotron2, FastSpeech2, VITS)
301
- - 🔧 **Developer-Friendly** - Fine-tune, customize, or deploy for various audio projects
302
- - **Offline & Fast** - No API keys, no internet needed once installed
303
-
304
- All 50+ Piper voices AgentVibes provides are sourced from Hugging Face's open-source AI voice models, ensuring high-quality, natural-sounding speech synthesis across all supported platforms.
305
-
306
- ---
307
-
308
- ## 📑 Table of Contents
309
-
310
- ### Getting Started
311
- - [🚀 Quick Start](#-quick-start) - Get voice in 30 seconds (3 simple steps)
312
- - [📱 Android/Termux](#-quick-setup-android--termux-claude-code-on-your-phone) - Run Claude Code on your phone
313
- - [📋 Prerequisites](#-prerequisites) - What you actually need (Node.js + optional tools)
314
- - [✨ What is AgentVibes?](#-what-is-agentvibes) - Overview & key features
315
- - [🌟 NEW FEATURE HIGHLIGHTS](#-new-feature-highlights) - **START HERE!**
316
- - [🎭 BMAD Party Mode](#-bmad-party-mode--multi-agent-voice-conversations) - Per-agent voices, music, reverb
317
- - [🖥️ SSH Receiver](#️-agentvibes-receiver--remote-audio-streaming) - Stream audio from headless servers
318
- - [💬 Intro Text](#-intro-text-pretext---your-personal-ai-branding) - Custom TTS prefixes
319
- - [🎵 Custom Background Music](#-custom-background-music---complete-audio-control) - Upload your own tracks
320
- - [📰 Latest Release](#-latest-release) - v4.2 "Party Mode" — BMAD multi-agent voices, SSH Receiver, BMad Tab, ~1s latency improvement
321
- - [🪟 Windows Setup Guide for Claude Desktop](mcp-server/WINDOWS_SETUP.md) - Complete Windows installation with WSL & Python
322
-
323
- ### AgentVibes MCP (Natural Language Control)
324
- - [🎙️ AgentVibes MCP Overview](#%EF%B8%8F-agentvibes-mcp) - **Easiest way** - Natural language commands
325
- - [For Claude Desktop](docs/mcp-setup.md#for-claude-desktop) - Windows/WSL setup, Python requirements
326
- - [For Warp Terminal](docs/mcp-setup.md#for-warp-terminal) - Warp configuration
327
- - [For Claude Code](docs/mcp-setup.md#for-claude-code) - Project-specific setup
328
-
329
- ### Core Features
330
- - [🎤 Commands Reference](#-commands-reference) - All available commands
331
- - [🎙️ Verbosity Control](#%EF%B8%8F-verbosity-control) - Control how much Claude speaks (low/medium/high)
332
- - [🎭 Personalities vs Sentiments](#-personalities-vs-sentiments) - Two systems explained
333
- - [🗣️ Voice Library](#%EF%B8%8F-voice-library) - 914 voices with friendly names
334
- - [🔌 BMAD Plugin](#-bmad-plugin) - Auto voice switching for BMAD agents
335
- - [🎙️ AgentVibes Receiver - NEW!](#%EF%B8%8F-agentvibes-receiver-remote-audio-streaming-from-voiceless-servers) - Remote audio streaming from voiceless servers
336
-
337
- ### Integrations & Platforms
338
- - [🤖 OpenClaw Integration](#-openclaw-integration) - Use AgentVibes with OpenClaw messaging platform
339
- - [🎙️ AgentVibes Skill for OpenClaw](#-agentvibes-skill-for-openclaw---what-you-get) - 50+ voices, effects, personalities for OpenClaw
340
- - [📱 AgentVibes Receiver](#-agentvibes-receiver-local-phone-) - Remote audio on phones/local machines
341
-
342
- ### Advanced Topics
343
- - [📦 Installation Structure](#-installation-structure) - What gets installed
344
- - [💡 Common Workflows](#-common-workflows) - Quick examples
345
- - [🔧 Advanced Features](#-advanced-features) - Custom voices & personalities
346
- - [🔊 Remote Audio Setup](#-remote-audio-setup) - Play TTS from remote servers
347
- - [🛠️ Technical Documentation](#️-technical-documentation) - Audio architecture, cross-platform support, voice resolution
348
- - [🚨 Security Hardening Guide](docs/security-hardening-guide.md) - **REQUIRED if running OpenClaw on remote server**: SSH hardening, Fail2Ban, Tailscale, UFW, AIDE
349
- - [🔬 Technical Deep Dive](docs/technical-deep-dive.md) - How AgentVibes works under the hood
350
- - [❓ Troubleshooting](#-troubleshooting) - Common issues & fixes
351
-
352
- ### Additional Resources
353
- - [🔗 Useful Links](#-useful-links) - Voice typing & AI tools
354
- - [🔄 Updating](#-updating) - Keep AgentVibes current
355
- - [🗑️ Uninstalling](#️-uninstalling) - Remove AgentVibes cleanly
356
- - [ FAQ](#-frequently-asked-questions-faq) - **NEW!** Common questions answered (git-lfs, MCP tokens, installation)
357
- - [🍎 macOS Testing](docs/macos-testing.md) - Automated testing on macOS with GitHub Actions
358
- - [🤗 Hugging Face Voice Models](docs/hugging-face-models.md) - Technical details on AI voice models
359
- - [🙏 Credits](#-credits) - Acknowledgments
360
- - [🤝 Contributing](#-contributing) - Show support
361
-
362
- ---
363
-
364
- ## 📰 Latest Release
365
-
366
- **[v4.2 - "Party Mode" Release](https://github.com/paulpreibisch/AgentVibes/releases/tag/v4.2)** 🎉
367
-
368
- This is the biggest AgentVibes release since the TUI launched in v4.0. Two headline features: **BMAD Party Mode** gives every agent their own voice and music, and the **SSH Receiver** lets you hear your headless server speak on your local machine.
369
-
370
- ### 🎭 BMAD Party Mode — Multi-Agent Voice Conversations
371
-
372
- The BMad Method (Build More Architect Dreams) is an AI-driven development framework module that helps you build software from ideation through agentic implementation with specialized AI agents, guided workflows, and intelligent planning.
373
-
374
- Every agent in a BMAD discussion now speaks with their own individually configured voice, music, reverb, and personality — making the Architect, PM, Developer, QA, and Analyst immediately recognizable the moment they speak.
375
-
376
- **Auto-enabled** — party mode activates automatically when BMAD is detected. Configure agents visually:
377
-
378
- ```bash
379
- npx agentvibes # Press B for BMad Tab
380
- ```
381
-
382
- **Each agent gets:**
383
- - 🎙️ **Their own voice** — 914 to choose from, or auto-assign gender-aware
384
- - 🎵 **Their own music track** — cinematic for the Architect, lo-fi for the Dev
385
- - 🎚️ **Their own volume** — fine-tune per-agent, or bulk-set all at once
386
- - 🎛️ **Their own reverb** — studio, hall, cathedral, room, or none
387
- - 💬 **Their own pretext** — "Winston says:..." before every line
388
- - 🎭 **Their own personality** — sarcastic, dramatic, pirate, cheerful...
389
- - 🔇 **No overlap** — agents wait for full audio before the next one speaks
390
- - **Markdown stripped** — no "asterisk asterisk" in TTS output
391
-
392
- ### 🎛️ BMad Tab — Full Visual Agent Configurator
393
-
394
- Manage every agent from an interactive table — same polish as the Voices tab:
395
-
396
- | Key | Action |
397
- |-----|--------|
398
- | `Space` | Preview agent with full profile (animated spinner while playing) |
399
- | `Enter` | Configure voice, music, volume, reverb, personality, pretext |
400
- | `A` | Auto-assign unique voices (gender-aware, no repeats) |
401
- | `B` | Bulk Edit — set music / volume / pretext / reverb for all agents |
402
- | `X` | Reset agent to defaults |
403
-
404
- The table shows **Voice, Gender, Provider, Reverb, Music, Vol, Pretext** columns. Voice names are automatically beautified: `16Speakers::Rose_Ibex` → `Rose Ibex`.
405
-
406
- ### 🖥️ SSH Receiver — Hear Your Headless Server
407
-
408
- Stream TTS from a cloud box, WSL2, or any voiceless server directly to your local machine over TCP:
409
-
410
- ```bash
411
- # Local: open TUI → Receiver tab → Start
412
- npx agentvibes
413
-
414
- # Remote: AgentVibes auto-detects the receiver and streams audio to you
415
- ```
416
-
417
- ### ⚡ ~1 Second Faster TTS
418
-
419
- - 6 Node.js profile reads collapsed into 1 (~900ms saved per speech)
420
- - `inotifywait` queue worker — no polling delay
421
- - Cache cleanup runs off the critical path
422
-
423
- ### 🎨 ANSI Colors Restored + Banner Toggle
424
-
425
- Full color in the TTS banner. Silence it without muting audio:
426
- ```bash
427
- touch ~/.agentvibes/banner-disabled # or: "turn off the TTS banner" via MCP
428
- ```
429
-
430
- ### Quick Install
431
-
432
- ```bash
433
- npx agentvibes install
434
- ```
435
-
436
- 💡 **Tip:** If `npx agentvibes` shows an older version: `npm cache clean --force && npx agentvibes@latest`
437
-
438
- 🐛 **Found a bug?** [GitHub Issues](https://github.com/paulpreibisch/AgentVibes/issues)
439
-
440
- [→ View Complete Release Notes](RELEASE_NOTES.md) | [→ View Previous Release (v4.0.1)](https://github.com/paulpreibisch/AgentVibes/releases/tag/v4.0.1) | [→ View All Releases](https://github.com/paulpreibisch/AgentVibes/releases)
441
-
442
- [↑ Back to top](#-table-of-contents)
443
-
444
- ---
445
-
446
- ## 🎙️ AgentVibes MCP
447
-
448
- Agent Vibes was originally created to give the Claude Code assistant a voice! Simply install it with an npx command in your terminal, and Claude Code can talk back to you.
449
-
450
- We've now enhanced this capability by adding an MCP (Model Context Protocol) server. This integration exposes Agent Vibes' functionality directly to your AI assistant, allowing you to configure and control Agent Vibes using natural language instead of typing "/" slash commands.
451
-
452
- Setting it up is straightforward: just add the MCP server to your Claude Code configuration files.
453
-
454
- But the convenience doesn't stop there. With the MCP server in place, Claude Desktop can now use Agent Vibes too! We've even tested it successfully with Warp, an AI assistant that helps you navigate Windows and other operating systems.
455
-
456
- We're thrilled about this expansion because it means Claude Desktop and Warp can finally talk back as well!
457
-
458
- If you decide to use the MCP server on Claude Desktop, after configuration, give Claude Desktop this command: "every time i give you a command, speak the acknowledgement using agentvibes and the confirmation about what you completed, when done"—and watch the magic happen!
459
-
460
- **🎯 Control AgentVibes with natural language - no slash commands to remember!**
461
-
462
- Just say "Switch to Aria voice" or "Speak in Spanish" instead of typing commands.
463
-
464
- **Works in:** Claude Desktop, Claude Code, Warp Terminal
465
-
466
- **[→ View Complete MCP Setup Guide](docs/mcp-setup.md)** - Full setup for all platforms, configuration examples, available tools, and MCP vs slash commands comparison
467
-
468
- [↑ Back to top](#-table-of-contents)
469
-
470
- ---
471
-
472
- ## 🚀 Quick Start - Get Voice in 30 Seconds
473
-
474
- **3 Simple Steps:**
475
-
476
- ### 1️⃣ Install
477
- ```bash
478
- npx agentvibes install
479
- ```
480
-
481
- ### 2️⃣ Choose Provider (Auto-Detected)
482
- - **macOS**: Native `say` provider (100+ voices)
483
- - **Linux/WSL**: Piper TTS (50+ free voices) 🎙️
484
- - **Windows Native**: Soprano, Piper, or SAPI 🪟
485
- - **Android**: Termux with auto-setup 📱
486
-
487
- ### 3️⃣ Use in Claude Code
488
- Just code normally - AgentVibes automatically speaks task acknowledgments and completions! 🔊
489
-
490
- ---
491
-
492
- ### TUI Console Commands
493
-
494
- AgentVibes includes a full **Text User Interface (TUI)** built with blessed.js for managing voices, music, settings, and installation — all from a single interactive console.
495
-
496
- | Command | Description |
497
- |---------|-------------|
498
- | `npx agentvibes` | Smart detection — opens Settings if installed, Install if not |
499
- | `npx agentvibes install` | Open the Install tab directly |
500
- | `npx agentvibes config` | Open the Settings tab directly |
501
-
502
- Once inside, use **Tab** / **Shift+Tab** to switch between tabs: **Voices**, **Music**, **BMad**, **Settings**, **Receiver**, and **Install**. Use **[** / **]** to page through voice and music catalogs.
503
-
504
- ---
505
-
506
- **🍎 macOS Users (One-Time Setup):**
507
- ```bash
508
- brew install bash # Required for bash 5.x features
509
- ```
510
- macOS ships with bash 3.2 (from 2007). After this, everything works perfectly!
511
-
512
- ---
513
-
514
- **[→ Full Setup Guide](docs/quick-start.md)** - Advanced options, provider switching, and detailed setup
515
-
516
- [↑ Back to top](#-table-of-contents)
517
-
518
- [↑ Back to top](#-table-of-contents)
519
-
520
- ---
521
-
522
- ## 📋 Prerequisites - What You Actually Need
523
-
524
- ### Minimum (Core Features)
525
- **✅ REQUIRED:**
526
- - **Node.js** ≥16.0 - Check with: `node --version`
527
-
528
- ### Required for Full Features
529
- **✅ STRONGLY RECOMMENDED:**
530
- - **Python** 3.10+ - Needed for Piper TTS voice engine
531
- - **bash** 5.0+ - macOS only (macOS ships with 3.2 from 2007)
532
-
533
- ### Optional but Recommended
534
- **⭕ OPTIONAL (TTS still works without them):**
535
- - **sox** - Audio effects (reverb, EQ, pitch shifting)
536
- - **ffmpeg** - Background music, audio padding, RDP compression
537
-
538
- ### NOT Required (Despite What You've Heard)
539
- **❌ DEFINITELY NOT NEEDED:**
540
- - ❌ Git or git-lfs (npm handles everything)
541
- - ❌ Repository cloning (unless you're contributing code)
542
- - ❌ Build tools or C++ compilers (pre-built package ready to use)
543
-
544
- ### Installation Methods
545
-
546
- | Method | Command | Use Case |
547
- |--------|---------|----------|
548
- | **✅ RECOMMENDED: NPX (via npm)** | `npx agentvibes install` | **All platforms** - Just want to use AgentVibes |
549
- | **🪟 Windows PowerShell** | `.\setup-windows.ps1` | **Windows** - Standalone installer (no Node.js needed) |
550
- | **⚠️ Git Clone** | `git clone ...` | **Developers Only** - Contributing code |
551
-
552
- **Why npx?** Zero git operations, no build steps, just 30 seconds to voice!
553
-
554
- ### For Developers (Contributing Code)
555
-
556
- If you want to contribute to AgentVibes:
557
- ```bash
558
- git clone https://github.com/paulpreibisch/AgentVibes.git
559
- cd AgentVibes
560
- npm install
561
- npm link
562
- ```
563
-
564
- Requires: Node.js 16+, Git (no git-lfs), and `npm link` familiarity.
565
-
566
- [↑ Back to top](#-table-of-contents)
567
-
568
- ---
569
-
570
- ---
571
-
572
- ## 📱 Quick Setup: Android & Termux (Claude Code on Your Phone!)
573
-
574
- **Want to run Claude Code on your Android phone with professional voices?**
575
-
576
- Simply install Termux from F-Droid (NOT Google Play) and run:
577
- ```bash
578
- pkg update && pkg upgrade
579
- pkg install nodejs-lts
580
- npx agentvibes install
581
- ```
582
-
583
- Termux auto-detects and installs everything needed (proot-distro for compatibility, Piper TTS, audio playback).
584
-
585
- **[→ Full Android/Termux Setup Guide](#-android--termux)** - Detailed troubleshooting and verification steps
586
-
587
- [↑ Back to top](#-table-of-contents)
588
-
589
- ---
590
-
591
- ## 📋 System Requirements
592
-
593
- AgentVibes requires certain system dependencies for optimal audio processing and playback. Requirements vary by operating system and TTS provider.
594
-
595
- ### Core Requirements (All Platforms)
596
-
597
- | Tool | Required For | Why It's Needed |
598
- |------|-------------|-----------------|
599
- | **Node.js** ≥16.0 | All platforms | Runtime for AgentVibes installer and MCP server |
600
- | **Bash** ≥5.0 | macOS | Modern bash features (macOS ships with 3.2 from 2007) |
601
- | **Python** 3.10+ | Piper TTS, MCP server | Runs Piper voice engine and MCP server |
602
-
603
- ### Audio Processing Tools (Recommended)
604
-
605
- | Tool | Status | Purpose | Impact if Missing |
606
- |------|--------|---------|------------------|
607
- | **sox** | Recommended | Audio effects (reverb, EQ, pitch, compression) | No audio effects, still works |
608
- | **ffmpeg** | Recommended | Background music mixing, audio padding, RDP compression | No background music or RDP optimization |
609
-
610
- ### Platform-Specific Requirements
611
-
612
- #### 🐧 Linux / WSL
613
-
614
- ```bash
615
- # Ubuntu/Debian
616
- sudo apt-get update
617
- sudo apt-get install -y sox ffmpeg python3-pip pipx
618
-
619
- # Fedora/RHEL
620
- sudo dnf install -y sox ffmpeg python3-pip pipx
621
-
622
- # Arch Linux
623
- sudo pacman -S sox ffmpeg python-pip python-pipx
624
- ```
625
-
626
- **Audio Playback** (one of the following):
627
- - `paplay` (PulseAudio - usually pre-installed)
628
- - `aplay` (ALSA - fallback)
629
- - `mpg123` (fallback)
630
- - `mpv` (fallback)
631
-
632
- **Why these tools?**
633
- - **sox**: Applies audio effects defined in `.claude/config/audio-effects.cfg` (reverb, pitch shifting, EQ, compression)
634
- - **ffmpeg**: Mixes background music tracks, adds silence padding to prevent audio cutoff, compresses audio for RDP/SSH sessions
635
- - **paplay/aplay**: Plays generated TTS audio files
636
- - **pipx**: Isolated Python environment manager for Piper TTS installation
637
-
638
- #### 🍎 macOS
639
-
640
- ```bash
641
- # Install Homebrew if not already installed
642
- /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
643
-
644
- # Required: Modern bash
645
- brew install bash
646
-
647
- # Recommended: Audio processing tools
648
- brew install sox ffmpeg pipx
649
- ```
650
-
651
- **Audio Playback**:
652
- - `afplay` (built-in - always available)
653
- - `say` (built-in - for macOS TTS provider)
654
-
655
- **Why these tools?**
656
- - **bash 5.x**: macOS ships with bash 3.2 which lacks associative arrays and other modern features AgentVibes uses
657
- - **sox**: Same audio effects processing as Linux
658
- - **ffmpeg**: Same background music and padding as Linux
659
- - **afplay**: Built-in macOS audio player
660
- - **say**: Built-in macOS text-to-speech (alternative to Piper)
661
-
662
- #### 🪟 Windows
663
-
664
- **Option A: Native Windows (Recommended)**
665
-
666
- AgentVibes now supports native Windows with three TTS providers. No WSL required!
667
-
668
- ```powershell
669
- # Interactive Node.js installer (recommended)
670
- npx agentvibes install
671
-
672
- # Or use the standalone PowerShell installer
673
- .\setup-windows.ps1
674
- ```
675
-
676
- **Providers available natively:**
677
- - **Soprano** - Ultra-fast neural TTS (best quality, requires `pip install soprano-tts`)
678
- - **Windows Piper** - High quality offline neural voices (auto-downloaded)
679
- - **Windows SAPI** - Built-in Windows voices (zero setup)
680
-
681
- **Requirements:** Node.js 16+, PowerShell 5.1+, ffmpeg (optional, for background music & reverb)
682
-
683
- See [Windows Native Setup Guide](WINDOWS-SETUP.md) for full instructions.
684
-
685
- **Option B: WSL (Legacy)**
686
-
687
- For Claude Desktop or WSL-based workflows, follow the [Windows WSL Guide](mcp-server/WINDOWS_SETUP.md).
688
-
689
- ```powershell
690
- # Install WSL from PowerShell (Administrator)
691
- wsl --install -d Ubuntu
692
- ```
693
-
694
- Then follow Linux requirements above inside WSL.
695
-
696
- #### 🤖 Android / Termux
697
-
698
- **Running Claude Code on Your Android Using Termux**
699
-
700
- AgentVibes fully supports Android devices through the [Termux app](https://termux.dev/). This enables you to run Claude Code with professional TTS voices directly on your Android phone or tablet!
701
-
702
- **Quick Setup:**
703
-
704
- ```bash
705
- # 1. Install Termux from F-Droid (NOT Google Play - it's outdated)
706
- # Download: https://f-droid.org/en/packages/com.termux/
707
-
708
- # 2. Install Node.js in Termux
709
- pkg update && pkg upgrade
710
- pkg install nodejs-lts
711
-
712
- # 3. Install AgentVibes (auto-detects Android and runs Termux installer)
713
- npx agentvibes install
714
- ```
715
-
716
- **What Gets Installed?**
717
-
718
- The Termux installer automatically sets up:
719
- - **proot-distro** with Debian (for glibc compatibility)
720
- - **Piper TTS** via proot wrapper (Android uses bionic libc, not glibc)
721
- - **termux-media-player** for audio playback (`paplay` doesn't work on Android)
722
- - **Audio dependencies**: ffmpeg, sox, bc for processing
723
- - **termux-api** for Android-specific audio routing
724
-
725
- **Why Termux Instead of Standard Installation?**
726
-
727
- Android's architecture requires special handling:
728
- - ❌ Standard pip/pipx fails (missing wheels for bionic libc)
729
- - ❌ Linux binaries require glibc (Android uses bionic)
730
- - ❌ `/tmp` directory is not accessible on Android
731
- - ❌ Standard audio tools like `paplay` don't exist
732
-
733
- ✅ Termux installer solves all these issues with proot-distro and Android-native audio playback!
734
-
735
- **Requirements:**
736
- - [Termux app](https://f-droid.org/en/packages/com.termux/) (from F-Droid, NOT Google Play)
737
- - [Termux:API](https://f-droid.org/en/packages/com.termux.api/) (for audio playback)
738
- - Android 7.0+ (recommended: Android 10+)
739
- - ~500MB free storage (for Piper TTS + voice models)
740
-
741
- **Audio Playback:**
742
- - Uses `termux-media-player` instead of `paplay`
743
- - Audio automatically routes through Android's media system
744
- - Supports all Piper TTS voices (50+ languages)
745
-
746
- **Verifying Your Setup:**
747
-
748
- ```bash
749
- # Check Termux environment
750
- echo $PREFIX # Should show /data/data/com.termux/files/usr
751
-
752
- # Check Node.js
753
- node --version # Should be ≥16.0
754
-
755
- # Check if Piper is installed
756
- which piper # Should return /data/data/com.termux/files/usr/bin/piper
757
-
758
- # Test audio playback
759
- termux-media-player play /path/to/audio.wav
760
- ```
761
-
762
- **Troubleshooting:**
763
-
764
- | Issue | Solution |
765
- |-------|----------|
766
- | "piper: not found" | Run `npx agentvibes install` - auto-detects Termux |
767
- | No audio playback | Install Termux:API from F-Droid |
768
- | Permission denied | Run `termux-setup-storage` to grant storage access |
769
- | Slow installation | Use WiFi, not mobile data (~300MB download) |
770
-
771
- **Why F-Droid and Not Google Play?**
772
-
773
- Google Play's Termux version is outdated and unsupported. Always use the [F-Droid version](https://f-droid.org/en/packages/com.termux/) for the latest security updates and compatibility.
774
-
775
- ### TTS Provider Requirements
776
-
777
- #### Piper TTS (Free, Offline)
778
- - **Python** 3.10+
779
- - **pipx** (for isolated installation)
780
- - **Disk Space**: ~50MB per voice model
781
- - **Internet**: Only for initial voice downloads
782
-
783
- ```bash
784
- # Installed automatically by AgentVibes
785
- pipx install piper-tts
786
- ```
787
-
788
- #### macOS Say (Built-in, macOS Only)
789
- - No additional requirements
790
- - 100+ voices pre-installed on macOS
791
- - Use: `/agent-vibes:provider switch macos`
792
-
793
- ### Verifying Your Setup
794
-
795
- ```bash
796
- # Check all dependencies
797
- node --version # Should be ≥16.0
798
- python3 --version # Should be ≥3.10
799
- bash --version # Should be ≥5.0 (macOS users!)
800
- sox --version # Optional but recommended
801
- ffmpeg -version # Optional but recommended
802
- pipx --version # Required for Piper TTS
803
-
804
- # Check audio playback (Linux/WSL)
805
- paplay --version || aplay --version
806
-
807
- # Check audio playback (macOS)
808
- which afplay # Should return /usr/bin/afplay
809
- ```
810
-
811
- ### What Happens Without Optional Dependencies?
812
-
813
- | Missing Tool | Impact | Workaround |
814
- |-------------|--------|------------|
815
- | sox | No audio effects (reverb, EQ, pitch) | TTS still works, just no effects |
816
- | ffmpeg | No background music, no audio padding | TTS still works, audio may cut off slightly early |
817
- | paplay/aplay | No audio playback on Linux | Install at least one audio player |
818
-
819
- **All TTS generation still works** - optional tools only enhance the experience!
820
-
821
- [↑ Back to top](#-table-of-contents)
822
-
823
- ---
824
-
825
- ## 🎭 Choose Your Voice Provider
826
-
827
- **Piper TTS** (free, works offline on Linux/WSL) or **macOS Say** (free, built-in on Mac) - pick one and switch anytime.
828
-
829
- | Provider | Platform | Cost | Quality | Setup |
830
- |----------|----------|------|---------|-------|
831
- | **macOS Say** | macOS only | Free (built-in) | ⭐⭐⭐⭐ | Zero config |
832
- | **Piper** | Linux/WSL/Windows | Free | ⭐⭐⭐⭐ | Auto-downloads |
833
- | **Soprano** | Linux/WSL/Windows | Free | ⭐⭐⭐⭐⭐ | `pip install soprano-tts` |
834
- | **Windows SAPI** | Windows | Free (built-in) | ⭐⭐⭐ | Zero config |
835
-
836
- On macOS, the native `say` provider is automatically detected and recommended!
837
-
838
- **[→ Provider Comparison Guide](docs/providers.md)**
839
-
840
- [↑ Back to top](#-table-of-contents)
841
-
842
- ---
843
-
844
- ## 🎤 Commands Reference
845
-
846
- AgentVibes provides **50+ slash commands** and **natural language MCP equivalents**.
847
-
848
- **Quick Examples:**
849
- ```bash
850
- # Voice control
851
- /agent-vibes:switch Aria # Or: "Switch to Aria voice"
852
- /agent-vibes:list # Or: "List all voices"
853
-
854
- # Personality & sentiment
855
- /agent-vibes:personality pirate # Or: "Set personality to pirate"
856
- /agent-vibes:sentiment sarcastic # Or: "Apply sarcastic sentiment"
857
-
858
- # Language & learning
859
- /agent-vibes:set-language spanish # Or: "Speak in Spanish"
860
- /agent-vibes:learn # Or: "Enable learning mode"
861
- ```
862
-
863
- **[→ View Complete Command Reference](docs/commands.md)** - All voice, system, personality, sentiment, language, and BMAD commands with MCP equivalents
864
-
865
- ### Intro Text Commands
866
-
867
- ```bash
868
- # Configure intro text — open Settings tab
869
- npx agentvibes
870
-
871
- # View current intro text
872
- cat ~/.claude/config/intro-text.txt
873
- ```
874
-
875
- **MCP Equivalent:**
876
- ```
877
- "Set my intro text to 'FireBot: '"
878
- "What's my current intro text?"
879
- "Clear my intro text"
880
- ```
881
-
882
- ### Custom Music Commands
883
-
884
- ```bash
885
- # Configure background music — open Music tab
886
- npx agentvibes
887
- ```
888
-
889
- **MCP Equivalent:**
890
- ```
891
- "Configure my background music"
892
- "Add custom background music"
893
- "Remove custom music"
894
- "Preview my background music"
895
- ```
896
-
897
- ### Friendly Voice Name Commands
898
-
899
- ```bash
900
- # Switch using friendly name
901
- /agent-vibes:switch Ryan
902
- /agent-vibes:switch Sarah
903
-
904
- # List all voices with friendly names
905
- /agent-vibes:list
906
-
907
- # Get current voice (shows friendly name if available)
908
- /agent-vibes:whoami
909
- ```
910
-
911
- **MCP Equivalent:**
912
- ```
913
- "Switch to Ryan voice"
914
- "Use the Sarah voice"
915
- "List all available voices"
916
- ```
917
-
918
- [↑ Back to top](#-table-of-contents)
919
-
920
- ---
921
-
922
- ## 🎙️ Verbosity Control
923
-
924
- **Control how much Claude speaks while working!** 🔊
925
-
926
- Choose from three verbosity levels:
927
-
928
- ### LOW (Minimal) 🔇
929
- - Acknowledgments only (start of task)
930
- - Completions only (end of task)
931
- - Perfect for quiet work sessions
932
-
933
- ### MEDIUM (Balanced) 🤔
934
- - Acknowledgments + completions
935
- - Major decisions ("I'll use grep to search")
936
- - Key findings ("Found 12 instances")
937
- - Perfect for understanding decisions without full narration
938
-
939
- ### HIGH (Maximum Transparency) 💭
940
- - All reasoning ("Let me search for all instances")
941
- - All decisions ("I'll use grep for this")
942
- - All findings ("Found it at line 1323")
943
- - Perfect for learning mode, debugging complex tasks
944
-
945
- **Quick Commands:**
946
- ```bash
947
- /agent-vibes:verbosity # Show current level
948
- /agent-vibes:verbosity high # Maximum transparency
949
- /agent-vibes:verbosity medium # Balanced
950
- /agent-vibes:verbosity low # Minimal (default)
951
- ```
952
-
953
- **MCP Equivalent:**
954
- ```
955
- "Set verbosity to high"
956
- "What's my current verbosity level?"
957
- ```
958
-
959
- 💡 **How it works:** Claude uses emoji markers (💭 🤔 ✓) in its text, and AgentVibes automatically detects and speaks them based on your verbosity level. No manual TTS calls needed!
960
-
961
- ⚠️ **Note:** Changes take effect on next Claude Code session restart.
962
-
963
- [↑ Back to top](#-table-of-contents)
964
-
965
- ---
966
-
967
- ## 📚 Language Learning Mode
968
-
969
- **🎯 Learn Spanish (or 30+ languages) while you program!** 🌍
970
-
971
- Every task acknowledgment plays **twice** - first in English, then in your target language. Context-based learning while you code!
972
-
973
- **[→ View Complete Learning Mode Guide](docs/language-learning-mode.md)** - Full tutorial, quick start, commands, speech rate control, supported languages, and pro tips
974
-
975
- [↑ Back to top](#-table-of-contents)
976
-
977
- ---
978
-
979
- ## 🎭 Personalities vs Sentiments
980
-
981
- **Two ways to add personality:**
982
-
983
- - **🎪 Personalities** - Changes BOTH voice AND speaking style (e.g., `pirate` personality = Pirate Marshal voice + pirate speak)
984
- - **💭 Sentiments** - Keeps your current voice, only changes speaking style (e.g., Aria voice + sarcastic sentiment)
985
-
986
- **[→ Complete Personalities Guide](docs/personalities.md)** - All 19 personalities, create custom ones
987
-
988
- [↑ Back to top](#-table-of-contents)
989
-
990
- ---
991
-
992
- ## 🗣️ Voice Library
993
-
994
- Use the **AgentVibes TUI installer** (`/audio-browser`) to browse, sample, and install from 914 voices interactively.
995
-
996
- ### Friendly Voice Names
997
-
998
- All voices now have memorable names! Instead of technical IDs like `en_US-libritts_r-medium-speaker-123`, just use friendly names like **Ryan**, **Joe**, or **Sarah**.
999
-
1000
- **Voice Metadata Includes:**
1001
- - Display name and technical ID
1002
- - Gender, accent, and region
1003
- - Personality traits (professional, warm, friendly, etc.)
1004
- - Recommended use cases
1005
- - Quality rating and sample rate
1006
-
1007
- ### Voice Categories
1008
-
1009
- **Curated Voices** (10 personalities):
1010
- These hand-picked voices cover common use cases with clear characteristics.
1011
-
1012
- **Speaker Variations** (904 voices):
1013
- High-quality Piper TTS voices from the libritts-high model. Each speaker has unique vocal characteristics, accents, and tones.
1014
-
1015
- ### Popular Voices
1016
-
1017
- AgentVibes includes professional AI voices from Piper TTS and macOS Say with multilingual support.
1018
-
1019
- 🎧 **Try in Claude Code:** `/agent-vibes:preview` to hear all voices
1020
- 🌍 **Multilingual:** Use Antoni, Rachel, Domi, or Bella for automatic language detection
1021
-
1022
- **[→ View Complete Voice Library](docs/voice-library.md)** - All voices with clickable samples, descriptions, and best use cases
1023
-
1024
- [↑ Back to top](#-table-of-contents)
1025
-
1026
- ---
1027
-
1028
- ## 🔌 BMAD Plugin
1029
-
1030
- **Automatically switch voices when using BMAD agents!**
1031
-
1032
- The BMAD plugin detects when you activate a BMAD agent (e.g., `/BMad:agents:pm`) and automatically uses the assigned voice for that role.
1033
-
1034
- **Version Support**: AgentVibes supports both BMAD v4 and v6-alpha installations. Version detection is automatic - just install BMAD and AgentVibes will detect and configure itself correctly!
1035
-
1036
- ### 🔊 TTS Injection: How It Works
1037
-
1038
- BMAD uses a **loosely-coupled injection system** for voice integration. BMAD source files contain placeholder markers that AgentVibes replaces with speaking instructions during installation:
1039
-
1040
- **Before Installation (BMAD Source):**
1041
- ```xml
1042
- <rules>
1043
- <r>ALWAYS communicate in {communication_language}...</r>
1044
- <!-- TTS_INJECTION:agent-tts -->
1045
- <r>Stay in character until exit selected</r>
1046
- </rules>
1047
- ```
1048
-
1049
- **After Installation (with AgentVibes enabled):**
1050
- ```xml
1051
- <rules>
1052
- <r>ALWAYS communicate in {communication_language}...</r>
1053
- - When responding to user messages, speak your responses using TTS:
1054
- Call: `.claude/hooks/bmad-speak.sh '{agent-id}' '{response-text}'`
1055
- Where {agent-id} is your agent type (pm, architect, dev, etc.)
1056
-
1057
- - Auto Voice Switching: AgentVibes automatically switches to the voice
1058
- assigned for your agent role when activated
1059
- <r>Stay in character until exit selected</r>
1060
- </rules>
1061
- ```
1062
-
1063
- **After Installation (with TTS disabled):**
1064
- ```xml
1065
- <rules>
1066
- <r>ALWAYS communicate in {communication_language}...</r>
1067
- <r>Stay in character until exit selected</r>
1068
- </rules>
1069
- ```
1070
-
1071
- This design means **any TTS provider** can integrate with BMAD by replacing these markers with their own instructions!
1072
-
1073
- **[→ View Complete BMAD Documentation](docs/bmad-plugin.md)** - All agent mappings, language support, TTS injection details, plugin management, and customization
1074
-
1075
- [↑ Back to top](#-table-of-contents)
1076
-
1077
- ---
1078
-
1079
- ## 🤖 OpenClaw Integration
1080
-
1081
- **Use AgentVibes TTS with OpenClaw - the revolutionary AI assistant you can access via any instant messenger!**
1082
-
1083
- **What is OpenClaw?** [OpenClaw](https://openclaw.ai/) is a revolutionary AI assistant that brings Claude AI to your favorite messaging platforms - WhatsApp, Telegram, Discord, and more. No apps to install, no websites to visit - just message your AI assistant like you would a friend.
1084
-
1085
- 🌐 **Website**: https://openclaw.ai/
1086
-
1087
- AgentVibes seamlessly integrates with OpenClaw, providing professional text-to-speech for AI assistants running on messaging platforms and remote servers.
1088
-
1089
- ### 🚨 CRITICAL: Security Before Running OpenClaw on Any Remote Server
1090
-
1091
- ⚠️ **SECURITY IS NOT OPTIONAL** - Running OpenClaw on a remote server exposes your infrastructure to attack vectors including SSH compromise, credential theft, and lateral movement.
1092
-
1093
- **👉 READ THIS FIRST:** [Security Hardening Guide](docs/security-hardening-guide.md) - **Required reading** covering:
1094
- - ✅ SSH hardening (key-only auth, port 2222, fail2ban)
1095
- - ✅ Firewall configuration (UFW/iptables)
1096
- - ✅ Intrusion detection (AIDE, Wazuh)
1097
- - ✅ VPN tunneling (Tailscale alternative to direct SSH)
1098
-
1099
- **Do not expose your OpenClaw server to the internet without reading this guide.**
1100
-
1101
- ### 🎯 Key Benefits
1102
-
1103
- - **Free & Offline**: No API costs, works without internet
1104
- - **Remote SSH Audio**: Audio tunnels from server to local machine via PulseAudio
1105
- - **50+ Voices**: Professional AI voices in 30+ languages
1106
- - **Zero Config**: Automatic when AgentVibes is installed
1107
-
1108
- ### 🚀 Installation
1109
-
1110
- AgentVibes includes a ready-to-use OpenClaw skill that enables TTS on messaging platforms. The setup involves two components:
1111
-
1112
- #### Component 1: OpenClaw Server (Remote)
1113
-
1114
- Install AgentVibes on your OpenClaw server:
1115
-
1116
- ```bash
1117
- # On your remote server where OpenClaw is running
1118
- npx agentvibes install
1119
- ```
1120
-
1121
- The OpenClaw skill is **automatically included** in the AgentVibes npm package at `.clawdbot/skill/SKILL.md`.
1122
-
1123
- **How to activate the skill in OpenClaw:**
1124
-
1125
- 1. **Locate the skill** - After installing AgentVibes, the skill is at:
1126
- ```
1127
- node_modules/agentvibes/.clawdbot/skill/SKILL.md
1128
- ```
1129
-
1130
- 2. **Link to OpenClaw skills directory** (if OpenClaw uses skills):
1131
- ```bash
1132
- # Example - adjust path based on your OpenClaw installation
1133
- ln -s $(npm root -g)/agentvibes/.clawdbot/skill/SKILL.md ~/.openclaw/skills/agentvibes.md
1134
- ```
1135
-
1136
- 3. **OpenClaw auto-detection** - Many OpenClaw setups automatically detect AgentVibes when it's installed. Check your OpenClaw logs for:
1137
- ```
1138
- ✓ AgentVibes skill detected and loaded
1139
- ```
1140
-
1141
- ---
1142
-
1143
- #### 🎙️ AgentVibes Voice Management Skill for OpenClaw
1144
-
1145
- Manage your text-to-speech voices across multiple providers with the AgentVibes Voice Management Skill:
1146
-
1147
- **Voice Management Features:**
1148
- - 🎤 **50+ Professional Voices** - Across Piper TTS (free, offline) and macOS Say providers
1149
- - 🔀 **Multi-Provider Support** - Switch between Piper TTS (free, offline) and macOS Say
1150
- - 👂 **Voice Preview** - Listen to voices before selecting them
1151
- - 🎚️ **Voice Customization** - Add custom voices, set pretext, control speech rate
1152
- - 📋 **Voice Management** - List, switch, replay, and manage your voice library
1153
- - 🔇 **Mute Control** - Mute/unmute TTS output with persistent settings
1154
- - 🌍 **Multilingual Support** - Voices in 30+ languages across all providers
1155
-
1156
- **Installation Confirmation:**
1157
- ✅ The skill is **automatically included** in the AgentVibes npm package at:
1158
- ```
1159
- node_modules/agentvibes/.clawdbot/skill/SKILL.md
1160
- ```
1161
-
1162
- No extra setup needed - when you run `npx agentvibes install` on your OpenClaw server, the skill is ready to use!
1163
-
1164
- **Full Skill Documentation:**
1165
- **[→ View Complete AgentVibes Skill Guide](.clawdbot/skill/SKILL.md)** - 430+ lines covering:
1166
- - Quick start with 50+ voice options
1167
- - Background music & effects management
1168
- - Personality system (19+ styles)
1169
- - Voice effects (reverb, pitch, EQ)
1170
- - Speed & verbosity control
1171
- - Remote SSH audio setup
1172
- - Troubleshooting & complete reference
1173
-
1174
- **Popular Voice Examples:**
1175
- ```bash
1176
- # Female voices
1177
- npx agentvibes speak "Hello" --voice en_US-amy-medium
1178
- npx agentvibes speak "Bonjour" --voice fr_FR-siwis-medium
1179
-
1180
- # Male voices
1181
- npx agentvibes speak "Hello" --voice en_US-lessac-medium
1182
- npx agentvibes speak "Good day" --voice en_GB-alan-medium
1183
-
1184
- # Add personality!
1185
- bash ~/.claude/hooks/personality-manager.sh set sarcastic
1186
- bash ~/.claude/hooks/play-tts.sh "Oh wonderful, another request"
1187
- ```
1188
-
1189
- ---
1190
-
1191
- #### Component 2: AgentVibes Receiver (Local/Phone) ⚠️ REQUIRED
1192
-
1193
- **CRITICAL: You MUST install AgentVibes on your phone (or local machine) to receive and play audio!**
1194
-
1195
- Without this, audio cannot be heard - the server generates TTS but needs a receiver to play it.
1196
-
1197
- **Install on Android Phone (Termux):**
1198
-
1199
- 1. **Install Termux from F-Droid** (NOT Google Play):
1200
- - Download: https://f-droid.org/en/packages/com.termux/
1201
-
1202
- 2. **Install Node.js in Termux:**
1203
- ```bash
1204
- pkg update && pkg upgrade
1205
- pkg install nodejs-lts
1206
- ```
1207
-
1208
- 3. **Install AgentVibes in Termux:**
1209
- ```bash
1210
- npx agentvibes install
1211
- ```
1212
-
1213
- 4. **Install Termux:API** (for audio playback):
1214
- - Download: https://f-droid.org/en/packages/com.termux.api/
1215
- - Then in Termux: `pkg install termux-api`
1216
-
1217
- **Install on Local Mac/Linux:**
1218
-
1219
- ```bash
1220
- npx agentvibes install
1221
- ```
1222
-
1223
- **Why is this needed?**
1224
- - The **server generates TTS** but has no speakers (headless)
1225
- - AgentVibes on your **phone acts as the audio receiver** via SSH tunnel
1226
- - Audio tunnels from server → SSH → phone → speakers 🔊
1227
-
1228
- Without AgentVibes installed on the receiving device, you'll generate audio but hear nothing!
1229
-
1230
- #### How It Works: Server → SSH Tunnel → Local Playback
1231
-
1232
- ```
1233
- ┌─────────────────────────────────────────────────────────┐
1234
- │ 1. User messages OpenClaw via Telegram/WhatsApp │
1235
- │ "Tell me about the weather" │
1236
- └─────────────────────────────────────────────────────────┘
1237
-
1238
- ┌─────────────────────────────────────────────────────────┐
1239
- │ 2. OpenClaw (Server) processes request with Claude │
1240
- │ AgentVibes skill generates TTS audio │
1241
- └─────────────────────────────────────────────────────────┘
1242
-
1243
- ┌─────────────────────────────────────────────────────────┐
1244
- │ 3. Audio tunnels through SSH → PulseAudio (port 14713)│
1245
- │ Server: PULSE_SERVER=tcp:localhost:14713 │
1246
- └─────────────────────────────────────────────────────────┘
1247
-
1248
- ┌─────────────────────────────────────────────────────────┐
1249
- │ 4. Local AgentVibes receives and plays audio │
1250
- │ Phone speakers, laptop speakers, etc. │
1251
- │ 🔊 "The weather is sunny and 72 degrees" │
1252
- └─────────────────────────────────────────────────────────┘
1253
- ```
1254
-
1255
- **Architecture:**
1256
- - **Server (OpenClaw)**: Generates TTS, sends via PulseAudio
1257
- - **SSH Tunnel**: RemoteForward port 14713 (encrypted transport)
1258
- - **Local (Termux/Desktop)**: AgentVibes receives audio, plays on speakers
1259
-
1260
- This creates a **Siri-like experience** - message from anywhere, hear responses on your phone! 📱🎤
1261
-
1262
- ### 📝 Usage
1263
-
1264
- #### Basic TTS Commands
1265
-
1266
- ```bash
1267
- # Basic TTS
1268
- npx agentvibes speak "Hello from OpenClaw"
1269
-
1270
- # With different voices
1271
- npx agentvibes speak "Hello" --voice en_US-amy-medium
1272
- npx agentvibes speak "Bonjour" --voice fr_FR-siwis-medium
1273
-
1274
- # List available voices
1275
- npx agentvibes voices
1276
- ```
1277
-
1278
- #### Advanced: Direct Hook Usage with Voice Override
1279
-
1280
- For programmatic control, use the TTS hook directly:
1281
-
1282
- ```bash
1283
- # Basic: Use default voice
1284
- bash ~/.claude/hooks/play-tts.sh "Hello from OpenClaw"
1285
-
1286
- # Advanced: Override voice per message
1287
- bash ~/.claude/hooks/play-tts.sh "Welcome message" "en_US-amy-medium"
1288
- bash ~/.claude/hooks/play-tts.sh "Bonjour!" "fr_FR-siwis-medium"
1289
- bash ~/.claude/hooks/play-tts.sh "British greeting" "en_GB-alan-medium"
1290
- ```
1291
-
1292
- **Parameters:**
1293
- - `$1` - **TEXT** (required): Message to speak
1294
- - `$2` - **VOICE** (optional): Voice name to override default
1295
-
1296
- #### Audio Effects Configuration for OpenClaw
1297
-
1298
- **File**: `.claude/config/audio-effects.cfg`
1299
-
1300
- Customize audio effects, background music, and voice processing per agent or use default settings:
1301
-
1302
- **Format:**
1303
- ```
1304
- AGENT_NAME|SOX_EFFECTS|BACKGROUND_FILE|BACKGROUND_VOLUME
1305
- ```
1306
-
1307
- **Example Configuration:**
1308
-
1309
- ```bash
1310
- # Default - subtle background music
1311
- default||agentvibes_soft_flamenco_loop.mp3|0.30
1312
-
1313
- # Custom agent with reverb + background
1314
- MyAgent|reverb 40 50 90 gain -2|agentvibes_soft_flamenco_loop.mp3|0.20
1315
-
1316
- # Agent with pitch shift and EQ
1317
- Assistant|pitch -100 equalizer 3000 1q +2|agentvibes_dark_chill_step_loop.mp3|0.15
1318
- ```
1319
-
1320
- **Available SOX Effects:**
1321
-
1322
- | Effect | Syntax | Example | Description |
1323
- |--------|--------|---------|-------------|
1324
- | **Reverb** | `reverb <reverberance> <HF-damping> <room-scale>` | `reverb 40 50 90` | Adds room ambiance (light: 30 40 70, heavy: 50 60 100) |
1325
- | **Pitch** | `pitch <cents>` | `pitch -100` | Shift pitch (100 cents = 1 semitone, negative = lower) |
1326
- | **Equalizer** | `equalizer <freq> <width>q <gain-dB>` | `equalizer 3000 1q +2` | Boost/cut frequencies (bass: 200Hz, treble: 4000Hz) |
1327
- | **Gain** | `gain <dB>` | `gain -2` | Adjust volume (negative = quieter, positive = louder) |
1328
- | **Compand** | `compand <attack,decay> <threshold:in,out>` | `compand 0.3,1 6:-70,-60,-20` | Dynamic range compression (makes quiet parts louder) |
1329
-
1330
- **Background Music Tracks:**
1331
-
1332
- Built-in tracks available in `.claude/audio/tracks/`:
1333
- - `agentvibes_soft_flamenco_loop.mp3` - Warm, rhythmic flamenco
1334
- - `agentvibes_dark_chill_step_loop.mp3` - Modern chill electronic
1335
- - (50+ additional tracks available)
1336
-
1337
- **Background Volume:**
1338
- - `0.10` - Very subtle (10%)
1339
- - `0.20` - Subtle (20%)
1340
- - `0.30` - Moderate (30%, recommended default)
1341
- - `0.40` - Noticeable (40%, party mode)
1342
-
1343
- **Example: OpenClaw Custom Configuration**
1344
-
1345
- Create `.claude/config/audio-effects.cfg` on your OpenClaw server:
1346
-
1347
- ```bash
1348
- # OpenClaw assistant - warm voice with subtle reverb
1349
- OpenClaw|reverb 30 40 70 gain -1|agentvibes_soft_flamenco_loop.mp3|0.25
1350
-
1351
- # Help desk agent - clear, bright voice
1352
- HelpDesk|equalizer 4000 1q +3 compand 0.2,0.5 6:-70,-60,-20|agentvibes_dark_chill_step_loop.mp3|0.15
1353
-
1354
- # Default fallback
1355
- default||agentvibes_soft_flamenco_loop.mp3|0.30
1356
- ```
1357
-
1358
- **How AgentVibes Applies Effects:**
1359
-
1360
- 1. **Generate TTS** - Create base audio with Piper TTS
1361
- 2. **Apply SOX effects** - Process audio (reverb, EQ, pitch, etc.)
1362
- 3. **Mix background** - Blend background music at specified volume
1363
- 4. **Tunnel via SSH** - Send processed audio to local receiver
1364
- 5. **Play on device** - Output to phone/laptop speakers
1365
-
1366
- This allows **per-message customization** or **consistent agent branding** with unique audio signatures!
1367
-
1368
- ### 🔊 Remote SSH Audio
1369
-
1370
- Perfect for running OpenClaw on a remote server with audio on your local machine:
1371
-
1372
- **Quick Setup:**
1373
-
1374
- 1. **Remote server** - Configure PulseAudio:
1375
- ```bash
1376
- echo 'export PULSE_SERVER=tcp:localhost:14713' >> ~/.bashrc
1377
- source ~/.bashrc
1378
- ```
1379
-
1380
- 2. **Local machine** - Add SSH tunnel (`~/.ssh/config`):
1381
- ```
1382
- Host your-server
1383
- RemoteForward 14713 localhost:14713
1384
- ```
1385
-
1386
- 3. **Connect and test**:
1387
- ```bash
1388
- ssh your-server
1389
- agentvibes speak "Testing remote audio from OpenClaw"
1390
- ```
1391
-
1392
- Audio plays on your local speakers! 🔊
1393
-
1394
- ### 📚 Documentation
1395
-
1396
- - **OpenClaw Skill**: [.clawdbot/README.md](.clawdbot/README.md)
1397
- - **OpenClaw Website**: https://openclaw.ai/
1398
- - **Remote Audio Setup**: [docs/remote-audio-setup.md](docs/remote-audio-setup.md)
1399
- - **Security Hardening**: [docs/security-hardening-guide.md](docs/security-hardening-guide.md) ⚠️
1400
-
1401
- [↑ Back to top](#-table-of-contents)
1402
-
1403
- ---
1404
-
1405
- ## 🎙️ AgentVibes Receiver: Remote Audio Streaming from Voiceless Servers
1406
-
1407
- **Receive and play TTS audio from servers that have no audio output!**
1408
-
1409
- AgentVibes Receiver is a lightweight audio client that runs on your phone, tablet, or personal computer. It receives TTS audio from remote voiceless servers where your OpenClaw Personal Assistant or Claude Code project is installed.
1410
-
1411
- ### 🎯 What AgentVibes Receiver Solves
1412
-
1413
- You have OpenClaw running on a Mac mini or remote server with **no audio output**:
1414
- - 🖥️ Mac mini (silent)
1415
- - 🖥️ Ubuntu server (headless)
1416
- - ☁️ AWS/DigitalOcean instance
1417
- - 📦 Docker container
1418
- - 🪟 WSL (Windows Subsystem for Linux)
1419
-
1420
- Users message you via WhatsApp, Telegram, Discord but only get text responses:
1421
- - ❌ No voice = Less engaging experience
1422
- - ❌ No personality = Feels robotic
1423
- - ❌ No audio cues = Miss important context
1424
-
1425
- **AgentVibes Receiver transforms this:**
1426
- - ✅ OpenClaw speaks with voice (Siri-like experience)
1427
- - ✅ Audio streams to your device automatically
1428
- - ✅ You hear responses on your speakers
1429
- - ✅ Users get a conversational AI experience
1430
-
1431
- ### 🔧 How It Works
1432
-
1433
- **One-time setup:**
1434
- 1. Install AgentVibes on your voiceless server with OpenClaw
1435
- 2. Install AgentVibes Receiver on your personal device (phone/tablet/laptop)
1436
- 3. Connect via SSH tunnel (or Tailscale VPN)
1437
- 4. Done - automatic from then on
1438
-
1439
- **Flow diagram:**
1440
- ```
1441
- ┌──────────────────────────────────────────┐
1442
- │ Your Mac mini / Server │
1443
- │ (OpenClaw + AgentVibes) │
1444
- │ • Generates TTS audio │
1445
- │ • Sends via SSH tunnel │
1446
- └──────────────────────────────────────────┘
1447
- ↓ Encrypted SSH tunnel
1448
- ┌──────────────────────────────────────────┐
1449
- │ Your Phone / Laptop │
1450
- │ (AgentVibes Receiver) │
1451
- │ • Receives audio stream (or text stream) │
1452
- │ • Auto-plays on device speakers │
1453
- └──────────────────────────────────────────┘
1454
- ```
1455
-
1456
- **Real-world example:**
1457
- ```
1458
- 📱 WhatsApp: "Tell me about quantum computing"
1459
-
1460
- 🖥️ Mac mini: OpenClaw processes + generates TTS
1461
- ↓ SSH tunnel (audio or text stream)
1462
- 📱 Your phone (AgentVibes Receiver): Plays audio 🔊
1463
-
1464
- You hear on your device speakers: "Quantum computing uses quantum bits..."
1465
-
1466
- 💬 Conversation feels alive!
1467
- ```
1468
-
1469
- ### Key Features
1470
-
1471
- | Feature | Benefit |
1472
- |---------|---------|
1473
- | **One-Time Pairing** | SSH key setup, automatic reconnect |
1474
- | **Real-Time Streaming** | Low-latency audio playback |
1475
- | **SSH Encryption** | Secure audio tunnel |
1476
- | **Tailscale Support** | Easy VPN for remote servers |
1477
- | **Voice Selection** | Configure server-side voice |
1478
- | **Audio Effects** | Reverb, echo, pitch on server |
1479
- | **Cache Tracking** | Monitor audio generation |
1480
- | **Multiple Servers** | Connect to different OpenClaw instances |
1481
-
1482
- ### 🚀 Perfect For
1483
-
1484
- - 🖥️ **Mac mini + OpenClaw** - Home server with professional voices
1485
- - ☁️ **Remote Servers** - OpenClaw on AWS/GCP/DigitalOcean
1486
- - 📱 **WhatsApp/Telegram** - Users message, hear responses
1487
- - 🎓 **Discord Bots** - Bot speaks with voices
1488
- - 🏗️ **Docker/Containers** - Containerized OpenClaw with audio
1489
- - 🔧 **WSL Development** - Windows developers using voiceless WSL
1490
-
1491
- ### 📝 Setup
1492
-
1493
- ```bash
1494
- # On your server (Mac mini, Ubuntu, AWS, etc.)
1495
- npx agentvibes install
1496
- # Selects OpenClaw option
1497
- # AgentVibes installs with SSH-Remote provider
1498
-
1499
- # On your personal device (phone, laptop, tablet)
1500
- npx agentvibes receiver setup
1501
- # Pairing prompt with server SSH key
1502
- # Done!
1503
- ```
1504
-
1505
- ### 📚 Documentation
1506
-
1507
- **[→ View AgentVibes Receiver Setup Guide](docs/agentvibes-receiver.md)** - Pairing, SSH configuration, Tailscale setup, troubleshooting
1508
-
1509
- **[→ View OpenClaw Integration Guide](docs/openclaw-integration.md)** - Server setup, voice configuration, audio effects, and best practices
1510
-
1511
- [↑ Back to top](#-table-of-contents)
1512
-
1513
- ---
1514
-
1515
- ## 📦 Installation Structure
1516
-
1517
- **What gets installed:** Commands, hooks, personalities, and plugins in `.claude/` directory.
1518
-
1519
- **[→ View Complete Installation Structure](docs/installation-structure.md)** - Full directory tree, file descriptions, and settings storage
1520
-
1521
- [↑ Back to top](#-table-of-contents)
1522
-
1523
- ---
1524
-
1525
- ## 💡 Common Workflows
1526
-
1527
- ```bash
1528
- # Switch voices
1529
- /agent-vibes:list # See all voices
1530
- /agent-vibes:switch Aria # Change voice
1531
-
1532
- # Try personalities
1533
- /agent-vibes:personality pirate # Pirate voice + style
1534
- /agent-vibes:personality list # See all 19 personalities
1535
-
1536
- # Speak in other languages
1537
- /agent-vibes:set-language spanish # Speak in Spanish
1538
- /agent-vibes:set-language list # See 30+ languages
1539
-
1540
- # Replay audio
1541
- /agent-vibes:replay # Replay last message
1542
- ```
1543
-
1544
- **💡 Tip:** Using MCP? Just say "Switch to Aria voice" or "Speak in Spanish" instead of typing commands.
1545
-
1546
- [↑ Back to top](#-table-of-contents)
1547
-
1548
- ---
1549
-
1550
- ## 🔧 Advanced Features
1551
-
1552
- AgentVibes supports **custom personalities** and **custom voices**.
1553
-
1554
- **Quick Examples:**
1555
- ```bash
1556
- # Create custom personality
1557
- /agent-vibes:personality add mycustom
1558
-
1559
- # Add custom Piper voice
1560
- /agent-vibes:add "My Voice" abc123xyz789
1561
-
1562
- # Use in custom output styles
1563
- [Bash: .claude/hooks/play-tts.sh "Starting" "Aria"]
1564
- ```
1565
-
1566
- **[→ View Advanced Features Guide](docs/advanced-features.md)** - Custom personalities, custom voices, and more
1567
-
1568
- [↑ Back to top](#-table-of-contents)
1569
-
1570
- ---
1571
-
1572
- ## 🔊 Remote Audio Setup
1573
-
1574
- **Running AgentVibes on a remote server?** No problem!
1575
-
1576
- ✅ **Auto-detects SSH sessions** - Works with VS Code Remote SSH, regular SSH, cloud dev environments
1577
- ✅ **Zero configuration** - Audio optimizes automatically
1578
- ✅ **No static/clicking** - Clean playback through SSH tunnels
1579
-
1580
- **[→ Remote Audio Setup Guide](docs/remote-audio-setup.md)** - Full PulseAudio configuration details
1581
-
1582
- [↑ Back to top](#-table-of-contents)
1583
-
1584
- ---
1585
-
1586
- ## 🛠️ Technical Documentation
1587
-
1588
- ### Audio Architecture
1589
-
1590
- AgentVibes uses a cross-platform audio module (`src/console/audio-env.js`) that handles player detection and environment configuration for all supported platforms.
1591
-
1592
- #### Platform Audio Support Matrix
1593
-
1594
- | Platform | PulseAudio Config | MP3 Players (preference order) | WAV Players (preference order) |
1595
- |----------|-------------------|-------------------------------|-------------------------------|
1596
- | **Native Linux** | System default (not overridden) | ffplay → play (sox) → mpg123 → cvlc → mpv | aplay → paplay → play → ffplay |
1597
- | **WSL2** | Auto-detects `/mnt/wslg/PulseServer` | Same as Linux | Same as Linux |
1598
- | **macOS** | Not applicable | ffplay → play → mpg123 → cvlc → mpv → afplay | aplay → paplay → play → ffplay → afplay |
1599
- | **Windows** | Not applicable | ffplay → mpv (if installed) | ffplay → mpv → PowerShell SoundPlayer (built-in) |
1600
-
1601
- #### Key Design Decisions
1602
-
1603
- - **Direct spawn, not shell chains**: Audio players are spawned directly via Node's `spawn()` instead of `sh -c 'cmd1 || cmd2'` chains. VLC/cvlc crashes when stderr is redirected inside shell wrappers.
1604
- - **Player detection at startup**: The available player is detected once using `which` and cached. No runtime fallback chains.
1605
- - **PULSE_SERVER safety**: The WSL2 PulseServer path (`/mnt/wslg/PulseServer`) is only set when the socket file actually exists. Hardcoding it on native Linux silently breaks audio output.
1606
- - **Windows WAV fallback**: PowerShell's `System.Media.SoundPlayer` is used as a built-in fallback when no cross-platform player is installed.
1607
-
1608
- #### Multi-Speaker Voice Models
1609
-
1610
- Piper supports multi-speaker ONNX models (e.g., `16Speakers.onnx`) that contain multiple voices in a single file. AgentVibes expands these automatically:
1611
-
1612
- - The `.onnx.json` metadata file contains `num_speakers` and `speaker_id_map`
1613
- - `scanInstalledVoices()` expands multi-speaker models into individual selectable entries (e.g., `16Speakers::Cori_Samuel`)
1614
- - When selected, the system writes `tts-piper-model.txt` and `tts-piper-speaker-id.txt` to `.claude/`
1615
- - `play-tts-piper.sh` reads these files and passes `--speaker <id>` to the piper binary
1616
-
1617
- #### Voice Directory Resolution
1618
-
1619
- Voice storage follows the same precedence chain in both JavaScript and shell:
1620
-
1621
- 1. `PIPER_VOICES_DIR` environment variable
1622
- 2. Project-local `.claude/piper-voices-dir.txt` (walks up directory tree)
1623
- 3. Global `~/.claude/piper-voices-dir.txt`
1624
- 4. Default `~/.claude/piper-voices`
1625
-
1626
- #### Voice Catalog System
1627
-
1628
- AgentVibes includes a 914-voice catalog (`voice-assignments.json`) that lets users browse, preview, and install voices directly from the Voices tab:
1629
-
1630
- - **10 Curated Voices** — Hand-picked high-quality voices installed by default
1631
- - **904 LibriTTS Speakers** — Automatically extracted from the `16Speakers` multi-speaker model's `speaker_id_map`, plus the full LibriTTS catalog from Hugging Face
1632
- - **Download on Demand** — Uninstalled voices appear greyed-out in the list; pressing Enter opens a download modal that fetches the voice via `piper-voice-manager.sh`
1633
- - **Catalog Metadata** — Each entry includes `voiceId`, `displayName`, `gender`, `type` (curated/libritts), and download URL
1634
- - **LibriTTS Speaker Names** — Raw numeric IDs are patched at load time using `patchLibriTTSSpeakerNames()`, which maps speaker IDs to human-readable names from the registry
1635
-
1636
- The catalog is loaded once at tab initialization by `loadCatalog()`. Installed voices (from disk scan) are shown with full color; catalog-only voices are dimmed until downloaded.
1637
-
1638
- #### Required System Dependencies for Background Music
1639
-
1640
- Background music requires an MP3-capable audio player. The installer detects missing players and offers to install `ffmpeg` automatically. If no player is found, the Music tab displays a clear error message.
1641
-
1642
- ```bash
1643
- # Install ffmpeg (recommended — provides ffplay)
1644
- # Ubuntu/Debian/WSL2:
1645
- sudo apt install ffmpeg
1646
-
1647
- # macOS:
1648
- brew install ffmpeg
1649
-
1650
- # Arch Linux:
1651
- sudo pacman -S ffmpeg
1652
- ```
1653
-
1654
- [↑ Back to top](#-table-of-contents)
1655
-
1656
- ---
1657
-
1658
- ## 🔗 Useful Links
1659
-
1660
- ### Voice & AI Tools
1661
-
1662
- - 🎤 **[WhisperTyping](https://whispertyping.com/)** - Fast voice-to-text typing for developers
1663
- - 🗣️ **[OpenWhisper (Azure)](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/whisper-overview)** - Microsoft's speech-to-text service
1664
- - 🆓 **[Piper TTS](https://github.com/rhasspy/piper)** - Free offline neural TTS
1665
- - 🤖 **[Claude Code](https://claude.com/claude-code)** - AI coding assistant
1666
- - 🎭 **[BMAD METHOD](https://github.com/bmad-code-org/BMAD-METHOD)** - Multi-agent framework
1667
-
1668
- ### AgentVibes Resources
1669
-
1670
- - 🐛 **[Issues](https://github.com/paulpreibisch/AgentVibes/issues)** - Report bugs
1671
- - 📝 **[Changelog](https://github.com/paulpreibisch/AgentVibes/releases)** - Version history
1672
- - 📰 **[Technical Deep Dive - LinkedIn Article](https://www.linkedin.com/pulse/agent-vibes-add-voice-claude-code-deep-dive-npx-paul-preibisch-8zrcc/)** - How AgentVibes works under the hood
1673
-
1674
- [↑ Back to top](#-table-of-contents)
1675
-
1676
- ---
1677
-
1678
- ## ❓ Troubleshooting
1679
-
1680
- **Common Issues:**
1681
-
1682
- **❌ Error: "git-lfs is not installed"**
1683
-
1684
- **AgentVibes does NOT require git-lfs.** This error suggests:
1685
-
1686
- 1. **Wrong installation method** - Use npm, not git clone:
1687
- ```bash
1688
- # ✅ CORRECT - Use this:
1689
- npx agentvibes install
1690
-
1691
- # ❌ WRONG - Don't clone unless contributing:
1692
- git clone https://github.com/paulpreibisch/AgentVibes.git
1693
- ```
1694
-
1695
- 2. **Different project** - You may be in a BMAD-METHOD or other repo that uses git-lfs
1696
-
1697
- 3. **Global git config** - Your git may have lfs enabled globally:
1698
- ```bash
1699
- git config --global --list | grep lfs
1700
- ```
1701
-
1702
- **Solution:** Use `npx agentvibes install` - no git operations needed!
1703
-
1704
- ---
1705
-
1706
- **No Audio Playing?**
1707
- 1. Verify hook is installed: `ls -la .claude/hooks/session-start-tts.sh`
1708
- 2. Test: `/agent-vibes:sample Aria`
1709
-
1710
- **Commands Not Found?**
1711
- ```bash
1712
- npx agentvibes install --yes
1713
- ```
1714
-
1715
- **[→ View Complete Troubleshooting Guide](docs/troubleshooting.md)** - Solutions for audio issues, command problems, MCP errors, voice issues, and more
1716
-
1717
- [↑ Back to top](#-table-of-contents)
1718
-
1719
- ---
1720
-
1721
- ## 🔄 Updating
1722
-
1723
- **Quick Update (From Claude Code):**
1724
- ```bash
1725
- /agent-vibes:update
1726
- ```
1727
-
1728
- **Alternative Methods:**
1729
- ```bash
1730
- # Via npx
1731
- npx agentvibes update --yes
1732
-
1733
- # Via npm (if installed globally)
1734
- npm update -g agentvibes && agentvibes update --yes
1735
- ```
1736
-
1737
- **Check Version:** `/agent-vibes:version`
1738
-
1739
- **[→ View Complete Update Guide](docs/updating.md)** - All update methods, version checking, what gets updated, and troubleshooting
1740
-
1741
- [↑ Back to top](#-table-of-contents)
1742
-
1743
- ---
1744
-
1745
- ## 🗑️ Uninstalling
1746
-
1747
- **Quick Uninstall (Project Only):**
1748
- ```bash
1749
- npx agentvibes uninstall
1750
- ```
1751
-
1752
- **Uninstall Options:**
1753
- ```bash
1754
- # Interactive uninstall (confirms before removing)
1755
- npx agentvibes uninstall
1756
-
1757
- # Auto-confirm (skip confirmation prompt)
1758
- npx agentvibes uninstall --yes
1759
-
1760
- # Also remove global configuration
1761
- npx agentvibes uninstall --global
1762
-
1763
- # Complete uninstall including Piper TTS
1764
- npx agentvibes uninstall --global --with-piper
1765
- ```
1766
-
1767
- **What Gets Removed:**
1768
-
1769
- **Project-level (default):**
1770
- - `.claude/commands/agent-vibes/` - Slash commands
1771
- - `.claude/hooks/` - TTS scripts
1772
- - `.claude/personalities/` - Personality templates
1773
- - `.claude/output-styles/` - Output styles
1774
- - `.claude/audio/` - Audio cache
1775
- - `.claude/tts-*.txt` - TTS configuration files
1776
- - `.agentvibes/` - BMAD integration files
1777
-
1778
- **Global (with `--global` flag):**
1779
- - `~/.claude/` - Global configuration
1780
- - `~/.agentvibes/` - Global cache
1781
-
1782
- **Piper TTS (with `--with-piper` flag):**
1783
- - `~/piper/` - Piper TTS installation
1784
-
1785
- **To Reinstall:**
1786
- ```bash
1787
- npx agentvibes install
1788
- ```
1789
-
1790
- **💡 Tips:**
1791
- - Default uninstall only removes project-level files
1792
- - Use `--global` if you want to completely reset AgentVibes
1793
- - Use `--with-piper` if you also want to remove the Piper TTS engine
1794
- - Run `npx agentvibes status` to check installation status
1795
-
1796
- [↑ Back to top](#-table-of-contents)
1797
-
1798
- ---
1799
-
1800
- ## Frequently Asked Questions (FAQ)
1801
-
1802
- ### Installation & Setup
1803
-
1804
- **Q: Does AgentVibes require git-lfs?**
1805
- **A:** **NO.** AgentVibes has zero git-lfs requirement. Use `npx agentvibes install` - no git operations needed.
1806
-
1807
- **Q: Do I need to clone the GitHub repository?**
1808
- **A:** **NO** (unless you're contributing code). Normal users should use `npx agentvibes install`. Repository cloning is only for developers who want to contribute to the project.
1809
-
1810
- **Q: Why is the GitHub repo so large?**
1811
- **A:** The repo includes demo files and development dependencies (node_modules). The actual npm package you download is **< 50MB** and optimized for users.
1812
-
1813
- **Q: What's the difference between npm install and git clone?**
1814
- **A:**
1815
- - `npx agentvibes install` → **For users** - Downloads pre-built package, zero git operations, instant setup
1815
- - `git clone ...` → **For developers only** - Full source code, development setup, contributing code
1817
-
1818
- **Q: I saw an error about git-lfs, is something wrong?**
1819
- **A:** You're likely:
1820
- 1. Using wrong installation method (use `npx` not `git clone`)
1821
- 2. In a different project directory that uses git-lfs
1822
- 3. Have global git config with lfs enabled
1823
-
1824
- AgentVibes itself does NOT use or require git-lfs.
1825
-
1826
- ### Features & Usage
1827
-
1828
- **Q: Does MCP consume tokens from my context window?**
1829
- **A:** **YES.** Every MCP tool schema adds to the context window. AgentVibes MCP is designed to be minimal (~1500-2000 tokens), but if you're concerned about token usage, you can use slash commands instead of MCP.
1830
-
1831
- **Q: What's the difference between using MCP vs slash commands?**
1832
- **A:**
1833
- - **MCP**: Natural language ("Switch to Aria voice"), uses ~1500-2000 context tokens
1834
- - **Slash commands**: Explicit commands (`/agent-vibes:switch Aria`), zero token overhead
1835
-
1836
- Both do the exact same thing - MCP is more convenient, slash commands are more token-efficient.
1837
-
1838
- **Q: Is AgentVibes just a bash script?**
1839
- **A:** No. AgentVibes includes:
1840
- - Multi-provider TTS abstraction (Piper TTS, macOS Say)
1841
- - Voice management system with 50+ voices
1842
- - Personality & sentiment system
1843
- - Language learning mode with bilingual playback
1844
- - Audio effects processing (reverb, EQ, compression)
1845
- - MCP server for natural language control
1846
- - BMAD integration for multi-agent voice switching
1847
- - Remote audio optimization for SSH/RDP sessions
1848
-
1849
- **Q: Can I use AgentVibes without BMAD?**
1850
- **A:** **YES.** AgentVibes works standalone. BMAD integration is optional - only activates if you install BMAD separately.
1851
-
1852
- **Q: What are the audio dependencies?**
1853
- **A:**
1854
- - **Required**: Node.js 16+, Python 3.10+ (for Piper TTS)
1855
- - **Optional**: sox (audio effects), ffmpeg (background music, padding)
1856
- - All TTS generation works without optional dependencies - they just enhance the experience
1857
-
1858
- ### Voice Features
1859
-
1860
- **Q: How do I browse and install voices?**
1861
- **A:** Use the built-in TUI installer by running `/audio-browser` in Claude Code. Navigate with arrow keys, press ENTER to sample voices, and select one to install. AgentVibes switches to the chosen voice automatically.
1862
-
1863
- **Q: What are friendly voice names?**
1864
- **A:** Instead of technical IDs like `en_US-ryan-high`, you can now use simple names like "Ryan" when switching voices. All 904+ voices have friendly names matched to their characteristics.
1865
-
1866
- **Q: How do I set up custom intro text?**
1867
- **A:** During installation you'll be prompted for intro text. You can also configure it anytime via `npx agentvibes` → Settings tab. Enter text like "FireBot: " and it will prefix all TTS announcements.
1868
-
1869
- **Q: Can I use my own background music?**
1870
- **A:** Yes! Run `npx agentvibes` and open the Music tab. Select "Change music" and provide the path to your audio file (.mp3, .wav, .ogg, or .m4a). Files are validated for security and must be under 50MB.
1871
-
1872
- **Q: What's the recommended duration for custom music?**
1873
- **A:** Between 30-90 seconds is ideal for smooth looping. The system supports up to 300 seconds (5 minutes) but will warn you if the duration is non-optimal.
1874
-
1875
- **Q: Are friendly voice names case-sensitive?**
1876
- **A:** No! You can type "ryan", "Ryan", or "RYAN" - they all work. The voice resolution is case-insensitive.
1877
-
1878
- **Q: Does custom music work with all TTS providers?**
1879
- **A:** Yes! Custom background music works with Piper TTS, Soprano, macOS Say, and Windows SAPI.
1880
-
1881
- **Q: Can I preview music before setting it as my background?**
1882
- **A:** Yes! In `npx agentvibes` Music tab, select "Preview current" to hear your music. During installation, you can also sample all built-in tracks.
1883
-
1884
- **Q: What security measures protect custom music uploads?**
1885
- **A:** AgentVibes implements **defense-in-depth security with 7 validation layers**, tested against 180+ attack variations:
1886
-
1887
- 1. **Path Validation** - `path.resolve()` prevents traversal attacks (../, encoded, Unicode)
1888
- 2. **Home Directory Boundary** - Files must be within your home directory
1889
- 3. **File Existence Check** - Verifies file actually exists
1890
- 4. **File Type Verification** - Must be a regular file (not device, socket, etc.)
1891
- 5. **Ownership Verification** - File must be owned by you (UID check)
1892
- 6. **Format Validation** - Magic number checking ensures real audio files
1893
- 7. **Secure Storage** - Files copied to restricted directory with 600 permissions
1894
-
1895
- **Security Certification:**
1896
- - ✅ 100% attack rejection rate (107/107 tests passed)
1897
- - ✅ OWASP CWE-22 compliant (path traversal prevention)
1898
- - ✅ No information disclosure in error messages
1899
- - ✅ Production-ready and certified secure
1900
-
1901
- See full security audit: `docs/security/SECURITY-AUDIT.md`
1902
-
1903
- **Q: Has the security been independently verified?**
1904
- **A:** Yes! AgentVibes v3.6.0 includes a comprehensive security audit with 180+ attack variations tested. All path traversal, symlink, Unicode, null byte, and edge case attacks were successfully blocked (100% rejection rate). The system is OWASP CWE-22 compliant and includes a detailed security audit report at `docs/security/SECURITY-AUDIT.md`.
1905
-
1906
- **Q: What attack patterns were tested?**
1907
- **A:** The security test suite covers:
1908
- - **Path Traversal:** 100 variations (basic, URL-encoded, Unicode, null bytes, mixed)
1909
- - **Symlink Attacks:** 10 variations (sensitive files, chains, traversal targets)
1910
- - **Hard Link Attacks:** 5 variations (ownership verification)
1911
- - **Edge Cases:** 65+ variations (CRLF, whitespace, Unicode normalization, platform-specific)
1912
-
1913
- Every attack was correctly rejected with no information disclosure.
1914
-
1915
- ### Troubleshooting
1916
-
1917
- **Q: Why isn't Claude speaking?**
1918
- **A:** Common causes:
1919
- 1. Hook not installed - Run `npx agentvibes install --yes`
1920
- 2. Audio player missing - Install `sox` and `ffmpeg`
1921
- 3. TTS protocol not enabled in settings
1922
- 4. Test with `/agent-vibes:sample Aria`
1923
-
1924
- **Q: Can I use this on Windows?**
1925
- **A:** Yes! AgentVibes supports **native Windows** with PowerShell scripts (Soprano, Piper, SAPI providers). See [Windows Native Setup](WINDOWS-SETUP.md). WSL is also supported for legacy workflows - see [Windows WSL Guide](mcp-server/WINDOWS_SETUP.md).
1926
-
1927
- **Q: How do I reduce token usage?**
1928
- **A:**
1929
- 1. Use slash commands instead of MCP (zero context token overhead)
1930
- 2. Set verbosity to LOW (`/agent-vibes:verbosity low`)
1931
- 3. Disable BMAD integration if not using it
1932
-
1933
- [↑ Back to top](#-table-of-contents)
1934
-
1935
- ---
1936
-
1937
- ## ⚠️ Important Disclaimers
1938
-
1939
- **API Costs & Usage:**
1940
- - Usage is completely free with Piper TTS and Mac Say (no API costs)
1941
- - Users are solely responsible for their own API costs and usage
1942
-
1943
-
1944
- **Third-Party Services:**
1945
- - This project integrates with Piper TTS (local processing) and macOS Say (system built-in)
1946
- - We are **not affiliated with, endorsed by, or officially connected** to Anthropic, Apple, or Claude
1947
- - Piper TTS is subject to its terms of service
1948
-
1949
- **Privacy & Data:**
1950
- - **Piper TTS**: All processing happens locally on your machine, no external data transmission
1951
- - **macOS Say**: All processing happens locally using Apple's built-in speech synthesis
1952
-
1953
- **Software License:**
1954
- - Provided "as-is" under Apache 2.0 License without warranty of any kind
1955
- - See [LICENSE](LICENSE) file for full terms
1956
- - No liability for data loss, bugs, service interruptions, or any damages
1957
-
1958
- **Use at Your Own Risk:**
1959
- - This is open-source software maintained by the community
1960
- - Always test in development before production use
1961
- - Monitor your API usage and costs regularly
1962
-
1963
- [↑ Back to top](#-table-of-contents)
1964
-
1965
- ---
1966
-
1967
- ## 🙏 Credits
1968
-
1969
- **Built with ❤️ by [Paul Preibisch](https://github.com/paulpreibisch)**
1970
-
1971
- - 🐦 Twitter: [@997Fire](https://x.com/997Fire)
1972
- - 💼 LinkedIn: [paul-preibisch](https://www.linkedin.com/in/paul-preibisch/)
1973
- - 🌐 GitHub: [paulpreibisch](https://github.com/paulpreibisch)
1974
-
1975
- **Powered by:**
1976
- - [Piper TTS](https://github.com/rhasspy/piper) - Free neural voices
1977
- - [Soprano TTS](https://github.com/suno-ai/bark) - Ultra-fast neural TTS
1978
- - **Windows SAPI** - Native Windows text-to-speech
1979
- - **macOS Say** - Native macOS text-to-speech
1980
- - [Claude Code](https://claude.com/claude-code) - AI coding assistant
1981
- - Licensed under Apache 2.0
1982
-
1983
- **Contributors:**
1984
- - 🎤 [@nathanchase](https://github.com/nathanchase) - Soprano TTS Provider integration (PR #95) - Ultra-fast neural TTS with GPU acceleration
1985
-
1986
- **Special Thanks:**
1987
- - 💡 [Claude Code Hooks Mastery](https://github.com/disler/claude-code-hooks-mastery) by [@disler](https://github.com/disler) - Hooks inspiration
1988
- - 🤖 [BMAD METHOD](https://github.com/bmad-code-org/BMAD-METHOD) - Multi-agent framework with auto voice switching integration
1989
-
1990
- [↑ Back to top](#-table-of-contents)
1991
-
1992
- ---
1993
-
1994
- ## 🤝 Contributing
1995
-
1996
- If AgentVibes makes your coding more fun:
1997
- - ⭐ **Star this repo** on GitHub
1998
- - 🐦 **Tweet** and tag [@997Fire](https://x.com/997Fire)
1999
- - 🎥 **Share videos** of Claude with personality
2000
- - 💬 **Tell dev friends** about voice-powered AI
2001
-
2002
- ---
2003
-
2004
- **Ready to give Claude a voice? Install now and code with personality! 🎤✨**
2005
-
2006
- [↑ Back to top](#-table-of-contents)
2007
-
1
+ # 🎤 AgentVibes
2
+
3
+ > **Finally! Your agents can talk back!**
4
+ >
5
+ > 🌐 **[agentvibes.org](https://agentvibes.org)**
6
+ >
7
+ > Professional text-to-speech for **Claude Code**, **Claude Desktop**, **Warp Terminal**, and **OpenClaw** - **Soprano** (Neural), **Piper TTS** (Free!), **macOS Say** (Built-in!), or **Windows SAPI** (Zero Setup!)
8
+
9
+ [![npm version](https://img.shields.io/npm/v/agentvibes)](https://www.npmjs.com/package/agentvibes)
10
+ [![Test Suite](https://github.com/paulpreibisch/AgentVibes/actions/workflows/test.yml/badge.svg)](https://github.com/paulpreibisch/AgentVibes/actions/workflows/test.yml)
11
+ [![Publish](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml/badge.svg)](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml)
12
+ [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
13
+
14
+ **Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v4.4
15
+
16
+ ---
17
+
18
+ ## 🚀 Quick Links
19
+
20
+ | I want to... | Go here |
21
+ |--------------|---------|
22
+ | **Install AgentVibes** (just `npx`, no git!) | [Quick Start Guide](docs/quick-start.md) |
23
+ | **Run Claude Code on Android** | [Android/Termux Setup](#-android--termux) |
24
+ | **Secure OpenClaw on Remote Server** | [Security Hardening Guide](docs/security-hardening-guide.md) ⚠️ |
25
+ | **Understand what I need** | [Prerequisites](#-prerequisites) |
26
+ | **Set up on Windows (Native)** | [Windows Native Setup](WINDOWS-SETUP.md) |
27
+ | **Set up on Windows (Claude Desktop/WSL)** | [Windows WSL Guide](mcp-server/WINDOWS_SETUP.md) |
28
+ | **Use with OpenClaw** | [OpenClaw Integration](#-openclaw-integration) |
29
+ | **Use natural language** | [MCP Setup](docs/mcp-setup.md) |
30
+ | **Switch voices** | [Voice Library](docs/voice-library.md) |
31
+ | **Fix issues** (git-lfs? MCP tokens? Read this!) | [Troubleshooting](docs/troubleshooting.md) & [FAQ](#-frequently-asked-questions-faq) |
32
+
33
+ ---
34
+
35
+ ## ✨ What is AgentVibes?
36
+
37
+ **AgentVibes adds lively voice narration to your Claude AI sessions!**
38
+
39
+ Whether you're coding in Claude Code, chatting in Claude Desktop, using Warp Terminal, or running OpenClaw - AgentVibes brings AI to life with professional voices and personalities.
40
+
41
+ ---
42
+
43
+ ## 🌟 NEW IN v4.4 — Full Platform Parity Release
44
+
45
+ ### 🪟 Windows MCP Parity — 27/27 Tools Working
46
+
47
+ All MCP tools now work natively on Windows. Previously 12 tools silently failed due to missing scripts:
48
+
49
+ - **6 new PowerShell scripts** — personality-manager, speed-manager, language-manager, learn-manager, verbosity-manager, clean-audio-cache
50
+ - **Unified provider naming** — `piper` and `sapi` on all platforms (no more `windows-piper`/`windows-sapi`)
51
+ - **replay command** added to voice-manager for Windows
52
+ - **Adversarial review** — 24 issues found, 10 fixed (3 CRITICAL, 4 HIGH, 3 MEDIUM)
53
+ - **28 new tests** covering script parity, effects round-trip, provider management, and naming consistency
54
+ - **Feature-platform matrix** — [docs/feature-platform-matrix.md](docs/feature-platform-matrix.md) tracks all 85 features across Linux, macOS, Windows, and WSL
55
+
56
+ ### Bug Fixes (HIGH)
57
+ - ffmpeg stderr redirected to temp file instead of literal `"NUL"` file
58
+ - `AGENTVIBES_NO_PLAY` env var properly cleaned up on error paths
59
+ - `PIPER_SPEAKER` env var no longer leaks between voice switches
60
+ - Provider config now uses project-local `.claude` (not always global)
61
+ - Text sanitization relaxed — `$50 (USD)` no longer becomes `50 USD`
62
+
63
+ ---
64
+
65
+ ## 🌟 v4.3 — Windows Parity + BMAD Party Mode
66
+
67
+ ### 🎭 BMAD Party Mode — Every Agent Has Its Own Voice
68
+
69
+ The BMad Method (Build More Architect Dreams) is an AI-driven development framework that helps you build software from ideation through agentic implementation with specialized AI agents, guided workflows, and intelligent planning that adapts to your project's complexity.
70
+
71
+ **Every BMAD agent now speaks with their own unique voice, music, and personality.**
72
+
73
+ When party mode runs a multi-agent discussion, the Architect, PM, Developer, QA, and Analyst each sound completely different — making every role immediately recognizable.
74
+
75
+ **Auto-enabled** — if BMAD is installed, party mode activates automatically. Open the BMad Tab to configure each agent:
76
+
77
+ ```bash
78
+ npx agentvibes # Press B to open the BMad Tab
79
+ ```
80
+
81
+ **Per-agent configuration:**
82
+ - 🎙️ **Voice** — 914 voices to choose from, auto-assigned gender-aware
83
+ - 🎵 **Background Music** — Unique ambient track per agent (cinematic, lo-fi, jazz...)
84
+ - 🎚️ **Music Volume** — Per-agent level, or set all at once via Bulk Edit
85
+ - 🎛️ **Reverb** — none / room / hall / cathedral / studio per agent
86
+ - 💬 **Pretext** — Custom intro phrase ("Winston says:..." before every line)
87
+ - 🎭 **Personality** — sarcastic, dramatic, pirate, cheerful, and more
88
+ - 🔇 **No Overlap** — Speech lock ensures agents never talk over each other
89
+ - ✨ **Markdown-Clean** — Asterisks and formatting stripped before TTS
90
+
91
+ ### 🎛️ BMad Tab — Visual Agent Configurator
92
+
93
+ The `npx agentvibes` TUI now includes a full **BMad Tab** for managing every agent visually — inspired by the Voices tab, with the same columns and navigation polish:
94
+
95
+ ```bash
96
+ npx agentvibes # Press B for BMad Tab
97
+ ```
98
+
99
+ | Agent | Voice | Gender | Provider | Reverb | Music | Vol | Pretext |
100
+ |-------|-------|--------|----------|--------|-------|-----|---------|
101
+ | 🏢 Winston | Rose Ibex | Female | Piper (LibriTTS) | studio | jazz | 65% | Winston says |
102
+ | 🧠 Larry | Kusal | Male | Piper | hall | cinematic | 80% | Larry says |
103
+
104
+ **Highlights:**
105
+ - **Beautified voice names** — `16Speakers::Rose_Ibex` shows as `Rose Ibex`; `en_US-kusal-medium` shows as `Kusal`
106
+ - **Gender & Provider columns** — see voice metadata at a glance, just like the Voices tab
107
+ - **Inline row hints** — navigate to any agent and see `[Space] Preview [Enter] Configure` on the row itself
108
+ - **Preview spinner** — animated `⠋⠙⠹⠸` braille spinner while audio plays
109
+
110
+ | Key | Action |
111
+ |-----|--------|
112
+ | `↑↓` / `jk` | Navigate agents |
113
+ | `Space` | Preview agent (spinner shows while playing) |
114
+ | `Enter` | Configure voice, music, volume, reverb, personality, pretext |
115
+ | `A` | Auto-assign unique voices (gender-aware, no repeats) |
116
+ | `B` | Bulk Edit — set music / volume / pretext / reverb for all agents |
117
+ | `X` | Reset agent to defaults |
118
+
119
+ ---
120
+
121
+ ### 🖥️ SSH Receiver — Hear Your Headless Server
122
+
123
+ **Run Claude on a cloud box and hear the TTS on your local machine.**
124
+
125
+ The new **Receiver Tab** streams TTS audio from voiceless remote servers to your local machine over TCP — perfect for AWS/GCP dev boxes, WSL2, and SSH sessions.
126
+
127
+ ```bash
128
+ # On your local machine — open TUI, go to Receiver tab, click Start
129
+ npx agentvibes
130
+
131
+ # On the remote server — AgentVibes auto-detects the receiver and streams
132
+ ```
133
+
134
+ Zero-config forwarding. Works with Piper, macOS Say, and Soprano.
135
+
136
+ ---
137
+
138
+ ### TTS Latency — ~1 Second Faster
139
+
140
+ - **Batched Node.js calls** — 6 separate profile reads collapsed into 1 (~900ms saved)
141
+ - **inotifywait queue** — file-event-based worker, no polling delay
142
+ - **Background cache cleanup** — off the critical path every 10th call
143
+
144
+ ---
145
+
146
+ ### 🎨 ANSI Banner Colors + Toggle
147
+
148
+ Full color in the TTS banner (gold voice, cyan reverb, traffic-light cache). Hide it without muting:
149
+
150
+ ```bash
151
+ touch ~/.agentvibes/banner-disabled # or say "turn off the TTS banner"
152
+ ```
153
+
154
+ ---
155
+
156
+ ### 💬 Intro Text (Pretext) - Your Personal AI Branding
157
+
158
+ **Add custom prefixes to every TTS announcement!**
159
+
160
+ Configure via the AgentVibes TUI Settings tab:
161
+
162
+ ```bash
163
+ npx agentvibes # Navigate to Settings tab
164
+ ```
165
+
166
+ Transform generic AI responses into your personal brand:
167
+
168
+ **Before:**
169
+ ```
170
+ "Starting analysis of the codebase..."
171
+ ```
172
+
173
+ **After (with "FireBot: " intro text):**
174
+ ```
175
+ "FireBot: Starting analysis of the codebase..."
176
+ ```
177
+
178
+ **Perfect for:**
179
+ - 🤖 **Personal AI Branding** - Make Claude sound like your custom assistant
180
+ - 🏢 **Team Identity** - Company bots with branded voices
181
+ - 🎮 **Character Roleplay** - Gaming assistants with character names
182
+ - 🎓 **Teaching Contexts** - Professor Bot, Tutor AI, etc.
183
+
184
+ **Features:**
185
+ - Up to 50 characters
186
+ - UTF-8 and emoji support 🎉
187
+ - Set during installation or anytime after
188
+ - Works with all TTS providers
189
+ - Applies to every single announcement
190
+
191
+ **Examples:**
192
+ - `"JARVIS: "` - Iron Man style
193
+ - `"🤖 Assistant: "` - With emoji
194
+ - `"CodeBot: "` - Development assistant
195
+ - `"Chef AI: "` - Cooking helper
196
+
197
+ Configure via: `npx agentvibes` → Settings tab
198
+
199
+ ---
200
+
201
+ ### 🎵 Custom Background Music - Complete Audio Control
202
+
203
+ **Upload your own background music with battle-tested security!**
204
+
205
+ Configure via the AgentVibes TUI Music tab:
206
+
207
+ ```bash
208
+ npx agentvibes # Navigate to Music tab
209
+ ```
210
+
211
+ Replace the default background tracks with your own audio files.
212
+
213
+ **Supported Formats:**
214
+ - 🎵 MP3 (.mp3)
215
+ - 🎵 WAV (.wav)
216
+ - 🎵 OGG (.ogg)
217
+ - 🎵 M4A (.m4a)
218
+
219
+ **Security First:**
220
+ - ✅ **180+ attack variations tested** - Path traversal, symlinks, Unicode tricks
221
+ - ✅ **100% attack rejection rate** - Every malicious attempt blocked
222
+ - ✅ **OWASP CWE-22 compliant** - Industry-standard security
223
+ - ✅ **7 validation layers** - Defense-in-depth architecture
224
+ - ✅ **File ownership verification** - Only your files accepted
225
+ - ✅ **Magic number validation** - Real audio files only
226
+ - ✅ **Secure storage** - 600 permissions, restricted directory
227
+
228
+ **Smart Validation:**
229
+ - Recommended duration: 30-90 seconds (optimal looping)
230
+ - Maximum: 300 seconds (5 minutes)
231
+ - Maximum size: 50MB
232
+ - Automatic format detection
233
+ - Duration warnings for non-optimal lengths
234
+
235
+ **Perfect for:**
236
+ - 🎮 **Making coding fun** - Your favorite beats while you build
237
+ - 🎼 **Setting the mood** - Match the music to the task (lo-fi for debugging, epic for shipping)
238
+ - 🗂️ **Identifying projects** - Different track per repo so you always know which project Claude is in
239
+ - 🎹 **Deep focus** - Ambient or classical to stay in flow
240
+
241
+ **Features:**
242
+ - Preview before setting
243
+ - One-command upload
244
+ - Works with all TTS providers
245
+ - Loops seamlessly under voice
246
+ - Easy restore to defaults
247
+
248
+ **Menu Options:**
249
+ 1. Change music - Upload new audio file
250
+ 2. Remove music - Clear custom music
251
+ 3. Reset to default - Restore built-in tracks (16 genres)
252
+ 4. Enable/Disable - Toggle background music
253
+ 5. Preview current - Sample your music
254
+
255
+ Configure via: `npx agentvibes` → Music tab
256
+
257
+ **Security Certified:** See full audit report at `docs/security/SECURITY-AUDIT.md`
258
+
259
+ ---
260
+
261
+ ### 🎯 Key Features
262
+
263
+ **🌟 v4.2 — BMAD Party Mode & SSH Receiver:**
264
+ - 🎭 **BMAD Party Mode Voices** — Each agent speaks with their unique voice, music, reverb, personality
265
+ - 🖥️ **SSH Receiver Tab** — Stream TTS audio from headless servers to your local machine over TCP
266
+ - 🎛️ **BMad Tab (TUI)** — Visual agent configurator with auto-assign and bulk edit
267
+ - ⚡ **TTS Latency -1s** — Batched Node.js calls, inotifywait queue, background cleanup
268
+ - 🎨 **ANSI Banner Colors Restored** — Gold/cyan/traffic-light colors in TTS info banner
269
+ - 🔕 **Banner Toggle** — Hide TTS banner without muting (`~/.agentvibes/banner-disabled`)
270
+ - 🔇 **No Party Mode Overlap** — Agents wait for full audio before next speaks
271
+ - 🧹 **Markdown-Clean Speech** — Asterisks/formatting stripped automatically from party mode
272
+
273
+ **🌟 NEW IN v3.6.0 — Voice Explorer Release:**
274
+ - 🏷️ **Friendly Voice Names** - "Ryan" instead of "en_US-libritts_r-medium-speaker-123"
275
+ - 💬 **Intro Text (Pretext)** - Custom prefix for all TTS ("FireBot: Starting...")
276
+ - 🎵 **Custom Background Music** - Upload your own audio files with battle-tested security
277
+ - 🎨 **Interactive Installer** - Preview voices and music during installation
278
+ - 🛡️ **Security Hardening** - 180+ attack variations tested, 100% blocked, OWASP compliant
279
+
280
+ **🪟 NEW IN v3.5.5 — Native Windows Support:**
281
+ - 🖥️ **Windows Native TTS** - Soprano, Piper, and Windows SAPI providers. No WSL required!
282
+ - 🎵 **Background Music** - 16 genre tracks mixed under voice
283
+ - 🎛️ **Reverb & Audio Effects** - 5 reverb levels via ffmpeg
284
+ - 🔊 **Verbosity Control** - High, Medium, or Low settings
285
+ - 🎨 **Beautiful Installer** - `npx agentvibes install` or `.\setup-windows.ps1`
286
+
287
+ **⚡ v3.4.0 Highlights:**
288
+ - 🎤 **Soprano TTS Provider** - Ultra-fast neural TTS with 20x CPU, 2000x GPU acceleration (thanks [@nathanchase](https://github.com/nathanchase)!)
289
+ - 🛡️ **Security Hardening** - 9.5/10 score with comprehensive validation and timeouts
290
+ - 🌐 **Environment Intelligence** - PulseAudio tunnel auto-detection for SSH scenarios
291
+
292
+ **⚡ Core Features:**
293
+ - ⚡ **One-Command Install** - Get started in 30 seconds (`npx agentvibes install` or `.\setup-windows.ps1` without Node.js)
294
+ - 🎭 **Multi-Provider Support** - Soprano (neural), Piper TTS (50+ free voices), macOS Say (100+ built-in), or Windows SAPI
295
+ - 🎙️ **27+ Professional AI Voices** - Character voices, accents, and unique personalities
296
+ - 🎙️ **Verbosity Control** - Choose how much Claude speaks (LOW, MEDIUM, HIGH)
297
+ - 🎙️ **AgentVibes MCP** - Natural language control ("Switch to Aria voice") for Claude Code, Desktop & Warp
298
+ - 🔊 **SSH Audio Optimization** - Auto-detects remote sessions and eliminates static (VS Code Remote SSH, cloud dev)
299
+
300
+ **🎭 Personalization:**
301
+ - 🎭 **19 Built-in Personalities** - From sarcastic to flirty, pirate to dry humor
302
+ - 💬 **Advanced Sentiment System** - Apply personality styles to ANY voice without changing it
303
+ - 🎵 **Voice Preview & Replay** - Listen before you choose, replay last 10 TTS messages
304
+
305
+ **🚀 Integrations & Power Features:**
306
+ - 🔌 **Enhanced BMAD Plugin** - Auto voice switching for BMAD agents with multilingual support
307
+ - 🔊 **Live Audio Feedback** - Hear task acknowledgments and completions in any language
308
+ - 🌍 **30+ Languages** - Multilingual support with native voice quality
309
+ - 🆓 **Free & Open** - Use Piper TTS with no API key required
310
+
311
+ ### 🤗 Hugging Face AI Voice Models
312
+
313
+ **AgentVibes' Piper TTS uses 100% Hugging Face-trained AI voice models** from [rhasspy/piper-voices](https://huggingface.co/rhasspy/piper-voices).
314
+
315
+ **What are Hugging Face voice models?**
316
+
317
+ Hugging Face voice models are pre-trained artificial intelligence models hosted on the Hugging Face Model Hub platform, designed to convert text into human-like speech (Text-to-Speech or TTS) or perform other speech tasks like voice cloning and speech-to-speech translation. They're accessible via their Transformers library for easy use in applications like voice assistants, audio generation, and more.
318
+
319
+ **Key Benefits:**
320
+ - 🎯 **Human-like Speech** - VITS-based neural models for natural pronunciation and intonation
321
+ - 🌍 **35+ Languages** - Multilingual support with native accents
322
+ - 🆓 **100% Open Source** - All Piper voices are free HF models (Tacotron2, FastSpeech2, VITS)
323
+ - 🔧 **Developer-Friendly** - Fine-tune, customize, or deploy for various audio projects
324
+ - ⚡ **Offline & Fast** - No API keys, no internet needed once installed
325
+
326
+ All 50+ Piper voices AgentVibes provides are sourced from Hugging Face's open-source AI voice models, ensuring high-quality, natural-sounding speech synthesis across all supported platforms.
327
+
328
+ ---
329
+
330
+ ## 📑 Table of Contents
331
+
332
+ ### Getting Started
333
+ - [🚀 Quick Start](#-quick-start) - Get voice in 30 seconds (3 simple steps)
334
+ - [📱 Android/Termux](#-quick-setup-android--termux-claude-code-on-your-phone) - Run Claude Code on your phone
335
+ - [📋 Prerequisites](#-prerequisites) - What you actually need (Node.js + optional tools)
336
+ - [✨ What is AgentVibes?](#-what-is-agentvibes) - Overview & key features
337
+ - [🌟 NEW FEATURE HIGHLIGHTS](#-new-feature-highlights) - **START HERE!**
338
+ - [🎭 BMAD Party Mode](#-bmad-party-mode--multi-agent-voice-conversations) - Per-agent voices, music, reverb
339
+ - [🖥️ SSH Receiver](#️-agentvibes-receiver--remote-audio-streaming) - Stream audio from headless servers
340
+ - [💬 Intro Text](#-intro-text-pretext---your-personal-ai-branding) - Custom TTS prefixes
341
+ - [🎵 Custom Background Music](#-custom-background-music---complete-audio-control) - Upload your own tracks
342
+ - [📰 Latest Release](#-latest-release) - v4.3 "Windows Parity" — background music, voice selection, ffmpeg auto-install on Windows
343
+ - [🪟 Windows Setup Guide for Claude Desktop](mcp-server/WINDOWS_SETUP.md) - Complete Windows installation with WSL & Python
344
+
345
+ ### AgentVibes MCP (Natural Language Control)
346
+ - [🎙️ AgentVibes MCP Overview](#%EF%B8%8F-agentvibes-mcp) - **Easiest way** - Natural language commands
347
+ - [For Claude Desktop](docs/mcp-setup.md#for-claude-desktop) - Windows/WSL setup, Python requirements
348
+ - [For Warp Terminal](docs/mcp-setup.md#for-warp-terminal) - Warp configuration
349
+ - [For Claude Code](docs/mcp-setup.md#for-claude-code) - Project-specific setup
350
+
351
+ ### Core Features
352
+ - [🎤 Commands Reference](#-commands-reference) - All available commands
353
+ - [🎙️ Verbosity Control](#%EF%B8%8F-verbosity-control) - Control how much Claude speaks (low/medium/high)
354
+ - [🎭 Personalities vs Sentiments](#-personalities-vs-sentiments) - Two systems explained
355
+ - [🗣️ Voice Library](#%EF%B8%8F-voice-library) - 914 voices with friendly names
356
+ - [🔌 BMAD Plugin](#-bmad-plugin) - Auto voice switching for BMAD agents
357
+ - [🎙️ AgentVibes Receiver - NEW!](#%EF%B8%8F-agentvibes-receiver-remote-audio-streaming-from-voiceless-servers) - Remote audio streaming from voiceless servers
358
+
359
+ ### Integrations & Platforms
360
+ - [🤖 OpenClaw Integration](#-openclaw-integration) - Use AgentVibes with OpenClaw messaging platform
361
+ - [🎙️ AgentVibes Skill for OpenClaw](#-agentvibes-skill-for-openclaw---what-you-get) - 50+ voices, effects, personalities for OpenClaw
362
+ - [📱 AgentVibes Receiver](#-agentvibes-receiver-local-phone-) - Remote audio on phones/local machines
363
+
364
+ ### Advanced Topics
365
+ - [📦 Installation Structure](#-installation-structure) - What gets installed
366
+ - [💡 Common Workflows](#-common-workflows) - Quick examples
367
+ - [🔧 Advanced Features](#-advanced-features) - Custom voices & personalities
368
+ - [🔊 Remote Audio Setup](#-remote-audio-setup) - Play TTS from remote servers
369
+ - [🛠️ Technical Documentation](#️-technical-documentation) - Audio architecture, cross-platform support, voice resolution
370
+ - [🚨 Security Hardening Guide](docs/security-hardening-guide.md) - **REQUIRED if running OpenClaw on remote server**: SSH hardening, Fail2Ban, Tailscale, UFW, AIDE
371
+ - [🔬 Technical Deep Dive](docs/technical-deep-dive.md) - How AgentVibes works under the hood
372
+ - [❓ Troubleshooting](#-troubleshooting) - Common issues & fixes
373
+
374
+ ### Additional Resources
375
+ - [🔗 Useful Links](#-useful-links) - Voice typing & AI tools
376
+ - [🔄 Updating](#-updating) - Keep AgentVibes current
377
+ - [🗑️ Uninstalling](#️-uninstalling) - Remove AgentVibes cleanly
378
+ - [❓ FAQ](#-frequently-asked-questions-faq) - **NEW!** Common questions answered (git-lfs, MCP tokens, installation)
379
+ - [🍎 macOS Testing](docs/macos-testing.md) - Automated testing on macOS with GitHub Actions
380
+ - [🤗 Hugging Face Voice Models](docs/hugging-face-models.md) - Technical details on AI voice models
381
+ - [🙏 Credits](#-credits) - Acknowledgments
382
+ - [🤝 Contributing](#-contributing) - Show support
383
+
384
+ ---
385
+
386
+ ## 📰 Latest Release
387
+
388
+ **[v4.3 - "Windows Parity" Release](https://github.com/paulpreibisch/AgentVibes/releases/tag/v4.3)** 🎉
389
+
390
+ This is the biggest AgentVibes release since the TUI launched in v4.0. Two headline features: **BMAD Party Mode** gives every agent their own voice and music, and the **SSH Receiver** lets you hear your headless server speak on your local machine.
391
+
392
+ ### 🎭 BMAD Party Mode — Multi-Agent Voice Conversations
393
+
394
+ The BMad Method (Build More Architect Dreams) is an AI-driven development framework module that helps you build software from ideation through agentic implementation with specialized AI agents, guided workflows, and intelligent planning.
395
+
396
+ Every agent in a BMAD discussion now speaks with their own individually configured voice, music, reverb, and personality — making the Architect, PM, Developer, QA, and Analyst immediately recognizable the moment they speak.
397
+
398
+ **Auto-enabled** — party mode activates automatically when BMAD is detected. Configure agents visually:
399
+
400
+ ```bash
401
+ npx agentvibes # Press B for BMad Tab
402
+ ```
403
+
404
+ **Each agent gets:**
405
+ - 🎙️ **Their own voice** — 914 to choose from, or auto-assign gender-aware
406
+ - 🎵 **Their own music track** — cinematic for the Architect, lo-fi for the Dev
407
+ - 🎚️ **Their own volume** — fine-tune per-agent, or bulk-set all at once
408
+ - 🎛️ **Their own reverb** — studio, hall, cathedral, room, or none
409
+ - 💬 **Their own pretext** — "Winston says:..." before every line
410
+ - 🎭 **Their own personality** — sarcastic, dramatic, pirate, cheerful...
411
+ - 🔇 **No overlap** — agents wait for full audio before the next one speaks
412
+ - ✨ **Markdown stripped** — no "asterisk asterisk" in TTS output
413
+
414
+ ### 🎛️ BMad Tab — Full Visual Agent Configurator
415
+
416
+ Manage every agent from an interactive table — same polish as the Voices tab:
417
+
418
+ | Key | Action |
419
+ |-----|--------|
420
+ | `Space` | Preview agent with full profile (animated spinner while playing) |
421
+ | `Enter` | Configure voice, music, volume, reverb, personality, pretext |
422
+ | `A` | Auto-assign unique voices (gender-aware, no repeats) |
423
+ | `B` | Bulk Edit — set music / volume / pretext / reverb for all agents |
424
+ | `X` | Reset agent to defaults |
425
+
426
+ The table shows **Voice, Gender, Provider, Reverb, Music, Vol, Pretext** columns. Voice names are automatically beautified: `16Speakers::Rose_Ibex` → `Rose Ibex`.
427
+
428
+ ### 🖥️ SSH Receiver — Hear Your Headless Server
429
+
430
+ Stream TTS from a cloud box, WSL2, or any voiceless server directly to your local machine over TCP:
431
+
432
+ ```bash
433
+ # Local: open TUI → Receiver tab → Start
434
+ npx agentvibes
435
+
436
+ # Remote: AgentVibes auto-detects the receiver and streams audio to you
437
+ ```
438
+
439
+ ### ⚡ ~1 Second Faster TTS
440
+
441
+ - 6 Node.js profile reads collapsed into 1 (~900ms saved per speech)
442
+ - `inotifywait` queue worker — no polling delay
443
+ - Cache cleanup runs off the critical path
444
+
445
+ ### 🎨 ANSI Colors Restored + Banner Toggle
446
+
447
+ Full color in the TTS banner. Silence it without muting audio:
448
+ ```bash
449
+ touch ~/.agentvibes/banner-disabled # or: "turn off the TTS banner" via MCP
450
+ ```
451
+
452
+ ### Quick Install
453
+
454
+ ```bash
455
+ npx agentvibes install
456
+ ```
457
+
458
+ 💡 **Tip:** If `npx agentvibes` shows an older version: `npm cache clean --force && npx agentvibes@latest`
459
+
460
+ 🐛 **Found a bug?** [GitHub Issues](https://github.com/paulpreibisch/AgentVibes/issues)
461
+
462
+ [→ View Complete Release Notes](RELEASE_NOTES.md) | [→ View Previous Release (v4.0.1)](https://github.com/paulpreibisch/AgentVibes/releases/tag/v4.0.1) | [→ View All Releases](https://github.com/paulpreibisch/AgentVibes/releases)
463
+
464
+ [↑ Back to top](#-table-of-contents)
465
+
466
+ ---
467
+
468
+ ## 🎙️ AgentVibes MCP
469
+
470
+ Agent Vibes was originally created to give the Claude Code assistant a voice! Simply install it with an npx command in your terminal, and Claude Code can talk back to you.
471
+
472
+ We've now enhanced this capability by adding an MCP (Model Context Protocol) server. This integration exposes Agent Vibes' functionality directly to your AI assistant, allowing you to configure and control Agent Vibes using natural language instead of typing "/" slash commands.
473
+
474
+ Setting it up is straightforward: just add the MCP server to your Claude Code configuration files.
475
+
476
+ But the convenience doesn't stop there. With the MCP server in place, Claude Desktop can now use Agent Vibes too! We've even tested it successfully with Warp, an AI assistant that helps you navigate Windows and other operating systems.
477
+
478
+ We're thrilled about this expansion because it means Claude Desktop and Warp can finally talk back as well!
479
+
480
+ If you decide to use the MCP server on Claude Desktop, after configuration, give Claude Desktop this command: "every time i give you a command, speak the acknowledgement using agentvibes and the confirmation about what you completed, when done"—and watch the magic happen!
481
+
482
+ **🎯 Control AgentVibes with natural language - no slash commands to remember!**
483
+
484
+ Just say "Switch to Aria voice" or "Speak in Spanish" instead of typing commands.
485
+
486
+ **Works in:** Claude Desktop, Claude Code, Warp Terminal
487
+
488
+ **[→ View Complete MCP Setup Guide](docs/mcp-setup.md)** - Full setup for all platforms, configuration examples, available tools, and MCP vs slash commands comparison
489
+
490
+ [↑ Back to top](#-table-of-contents)
491
+
492
+ ---
493
+
494
+ ## 🚀 Quick Start - Get Voice in 30 Seconds
495
+
496
+ **3 Simple Steps:**
497
+
498
+ ### 1️⃣ Install
499
+ ```bash
500
+ npx agentvibes install
501
+ ```
502
+
503
+ ### 2️⃣ Choose Provider (Auto-Detected)
504
+ - **macOS**: Native `say` provider (100+ voices) ✨
505
+ - **Linux/WSL**: Piper TTS (50+ free voices) 🎙️
506
+ - **Windows Native**: Soprano, Piper, or SAPI 🪟
507
+ - **Android**: Termux with auto-setup 📱
508
+
509
+ ### 3️⃣ Use in Claude Code
510
+ Just code normally - AgentVibes automatically speaks task acknowledgments and completions! 🔊
511
+
512
+ ---
513
+
514
+ ### TUI Console Commands
515
+
516
+ AgentVibes includes a full **Text User Interface (TUI)** built with blessed.js for managing voices, music, settings, and installation — all from a single interactive console.
517
+
518
+ | Command | Description |
519
+ |---------|-------------|
520
+ | `npx agentvibes` | Smart detection — opens Settings if installed, Install if not |
521
+ | `npx agentvibes install` | Open the Install tab directly |
522
+ | `npx agentvibes config` | Open the Settings tab directly |
523
+
524
+ Once inside, use **Tab** / **Shift+Tab** to switch between tabs: **Voices**, **Music**, **BMad**, **Settings**, **Receiver**, and **Install**. Use **[** / **]** to page through voice and music catalogs.
525
+
526
+ ---
527
+
528
+ **🍎 macOS Users (One-Time Setup):**
529
+ ```bash
530
+ brew install bash # Required for bash 5.x features
531
+ ```
532
+ macOS ships with bash 3.2 (from 2007). After this, everything works perfectly!
533
+
534
+ ---
535
+
536
+ **[→ Full Setup Guide](docs/quick-start.md)** - Advanced options, provider switching, and detailed setup
537
+
538
+ [↑ Back to top](#-table-of-contents)
539
+
540
+ [↑ Back to top](#-table-of-contents)
541
+
542
+ ---
543
+
544
+ ## 📋 Prerequisites - What You Actually Need
545
+
546
+ ### Minimum (Core Features)
547
+ **✅ REQUIRED:**
548
+ - **Node.js** ≥16.0 - Check with: `node --version`
549
+
550
+ ### Required for Full Features
551
+ **✅ STRONGLY RECOMMENDED:**
552
+ - **Python** 3.10+ - Needed for Piper TTS voice engine
553
+ - **bash** 5.0+ - macOS only (macOS ships with 3.2 from 2007)
554
+
555
+ ### Optional but Recommended
556
+ **⭕ OPTIONAL (TTS still works without them):**
557
+ - **sox** - Audio effects (reverb, EQ, pitch shifting)
558
+ - **ffmpeg** - Background music, audio padding, RDP compression
559
+
560
+ ### NOT Required (Despite What You've Heard)
561
+ **❌ DEFINITELY NOT NEEDED:**
562
+ - ❌ Git or git-lfs (npm handles everything)
563
+ - ❌ Repository cloning (unless you're contributing code)
564
+ - ❌ Build tools or C++ compilers (pre-built package ready to use)
565
+
566
+ ### Installation Methods
567
+
568
+ | Method | Command | Use Case |
569
+ |--------|---------|----------|
570
+ | **✅ RECOMMENDED: NPX (via npm)** | `npx agentvibes install` | **All platforms** - Just want to use AgentVibes |
571
+ | **🪟 Windows PowerShell** | `.\setup-windows.ps1` | **Windows** - Standalone installer (no Node.js needed) |
572
+ | **⚠️ Git Clone** | `git clone ...` | **Developers Only** - Contributing code |
573
+
574
+ **Why npx?** Zero git operations, no build steps, just 30 seconds to voice!
575
+
576
+ ### For Developers (Contributing Code)
577
+
578
+ If you want to contribute to AgentVibes:
579
+ ```bash
580
+ git clone https://github.com/paulpreibisch/AgentVibes.git
581
+ cd AgentVibes
582
+ npm install
583
+ npm link
584
+ ```
585
+
586
+ Requires: Node.js 16+, Git (no git-lfs), and `npm link` familiarity.
587
+
588
+ [↑ Back to top](#-table-of-contents)
589
+
590
+ ---
591
+
592
+ ---
593
+
594
+ ## 📱 Quick Setup: Android & Termux (Claude Code on Your Phone!)
595
+
596
+ **Want to run Claude Code on your Android phone with professional voices?**
597
+
598
+ Simply install Termux from F-Droid (NOT Google Play) and run:
599
+ ```bash
600
+ pkg update && pkg upgrade
601
+ pkg install nodejs-lts
602
+ npx agentvibes install
603
+ ```
604
+
605
+ The AgentVibes installer auto-detects Termux and installs everything needed (proot-distro for compatibility, Piper TTS, audio playback).
606
+
607
+ **[→ Full Android/Termux Setup Guide](#-android--termux)** - Detailed troubleshooting and verification steps
608
+
609
+ [↑ Back to top](#-table-of-contents)
610
+
611
+ ---
612
+
613
+ ## 📋 System Requirements
614
+
615
+ AgentVibes requires certain system dependencies for optimal audio processing and playback. Requirements vary by operating system and TTS provider.
616
+
617
+ ### Core Requirements (All Platforms)
618
+
619
+ | Tool | Required For | Why It's Needed |
620
+ |------|-------------|-----------------|
621
+ | **Node.js** ≥16.0 | All platforms | Runtime for AgentVibes installer and MCP server |
622
+ | **Bash** ≥5.0 | macOS | Modern bash features (macOS ships with 3.2 from 2007) |
623
+ | **Python** 3.10+ | Piper TTS, MCP server | Runs Piper voice engine and MCP server |
624
+
625
+ ### Audio Processing Tools (Recommended)
626
+
627
+ | Tool | Status | Purpose | Impact if Missing |
628
+ |------|--------|---------|------------------|
629
+ | **sox** | Recommended | Audio effects (reverb, EQ, pitch, compression) | No audio effects, still works |
630
+ | **ffmpeg** | Recommended | Background music mixing, audio padding, RDP compression | No background music or RDP optimization |
631
+
632
+ ### Platform-Specific Requirements
633
+
634
+ #### 🐧 Linux / WSL
635
+
636
+ ```bash
637
+ # Ubuntu/Debian
638
+ sudo apt-get update
639
+ sudo apt-get install -y sox ffmpeg python3-pip pipx
640
+
641
+ # Fedora/RHEL
642
+ sudo dnf install -y sox ffmpeg python3-pip pipx
643
+
644
+ # Arch Linux
645
+ sudo pacman -S sox ffmpeg python-pip python-pipx
646
+ ```
647
+
648
+ **Audio Playback** (one of the following):
649
+ - `paplay` (PulseAudio - usually pre-installed)
650
+ - `aplay` (ALSA - fallback)
651
+ - `mpg123` (fallback)
652
+ - `mpv` (fallback)
653
+
654
+ **Why these tools?**
655
+ - **sox**: Applies audio effects defined in `.claude/config/audio-effects.cfg` (reverb, pitch shifting, EQ, compression)
656
+ - **ffmpeg**: Mixes background music tracks, adds silence padding to prevent audio cutoff, compresses audio for RDP/SSH sessions
657
+ - **paplay/aplay**: Plays generated TTS audio files
658
+ - **pipx**: Isolated Python environment manager for Piper TTS installation
659
+
660
+ #### 🍎 macOS
661
+
662
+ ```bash
663
+ # Install Homebrew if not already installed
664
+ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
665
+
666
+ # Required: Modern bash
667
+ brew install bash
668
+
669
+ # Recommended: Audio processing tools
670
+ brew install sox ffmpeg pipx
671
+ ```
672
+
673
+ **Audio Playback**:
674
+ - `afplay` (built-in - always available)
675
+ - `say` (built-in - for macOS TTS provider)
676
+
677
+ **Why these tools?**
678
+ - **bash 5.x**: macOS ships with bash 3.2 which lacks associative arrays and other modern features AgentVibes uses
679
+ - **sox**: Same audio effects processing as Linux
680
+ - **ffmpeg**: Same background music and padding as Linux
681
+ - **afplay**: Built-in macOS audio player
682
+ - **say**: Built-in macOS text-to-speech (alternative to Piper)
683
+
684
+ #### 🪟 Windows
685
+
686
+ **Option A: Native Windows (Recommended)**
687
+
688
+ AgentVibes now supports native Windows with three TTS providers. No WSL required!
689
+
690
+ ```powershell
691
+ # Interactive Node.js installer (recommended)
692
+ npx agentvibes install
693
+
694
+ # Or use the standalone PowerShell installer
695
+ .\setup-windows.ps1
696
+ ```
697
+
698
+ **Providers available natively:**
699
+ - **Soprano** - Ultra-fast neural TTS (best quality, requires `pip install soprano-tts`)
700
+ - **Windows Piper** - High quality offline neural voices (auto-downloaded)
701
+ - **Windows SAPI** - Built-in Windows voices (zero setup)
702
+
703
+ **Requirements:** Node.js 16+, PowerShell 5.1+, ffmpeg (optional, for background music & reverb)
704
+
705
+ See [Windows Native Setup Guide](WINDOWS-SETUP.md) for full instructions.
706
+
707
+ **Option B: WSL (Legacy)**
708
+
709
+ For Claude Desktop or WSL-based workflows, follow the [Windows WSL Guide](mcp-server/WINDOWS_SETUP.md).
710
+
711
+ ```powershell
712
+ # Install WSL from PowerShell (Administrator)
713
+ wsl --install -d Ubuntu
714
+ ```
715
+
716
+ Then follow Linux requirements above inside WSL.
717
+
718
+ #### 🤖 Android / Termux
719
+
720
+ **Running Claude Code on Your Android Using Termux**
721
+
722
+ AgentVibes fully supports Android devices through the [Termux app](https://termux.dev/). This enables you to run Claude Code with professional TTS voices directly on your Android phone or tablet!
723
+
724
+ **Quick Setup:**
725
+
726
+ ```bash
727
+ # 1. Install Termux from F-Droid (NOT Google Play - it's outdated)
728
+ # Download: https://f-droid.org/en/packages/com.termux/
729
+
730
+ # 2. Install Node.js in Termux
731
+ pkg update && pkg upgrade
732
+ pkg install nodejs-lts
733
+
734
+ # 3. Install AgentVibes (auto-detects Android and runs Termux installer)
735
+ npx agentvibes install
736
+ ```
737
+
738
+ **What Gets Installed?**
739
+
740
+ The Termux installer automatically sets up:
741
+ - **proot-distro** with Debian (for glibc compatibility)
742
+ - **Piper TTS** via proot wrapper (Android uses bionic libc, not glibc)
743
+ - **termux-media-player** for audio playback (`paplay` doesn't work on Android)
744
+ - **Audio dependencies**: ffmpeg, sox, bc for processing
745
+ - **termux-api** for Android-specific audio routing
746
+
747
+ **Why Termux Instead of Standard Installation?**
748
+
749
+ Android's architecture requires special handling:
750
+ - ❌ Standard pip/pipx fails (missing wheels for bionic libc)
750
+ - ❌ Linux binaries require glibc (Android uses bionic)
751
+ - ❌ `/tmp` directory is not accessible on Android
752
+ - ❌ Standard audio tools like `paplay` don't exist
754
+
755
+ The Termux installer solves all of these issues with proot-distro and Android-native audio playback!
756
+
757
+ **Requirements:**
758
+ - [Termux app](https://f-droid.org/en/packages/com.termux/) (from F-Droid, NOT Google Play)
759
+ - [Termux:API](https://f-droid.org/en/packages/com.termux.api/) (for audio playback)
760
+ - Android 7.0+ (recommended: Android 10+)
761
+ - ~500MB free storage (for Piper TTS + voice models)
762
+
763
+ **Audio Playback:**
764
+ - Uses `termux-media-player` instead of `paplay`
765
+ - Audio automatically routes through Android's media system
766
+ - Supports all Piper TTS voices (50+ languages)
767
+
768
+ **Verifying Your Setup:**
769
+
770
+ ```bash
771
+ # Check Termux environment
772
+ echo $PREFIX # Should show /data/data/com.termux/files/usr
773
+
774
+ # Check Node.js
775
+ node --version # Should be ≥16.0
776
+
777
+ # Check if Piper is installed
778
+ which piper # Should return /data/data/com.termux/files/usr/bin/piper
779
+
780
+ # Test audio playback
781
+ termux-media-player play /path/to/audio.wav
782
+ ```
783
+
784
+ **Troubleshooting:**
785
+
786
+ | Issue | Solution |
787
+ |-------|----------|
788
+ | "piper: not found" | Run `npx agentvibes install` - auto-detects Termux |
789
+ | No audio playback | Install Termux:API from F-Droid |
790
+ | Permission denied | Run `termux-setup-storage` to grant storage access |
791
+ | Slow installation | Use WiFi, not mobile data (~300MB download) |
792
+
793
+ **Why F-Droid and Not Google Play?**
794
+
795
+ Google Play's Termux version is outdated and unsupported. Always use the [F-Droid version](https://f-droid.org/en/packages/com.termux/) for the latest security updates and compatibility.
796
+
797
+ ### TTS Provider Requirements
798
+
799
+ #### Piper TTS (Free, Offline)
800
+ - **Python** 3.10+
801
+ - **pipx** (for isolated installation)
802
+ - **Disk Space**: ~50MB per voice model
803
+ - **Internet**: Only for initial voice downloads
804
+
805
+ ```bash
806
+ # Installed automatically by AgentVibes
807
+ pipx install piper-tts
808
+ ```
809
+
810
+ #### macOS Say (Built-in, macOS Only)
811
+ - No additional requirements
812
+ - 100+ voices pre-installed on macOS
813
+ - Use: `/agent-vibes:provider switch macos`
814
+
815
+ ### Verifying Your Setup
816
+
817
+ ```bash
818
+ # Check all dependencies
819
+ node --version # Should be ≥16.0
820
+ python3 --version # Should be ≥3.10
821
+ bash --version # Should be ≥5.0 (macOS users!)
822
+ sox --version # Optional but recommended
823
+ ffmpeg -version # Optional but recommended
824
+ pipx --version # Required for Piper TTS
825
+
826
+ # Check audio playback (Linux/WSL)
827
+ paplay --version || aplay --version
828
+
829
+ # Check audio playback (macOS)
830
+ which afplay # Should return /usr/bin/afplay
831
+ ```
832
+
833
+ ### What Happens Without Optional Dependencies?
834
+
835
+ | Missing Tool | Impact | Workaround |
836
+ |-------------|--------|------------|
837
+ | sox | No audio effects (reverb, EQ, pitch) | TTS still works, just no effects |
838
+ | ffmpeg | No background music, no audio padding | TTS still works, audio may cut off slightly early |
839
+ | paplay/aplay | No audio playback on Linux | Install at least one audio player |
840
+
841
+ **All TTS generation still works** - optional tools only enhance the experience!
842
+
843
+ [↑ Back to top](#-table-of-contents)
844
+
845
+ ---
846
+
847
+ ## 🎭 Choose Your Voice Provider
848
+
849
+ **Piper TTS** (free, works offline), **Soprano** (free, neural), **macOS Say** (free, built-in on Mac), or **Windows SAPI** (free, built-in on Windows) - pick one and switch anytime.
850
+
851
+ | Provider | Platform | Cost | Quality | Setup |
852
+ |----------|----------|------|---------|-------|
853
+ | **macOS Say** | macOS only | Free (built-in) | ⭐⭐⭐⭐ | Zero config |
854
+ | **Piper** | Linux/WSL/Windows | Free | ⭐⭐⭐⭐ | Auto-downloads |
855
+ | **Soprano** | Linux/WSL/Windows | Free | ⭐⭐⭐⭐⭐ | `pip install soprano-tts` |
856
+ | **Windows SAPI** | Windows | Free (built-in) | ⭐⭐⭐ | Zero config |
857
+
858
+ On macOS, the native `say` provider is automatically detected and recommended!
859
+
860
+ **[→ Provider Comparison Guide](docs/providers.md)**
861
+
862
+ [↑ Back to top](#-table-of-contents)
863
+
864
+ ---
865
+
866
+ ## 🎤 Commands Reference
867
+
868
+ AgentVibes provides **50+ slash commands** and **natural language MCP equivalents**.
869
+
870
+ **Quick Examples:**
871
+ ```bash
872
+ # Voice control
873
+ /agent-vibes:switch Aria # Or: "Switch to Aria voice"
874
+ /agent-vibes:list # Or: "List all voices"
875
+
876
+ # Personality & sentiment
877
+ /agent-vibes:personality pirate # Or: "Set personality to pirate"
878
+ /agent-vibes:sentiment sarcastic # Or: "Apply sarcastic sentiment"
879
+
880
+ # Language & learning
881
+ /agent-vibes:set-language spanish # Or: "Speak in Spanish"
882
+ /agent-vibes:learn # Or: "Enable learning mode"
883
+ ```
884
+
885
+ **[→ View Complete Command Reference](docs/commands.md)** - All voice, system, personality, sentiment, language, and BMAD commands with MCP equivalents
886
+
887
+ ### Intro Text Commands
888
+
889
+ ```bash
890
+ # Configure intro text — open Settings tab
891
+ npx agentvibes
892
+
893
+ # View current intro text
894
+ cat ~/.claude/config/intro-text.txt
895
+ ```
896
+
897
+ **MCP Equivalent:**
898
+ ```
899
+ "Set my intro text to 'FireBot: '"
900
+ "What's my current intro text?"
901
+ "Clear my intro text"
902
+ ```
903
+
904
+ ### Custom Music Commands
905
+
906
+ ```bash
907
+ # Configure background music — open Music tab
908
+ npx agentvibes
909
+ ```
910
+
911
+ **MCP Equivalent:**
912
+ ```
913
+ "Configure my background music"
914
+ "Add custom background music"
915
+ "Remove custom music"
916
+ "Preview my background music"
917
+ ```
918
+
919
+ ### Friendly Voice Name Commands
920
+
921
+ ```bash
922
+ # Switch using friendly name
923
+ /agent-vibes:switch Ryan
924
+ /agent-vibes:switch Sarah
925
+
926
+ # List all voices with friendly names
927
+ /agent-vibes:list
928
+
929
+ # Get current voice (shows friendly name if available)
930
+ /agent-vibes:whoami
931
+ ```
932
+
933
+ **MCP Equivalent:**
934
+ ```
935
+ "Switch to Ryan voice"
936
+ "Use the Sarah voice"
937
+ "List all available voices"
938
+ ```
939
+
940
+ [↑ Back to top](#-table-of-contents)
941
+
942
+ ---
943
+
944
+ ## 🎙️ Verbosity Control
945
+
946
+ **Control how much Claude speaks while working!** 🔊
947
+
948
+ Choose from three verbosity levels:
949
+
950
+ ### LOW (Minimal) 🔇
951
+ - Acknowledgments only (start of task)
952
+ - Completions only (end of task)
953
+ - Perfect for quiet work sessions
954
+
955
+ ### MEDIUM (Balanced) 🤔
956
+ - Acknowledgments + completions
957
+ - Major decisions ("I'll use grep to search")
958
+ - Key findings ("Found 12 instances")
959
+ - Perfect for understanding decisions without full narration
960
+
961
+ ### HIGH (Maximum Transparency) 💭
962
+ - All reasoning ("Let me search for all instances")
963
+ - All decisions ("I'll use grep for this")
964
+ - All findings ("Found it at line 1323")
965
+ - Perfect for learning mode, debugging complex tasks
966
+
967
+ **Quick Commands:**
968
+ ```bash
969
+ /agent-vibes:verbosity # Show current level
970
+ /agent-vibes:verbosity high # Maximum transparency
971
+ /agent-vibes:verbosity medium # Balanced
972
+ /agent-vibes:verbosity low # Minimal (default)
973
+ ```
974
+
975
+ **MCP Equivalent:**
976
+ ```
977
+ "Set verbosity to high"
978
+ "What's my current verbosity level?"
979
+ ```
980
+
981
+ 💡 **How it works:** Claude uses emoji markers (💭 🤔 ✓) in its text, and AgentVibes automatically detects and speaks them based on your verbosity level. No manual TTS calls needed!
982
+
983
+ ⚠️ **Note:** Changes take effect on next Claude Code session restart.
984
+
985
+ [↑ Back to top](#-table-of-contents)
986
+
987
+ ---
988
+
989
+ ## 📚 Language Learning Mode
990
+
991
+ **🎯 Learn Spanish (or 30+ languages) while you program!** 🌍
992
+
993
+ Every task acknowledgment plays **twice** - first in English, then in your target language. Context-based learning while you code!
994
+
995
+ **[→ View Complete Learning Mode Guide](docs/language-learning-mode.md)** - Full tutorial, quick start, commands, speech rate control, supported languages, and pro tips
996
+
997
+ [↑ Back to top](#-table-of-contents)
998
+
999
+ ---
1000
+
1001
+ ## 🎭 Personalities vs Sentiments
1002
+
1003
+ **Two ways to add personality:**
1004
+
1005
+ - **🎪 Personalities** - Changes BOTH voice AND speaking style (e.g., `pirate` personality = Pirate Marshal voice + pirate speak)
1006
+ - **💭 Sentiments** - Keeps your current voice, only changes speaking style (e.g., Aria voice + sarcastic sentiment)
1007
+
1008
+ **[→ Complete Personalities Guide](docs/personalities.md)** - All 19 personalities, create custom ones
1009
+
1010
+ [↑ Back to top](#-table-of-contents)
1011
+
1012
+ ---
1013
+
1014
+ ## 🗣️ Voice Library
1015
+
1016
+ Use the **AgentVibes TUI installer** (`/audio-browser`) to browse, sample, and install from 914 voices interactively.
1017
+
1018
+ ### Friendly Voice Names
1019
+
1020
+ All voices now have memorable names! Instead of technical IDs like `en_US-libritts_r-medium-speaker-123`, just use friendly names like **Ryan**, **Joe**, or **Sarah**.
1021
+
1022
+ **Voice Metadata Includes:**
1023
+ - Display name and technical ID
1024
+ - Gender, accent, and region
1025
+ - Personality traits (professional, warm, friendly, etc.)
1026
+ - Recommended use cases
1027
+ - Quality rating and sample rate
1028
+
1029
+ ### Voice Categories
1030
+
1031
+ **Curated Voices** (10 personalities):
1032
+ These hand-picked voices cover common use cases with clear characteristics.
1033
+
1034
+ **Speaker Variations** (904 voices):
1035
+ High-quality Piper TTS voices from the libritts-high model. Each speaker has unique vocal characteristics, accents, and tones.
1036
+
1037
+ ### Popular Voices
1038
+
1039
+ AgentVibes includes professional AI voices from Piper TTS and macOS Say with multilingual support.
1040
+
1041
+ 🎧 **Try in Claude Code:** `/agent-vibes:preview` to hear all voices
1042
+ 🌍 **Multilingual:** Use Antoni, Rachel, Domi, or Bella for automatic language detection
1043
+
1044
+ **[→ View Complete Voice Library](docs/voice-library.md)** - All voices with clickable samples, descriptions, and best use cases
1045
+
1046
+ [↑ Back to top](#-table-of-contents)
1047
+
1048
+ ---
1049
+
1050
+ ## 🔌 BMAD Plugin
1051
+
1052
+ **Automatically switch voices when using BMAD agents!**
1053
+
1054
+ The BMAD plugin detects when you activate a BMAD agent (e.g., `/BMad:agents:pm`) and automatically uses the assigned voice for that role.
1055
+
1056
+ **Version Support**: AgentVibes supports both BMAD v4 and v6-alpha installations. Version detection is automatic - just install BMAD and AgentVibes will detect and configure itself correctly!
1057
+
1058
+ ### 🔊 TTS Injection: How It Works
1059
+
1060
+ BMAD uses a **loosely-coupled injection system** for voice integration. BMAD source files contain placeholder markers that AgentVibes replaces with speaking instructions during installation:
1061
+
1062
+ **Before Installation (BMAD Source):**
1063
+ ```xml
1064
+ <rules>
1065
+ <r>ALWAYS communicate in {communication_language}...</r>
1066
+ <!-- TTS_INJECTION:agent-tts -->
1067
+ <r>Stay in character until exit selected</r>
1068
+ </rules>
1069
+ ```
1070
+
1071
+ **After Installation (with AgentVibes enabled):**
1072
+ ```xml
1073
+ <rules>
1074
+ <r>ALWAYS communicate in {communication_language}...</r>
1075
+ - When responding to user messages, speak your responses using TTS:
1076
+ Call: `.claude/hooks/bmad-speak.sh '{agent-id}' '{response-text}'`
1077
+ Where {agent-id} is your agent type (pm, architect, dev, etc.)
1078
+
1079
+ - Auto Voice Switching: AgentVibes automatically switches to the voice
1080
+ assigned for your agent role when activated
1081
+ <r>Stay in character until exit selected</r>
1082
+ </rules>
1083
+ ```
1084
+
1085
+ **After Installation (with TTS disabled):**
1086
+ ```xml
1087
+ <rules>
1088
+ <r>ALWAYS communicate in {communication_language}...</r>
1089
+ <r>Stay in character until exit selected</r>
1090
+ </rules>
1091
+ ```
1092
+
1093
+ This design means **any TTS provider** can integrate with BMAD by replacing these markers with their own instructions!
1094
+
1095
+ **[→ View Complete BMAD Documentation](docs/bmad-plugin.md)** - All agent mappings, language support, TTS injection details, plugin management, and customization
1096
+
1097
+ [↑ Back to top](#-table-of-contents)
1098
+
1099
+ ---
1100
+
1101
+ ## 🤖 OpenClaw Integration
1102
+
1103
+ **Use AgentVibes TTS with OpenClaw - the revolutionary AI assistant you can access via any instant messenger!**
1104
+
1105
+ **What is OpenClaw?** [OpenClaw](https://openclaw.ai/) is a revolutionary AI assistant that brings Claude AI to your favorite messaging platforms - WhatsApp, Telegram, Discord, and more. No apps to install, no websites to visit - just message your AI assistant like you would a friend.
1106
+
1107
+ 🌐 **Website**: https://openclaw.ai/
1108
+
1109
+ AgentVibes seamlessly integrates with OpenClaw, providing professional text-to-speech for AI assistants running on messaging platforms and remote servers.
1110
+
1111
+ ### 🚨 CRITICAL: Security Before Running OpenClaw on Any Remote Server
1112
+
1113
+ ⚠️ **SECURITY IS NOT OPTIONAL** - Running OpenClaw on a remote server exposes your infrastructure to attack vectors including SSH compromise, credential theft, and lateral movement.
1114
+
1115
+ **👉 READ THIS FIRST:** [Security Hardening Guide](docs/security-hardening-guide.md) - **Required reading** covering:
1116
+ - ✅ SSH hardening (key-only auth, port 2222, fail2ban)
1117
+ - ✅ Firewall configuration (UFW/iptables)
1117
+ - ✅ Intrusion detection (AIDE, Wazuh)
1119
+ - ✅ VPN tunneling (Tailscale alternative to direct SSH)
1120
+
1121
+ **Do not expose your OpenClaw server to the internet without reading this guide.**
1122
+
1123
+ ### 🎯 Key Benefits
1124
+
1125
+ - **Free & Offline**: No API costs, works without internet
1126
+ - **Remote SSH Audio**: Audio tunnels from server to local machine via PulseAudio
1127
+ - **50+ Voices**: Professional AI voices in 30+ languages
1128
+ - **Zero Config**: Automatic when AgentVibes is installed
1129
+
1130
+ ### 🚀 Installation
1131
+
1132
+ AgentVibes includes a ready-to-use OpenClaw skill that enables TTS on messaging platforms. The setup involves two components:
1133
+
1134
+ #### Component 1: OpenClaw Server (Remote)
1135
+
1136
+ Install AgentVibes on your OpenClaw server:
1137
+
1138
+ ```bash
1139
+ # On your remote server where OpenClaw is running
1140
+ npx agentvibes install
1141
+ ```
1142
+
1143
+ The OpenClaw skill is **automatically included** in the AgentVibes npm package at `.clawdbot/skill/SKILL.md`.
1144
+
1145
+ **How to activate the skill in OpenClaw:**
1146
+
1147
+ 1. **Locate the skill** - After installing AgentVibes, the skill is at:
1148
+ ```
1149
+ node_modules/agentvibes/.clawdbot/skill/SKILL.md
1150
+ ```
1151
+
1152
+ 2. **Link to OpenClaw skills directory** (if OpenClaw uses skills):
1153
+ ```bash
1154
+ # Example - adjust path based on your OpenClaw installation
1155
+ ln -s $(npm root -g)/agentvibes/.clawdbot/skill/SKILL.md ~/.openclaw/skills/agentvibes.md
1156
+ ```
1157
+
1158
+ 3. **OpenClaw auto-detection** - Many OpenClaw setups automatically detect AgentVibes when it's installed. Check your OpenClaw logs for:
1159
+ ```
1160
+ ✓ AgentVibes skill detected and loaded
1161
+ ```
1162
+
1163
+ ---
1164
+
1165
+ #### 🎙️ AgentVibes Voice Management Skill for OpenClaw
1166
+
1167
+ Manage your text-to-speech voices across multiple providers with the AgentVibes Voice Management Skill:
1168
+
1169
+ **Voice Management Features:**
1170
+ - 🎤 **50+ Professional Voices** - Across Piper TTS (free, offline) and macOS Say providers
1170
+ - 🔀 **Multi-Provider Support** - Switch between Piper TTS (free, offline) and macOS Say
1172
+ - 👂 **Voice Preview** - Listen to voices before selecting them
1173
+ - 🎚️ **Voice Customization** - Add custom voices, set pretext, control speech rate
1174
+ - 📋 **Voice Management** - List, switch, replay, and manage your voice library
1175
+ - 🔇 **Mute Control** - Mute/unmute TTS output with persistent settings
1176
+ - 🌍 **Multilingual Support** - Voices in 30+ languages across all providers
1177
+
1178
+ **Installation Confirmation:**
1179
+ ✅ The skill is **automatically included** in the AgentVibes npm package at:
1180
+ ```
1181
+ node_modules/agentvibes/.clawdbot/skill/SKILL.md
1182
+ ```
1183
+
1184
+ No extra setup needed - when you run `npx agentvibes install` on your OpenClaw server, the skill is ready to use!
1185
+
1186
+ **Full Skill Documentation:**
1187
+ **[→ View Complete AgentVibes Skill Guide](.clawdbot/skill/SKILL.md)** - 430+ lines covering:
1188
+ - Quick start with 50+ voice options
1189
+ - Background music & effects management
1190
+ - Personality system (19+ styles)
1191
+ - Voice effects (reverb, EQ)
1192
+ - Speed & verbosity control
1193
+ - Remote SSH audio setup
1194
+ - Troubleshooting & complete reference
1195
+
1196
+ **Popular Voice Examples:**
1197
+ ```bash
1198
+ # Female voices
1199
+ npx agentvibes speak "Hello" --voice en_US-amy-medium
1200
+ npx agentvibes speak "Bonjour" --voice fr_FR-siwis-medium
1201
+
1202
+ # Male voices
1203
+ npx agentvibes speak "Hello" --voice en_US-lessac-medium
1204
+ npx agentvibes speak "Good day" --voice en_GB-alan-medium
1205
+
1206
+ # Add personality!
1207
+ bash ~/.claude/hooks/personality-manager.sh set sarcastic
1208
+ bash ~/.claude/hooks/play-tts.sh "Oh wonderful, another request"
1209
+ ```
1210
+
1211
+ ---
1212
+
1213
+ #### Component 2: AgentVibes Receiver (Local/Phone) ⚠️ REQUIRED
1214
+
1215
+ **CRITICAL: You MUST install AgentVibes on your phone (or local machine) to receive and play audio!**
1216
+
1217
+ Without this, audio cannot be heard - the server generates TTS but needs a receiver to play it.
1218
+
1219
+ **Install on Android Phone (Termux):**
1220
+
1221
+ 1. **Install Termux from F-Droid** (NOT Google Play):
1222
+ - Download: https://f-droid.org/en/packages/com.termux/
1223
+
1224
+ 2. **Install Node.js in Termux:**
1225
+ ```bash
1226
+ pkg update && pkg upgrade
1227
+ pkg install nodejs-lts
1228
+ ```
1229
+
1230
+ 3. **Install AgentVibes in Termux:**
1231
+ ```bash
1232
+ npx agentvibes install
1233
+ ```
1234
+
1235
+ 4. **Install Termux:API** (for audio playback):
1236
+ - Download: https://f-droid.org/en/packages/com.termux.api/
1237
+ - Then in Termux: `pkg install termux-api`
1238
+
1239
+ **Install on Local Mac/Linux:**
1240
+
1241
+ ```bash
1242
+ npx agentvibes install
1243
+ ```
1244
+
1245
+ **Why is this needed?**
1246
+ - The **server generates TTS** but has no speakers (headless)
1247
+ - AgentVibes on your **phone acts as the audio receiver** via SSH tunnel
1248
+ - Audio tunnels from server → SSH → phone → speakers 🔊
1249
+
1250
+ Without AgentVibes installed on the receiving device, you'll generate audio but hear nothing!
1251
+
1252
+ #### How It Works: Server → SSH Tunnel → Local Playback
1253
+
1254
+ ```
1255
+ ┌─────────────────────────────────────────────────────────┐
1256
+ │ 1. User messages OpenClaw via Telegram/WhatsApp │
1257
+ │ "Tell me about the weather" │
1258
+ └─────────────────────────────────────────────────────────┘
1259
+
1260
+ ┌─────────────────────────────────────────────────────────┐
1261
+ │ 2. OpenClaw (Server) processes request with Claude │
1262
+ │ AgentVibes skill generates TTS audio │
1263
+ └─────────────────────────────────────────────────────────┘
1264
+
1265
+ ┌─────────────────────────────────────────────────────────┐
1266
+ │ 3. Audio tunnels through SSH → PulseAudio (port 14713)│
1267
+ │ Server: PULSE_SERVER=tcp:localhost:14713 │
1268
+ └─────────────────────────────────────────────────────────┘
1269
+
1270
+ ┌─────────────────────────────────────────────────────────┐
1271
+ │ 4. Local AgentVibes receives and plays audio │
1272
+ │ Phone speakers, laptop speakers, etc. │
1273
+ │ 🔊 "The weather is sunny and 72 degrees" │
1274
+ └─────────────────────────────────────────────────────────┘
1275
+ ```
1276
+
1277
+ **Architecture:**
1278
+ - **Server (OpenClaw)**: Generates TTS, sends via PulseAudio
1279
+ - **SSH Tunnel**: RemoteForward port 14713 (encrypted transport)
1280
+ - **Local (Termux/Desktop)**: AgentVibes receives audio, plays on speakers
1281
+
1282
+ This creates a **Siri-like experience** - message from anywhere, hear responses on your phone! 📱🎤
1283
+
1284
+ ### 📝 Usage
1285
+
1286
+ #### Basic TTS Commands
1287
+
1288
+ ```bash
1289
+ # Basic TTS
1290
+ npx agentvibes speak "Hello from OpenClaw"
1291
+
1292
+ # With different voices
1293
+ npx agentvibes speak "Hello" --voice en_US-amy-medium
1294
+ npx agentvibes speak "Bonjour" --voice fr_FR-siwis-medium
1295
+
1296
+ # List available voices
1297
+ npx agentvibes voices
1298
+ ```
1299
+
1300
+ #### Advanced: Direct Hook Usage with Voice Override
1301
+
1302
+ For programmatic control, use the TTS hook directly:
1303
+
1304
+ ```bash
1305
+ # Basic: Use default voice
1306
+ bash ~/.claude/hooks/play-tts.sh "Hello from OpenClaw"
1307
+
1308
+ # Advanced: Override voice per message
1309
+ bash ~/.claude/hooks/play-tts.sh "Welcome message" "en_US-amy-medium"
1310
+ bash ~/.claude/hooks/play-tts.sh "Bonjour!" "fr_FR-siwis-medium"
1311
+ bash ~/.claude/hooks/play-tts.sh "British greeting" "en_GB-alan-medium"
1312
+ ```
1313
+
1314
+ **Parameters:**
1315
+ - `$1` - **TEXT** (required): Message to speak
1316
+ - `$2` - **VOICE** (optional): Voice name to override default
1317
+
1318
+ #### Audio Effects Configuration for OpenClaw
1319
+
1320
+ **File**: `.claude/config/audio-effects.cfg`
1321
+
1322
+ Customize audio effects, background music, and voice processing per agent or use default settings:
1323
+
1324
+ **Format:**
1325
+ ```
1326
+ AGENT_NAME|SOX_EFFECTS|BACKGROUND_FILE|BACKGROUND_VOLUME
1327
+ ```
1328
+
1329
+ **Example Configuration:**
1330
+
1331
+ ```bash
1332
+ # Default - subtle background music
1333
+ default||agentvibes_soft_flamenco_loop.mp3|0.30
1334
+
1335
+ # Custom agent with reverb + background
1336
+ MyAgent|reverb 40 50 90 gain -2|agentvibes_soft_flamenco_loop.mp3|0.20
1337
+
1338
+ # Agent with pitch shift and EQ
1339
+ Assistant|pitch -100 equalizer 3000 1q +2|agentvibes_dark_chill_step_loop.mp3|0.15
1340
+ ```
1341
+
1342
+ **Available SOX Effects:**
1343
+
1344
+ | Effect | Syntax | Example | Description |
1345
+ |--------|--------|---------|-------------|
1346
+ | **Reverb** | `reverb <reverberance> <HF-damping> <room-scale>` | `reverb 40 50 90` | Adds room ambiance (light: 30 40 70, heavy: 50 60 100) |
1347
+ | **Pitch** | `pitch <cents>` | `pitch -100` | Shift pitch (100 cents = 1 semitone, negative = lower) |
1348
+ | **Equalizer** | `equalizer <freq> <width>q <gain-dB>` | `equalizer 3000 1q +2` | Boost/cut frequencies (bass: 200Hz, treble: 4000Hz) |
1349
+ | **Gain** | `gain <dB>` | `gain -2` | Adjust volume (negative = quieter, positive = louder) |
1350
+ | **Compand** | `compand <attack,decay> <threshold:in,out>` | `compand 0.3,1 6:-70,-60,-20` | Dynamic range compression (makes quiet parts louder) |
1351
+
1352
+ **Background Music Tracks:**
1353
+
1354
+ Built-in tracks available in `.claude/audio/tracks/`:
1355
+ - `agentvibes_soft_flamenco_loop.mp3` - Warm, rhythmic flamenco
1356
+ - `agentvibes_dark_chill_step_loop.mp3` - Modern chill electronic
1357
+ - (50+ additional tracks available)
1358
+
1359
+ **Background Volume:**
1360
+ - `0.10` - Very subtle (10%)
1361
+ - `0.20` - Subtle (20%)
1362
+ - `0.30` - Moderate (30%, recommended default)
1363
+ - `0.40` - Noticeable (40%, party mode)
1364
+
1365
+ **Example: OpenClaw Custom Configuration**
1366
+
1367
+ Create `.claude/config/audio-effects.cfg` on your OpenClaw server:
1368
+
1369
+ ```bash
1370
+ # OpenClaw assistant - warm voice with subtle reverb
1371
+ OpenClaw|reverb 30 40 70 gain -1|agentvibes_soft_flamenco_loop.mp3|0.25
1372
+
1373
+ # Help desk agent - clear, bright voice
1374
+ HelpDesk|equalizer 4000 1q +3 compand 0.2,0.5 6:-70,-60,-20|agentvibes_dark_chill_step_loop.mp3|0.15
1375
+
1376
+ # Default fallback
1377
+ default||agentvibes_soft_flamenco_loop.mp3|0.30
1378
+ ```
1379
+
1380
+ **How AgentVibes Applies Effects:**
1381
+
1382
+ 1. **Generate TTS** - Create base audio with Piper TTS
1383
+ 2. **Apply SOX effects** - Process audio (reverb, EQ, pitch, etc.)
1384
+ 3. **Mix background** - Blend background music at specified volume
1385
+ 4. **Tunnel via SSH** - Send processed audio to local receiver
1386
+ 5. **Play on device** - Output to phone/laptop speakers
1387
+
1388
+ This allows **per-message customization** or **consistent agent branding** with unique audio signatures!
1389
+
1390
+ ### 🔊 Remote SSH Audio
1391
+
1392
+ Perfect for running OpenClaw on a remote server with audio on your local machine:
1393
+
1394
+ **Quick Setup:**
1395
+
1396
+ 1. **Remote server** - Configure PulseAudio:
1397
+ ```bash
1398
+ echo 'export PULSE_SERVER=tcp:localhost:14713' >> ~/.bashrc
1399
+ source ~/.bashrc
1400
+ ```
1401
+
1402
+ 2. **Local machine** - Add SSH tunnel (`~/.ssh/config`):
1403
+ ```
1404
+ Host your-server
1405
+ RemoteForward 14713 localhost:14713
1406
+ ```
1407
+
1408
+ 3. **Connect and test**:
1409
+ ```bash
1410
+ ssh your-server
1411
+ agentvibes speak "Testing remote audio from OpenClaw"
1412
+ ```
1413
+
1414
+ Audio plays on your local speakers! 🔊
1415
+
1416
+ ### 📚 Documentation
1417
+
1418
+ - **OpenClaw Skill**: [.clawdbot/README.md](.clawdbot/README.md)
1419
+ - **OpenClaw Website**: https://openclaw.ai/
1420
+ - **Remote Audio Setup**: [docs/remote-audio-setup.md](docs/remote-audio-setup.md)
1421
+ - **Security Hardening**: [docs/security-hardening-guide.md](docs/security-hardening-guide.md) ⚠️
1422
+
1423
+ [↑ Back to top](#-table-of-contents)
1424
+
1425
+ ---
1426
+
1427
+ ## 🎙️ AgentVibes Receiver: Remote Audio Streaming from Voiceless Servers
1428
+
1429
+ **Receive and play TTS audio from servers that have no audio output!**
1430
+
1431
+ AgentVibes Receiver is a lightweight audio client that runs on your phone, tablet, or personal computer. It receives and plays TTS audio sent from remote voiceless servers where your OpenClaw Personal Assistant or Claude Code project is installed.
1432
+
1433
+ ### 🎯 What AgentVibes Receiver Solves
1434
+
1435
+ You have OpenClaw running on a Mac mini or remote server with **no audio output**:
1436
+ - 🖥️ Mac mini (silent)
1437
+ - 🖥️ Ubuntu server (headless)
1438
+ - ☁️ AWS/DigitalOcean instance
1439
+ - 📦 Docker container
1440
+ - 🪟 WSL (Windows Subsystem for Linux)
1441
+
1442
+ Users message you via WhatsApp, Telegram, or Discord but only get text responses:
1443
+ - No voice = Less engaging experience
1444
+ - No personality = Feels robotic
1445
+ - No audio cues = Miss important context
1446
+
1447
+ **AgentVibes Receiver transforms this:**
1448
+ - ✅ OpenClaw speaks with voice (Siri-like experience)
1449
+ - Audio streams to your device automatically
1450
+ - You hear responses on your speakers
1451
+ - Users get a conversational AI experience
1452
+
1453
+ ### 🔧 How It Works
1454
+
1455
+ **One-time setup:**
1456
+ 1. Install AgentVibes on your voiceless server with OpenClaw
1457
+ 2. Install AgentVibes Receiver on your personal device (phone/tablet/laptop)
1458
+ 3. Connect via SSH tunnel (or Tailscale VPN)
1459
+ 4. Done - automatic from then on
1460
+
1461
+ **Flow diagram:**
1462
+ ```
1463
+ ┌──────────────────────────────────────────┐
1464
+ │ Your Mac mini / Server │
1465
+ │ (OpenClaw + AgentVibes) │
1466
+ │ • Generates TTS audio │
1467
+ │ • Sends via SSH tunnel │
1468
+ └──────────────────────────────────────────┘
1469
+ ↓ Encrypted SSH tunnel
1470
+ ┌──────────────────────────────────────────┐
1471
+ │ Your Phone / Laptop │
1472
+ │ (AgentVibes Receiver) │
1473
+ │ • Receives audio stream (or text stream) │
1474
+ │ • Auto-plays on device speakers │
1475
+ └──────────────────────────────────────────┘
1476
+ ```
1477
+
1478
+ **Real-world example:**
1479
+ ```
1480
+ 📱 WhatsApp: "Tell me about quantum computing"
1481
+
1482
+ 🖥️ Mac mini: OpenClaw processes + generates TTS
1483
+ ↓ SSH tunnel (audio or text stream)
1484
+ 📱 Your phone (AgentVibes Receiver): Plays audio 🔊
1485
+
1486
+ You hear on your device speakers: "Quantum computing uses quantum bits..."
1487
+
1488
+ 💬 Conversation feels alive!
1489
+ ```
1490
+
1491
+ ### Key Features
1492
+
1493
+ | Feature | Benefit |
1494
+ |---------|---------|
1495
+ | **One-Time Pairing** | SSH key setup, automatic reconnect |
1496
+ | **Real-Time Streaming** | Low-latency audio playback |
1497
+ | **SSH Encryption** | Secure audio tunnel |
1498
+ | **Tailscale Support** | Easy VPN for remote servers |
1499
+ | **Voice Selection** | Configure server-side voice |
1500
+ | **Audio Effects** | Reverb, echo, pitch on server |
1501
+ | **Cache Tracking** | Monitor audio generation |
1502
+ | **Multiple Servers** | Connect to different OpenClaw instances |
1503
+
1504
+ ### 🚀 Perfect For
1505
+
1506
+ - 🖥️ **Mac mini + OpenClaw** - Home server with professional voices
1507
+ - ☁️ **Remote Servers** - OpenClaw on AWS/GCP/DigitalOcean
1508
+ - 📱 **WhatsApp/Telegram** - Users message, hear responses
1509
+ - 🎓 **Discord Bots** - Bot speaks with voices
1510
+ - 🏗️ **Docker/Containers** - Containerized OpenClaw with audio
1511
+ - 🔧 **WSL Development** - Windows developers using voiceless WSL
1512
+
1513
+ ### 📝 Setup
1514
+
1515
+ ```bash
1516
+ # On your server (Mac mini, Ubuntu, AWS, etc.)
1517
+ npx agentvibes install
1518
+ # Selects OpenClaw option
1519
+ # AgentVibes installs with SSH-Remote provider
1520
+
1521
+ # On your personal device (phone, laptop, tablet)
1522
+ npx agentvibes receiver setup
1523
+ # Pairing prompt with server SSH key
1524
+ # Done!
1525
+ ```
1526
+
1527
+ ### 📚 Documentation
1528
+
1529
+ **[→ View AgentVibes Receiver Setup Guide](docs/agentvibes-receiver.md)** - Pairing, SSH configuration, Tailscale setup, troubleshooting
1530
+
1531
+ **[→ View OpenClaw Integration Guide](docs/openclaw-integration.md)** - Server setup, voice configuration, audio effects, and best practices
1532
+
1533
+ [↑ Back to top](#-table-of-contents)
1534
+
1535
+ ---
1536
+
1537
+ ## 📦 Installation Structure
1538
+
1539
+ **What gets installed:** Commands, hooks, personalities, and plugins in `.claude/` directory.
1540
+
1541
+ **[→ View Complete Installation Structure](docs/installation-structure.md)** - Full directory tree, file descriptions, and settings storage
1542
+
1543
+ [↑ Back to top](#-table-of-contents)
1544
+
1545
+ ---
1546
+
1547
+ ## 💡 Common Workflows
1548
+
1549
+ ```bash
1550
+ # Switch voices
1551
+ /agent-vibes:list # See all voices
1552
+ /agent-vibes:switch Aria # Change voice
1553
+
1554
+ # Try personalities
1555
+ /agent-vibes:personality pirate # Pirate voice + style
1556
+ /agent-vibes:personality list # See all 19 personalities
1557
+
1558
+ # Speak in other languages
1559
+ /agent-vibes:set-language spanish # Speak in Spanish
1560
+ /agent-vibes:set-language list # See 30+ languages
1561
+
1562
+ # Replay audio
1563
+ /agent-vibes:replay # Replay last message
1564
+ ```
1565
+
1566
+ **💡 Tip:** Using MCP? Just say "Switch to Aria voice" or "Speak in Spanish" instead of typing commands.
1567
+
1568
+ [↑ Back to top](#-table-of-contents)
1569
+
1570
+ ---
1571
+
1572
+ ## 🔧 Advanced Features
1573
+
1574
+ AgentVibes supports **custom personalities** and **custom voices**.
1575
+
1576
+ **Quick Examples:**
1577
+ ```bash
1578
+ # Create custom personality
1579
+ /agent-vibes:personality add mycustom
1580
+
1581
+ # Add custom Piper voice
1582
+ /agent-vibes:add "My Voice" abc123xyz789
1583
+
1584
+ # Use in custom output styles
1585
+ [Bash: .claude/hooks/play-tts.sh "Starting" "Aria"]
1586
+ ```
1587
+
1588
+ **[→ View Advanced Features Guide](docs/advanced-features.md)** - Custom personalities, custom voices, and more
1589
+
1590
+ [↑ Back to top](#-table-of-contents)
1591
+
1592
+ ---
1593
+
1594
+ ## 🔊 Remote Audio Setup
1595
+
1596
+ **Running AgentVibes on a remote server?** No problem!
1597
+
1598
+ ✅ **Auto-detects SSH sessions** - Works with VS Code Remote SSH, regular SSH, cloud dev environments
1599
+ ✅ **Zero configuration** - Audio optimizes automatically
1600
+ ✅ **No static/clicking** - Clean playback through SSH tunnels
1601
+
1602
+ **[→ Remote Audio Setup Guide](docs/remote-audio-setup.md)** - Full PulseAudio configuration details
1603
+
1604
+ [↑ Back to top](#-table-of-contents)
1605
+
1606
+ ---
1607
+
1608
+ ## 🛠️ Technical Documentation
1609
+
1610
+ ### Audio Architecture
1611
+
1612
+ AgentVibes uses a cross-platform audio module (`src/console/audio-env.js`) that handles player detection and environment configuration for all supported platforms.
1613
+
1614
+ #### Platform Audio Support Matrix
1615
+
1616
+ | Platform | PulseAudio Config | MP3 Players (preference order) | WAV Players (preference order) |
1617
+ |----------|-------------------|-------------------------------|-------------------------------|
1618
+ | **Native Linux** | System default (not overridden) | ffplay → play (sox) → mpg123 → cvlc → mpv | aplay → paplay → play → ffplay |
1619
+ | **WSL2** | Auto-detects `/mnt/wslg/PulseServer` | Same as Linux | Same as Linux |
1620
+ | **macOS** | Not applicable | ffplay → play → mpg123 → cvlc → mpv → afplay | aplay → paplay → play → ffplay → afplay |
1621
+ | **Windows** | Not applicable | ffplay → mpv (if installed) | ffplay → mpv → PowerShell SoundPlayer (built-in) |
1622
+
1623
+ #### Key Design Decisions
1624
+
1625
+ - **Direct spawn, not shell chains**: Audio players are spawned directly via Node's `spawn()` instead of `sh -c 'cmd1 || cmd2'` chains. VLC/cvlc crashes when stderr is redirected inside shell wrappers.
1626
+ - **Player detection at startup**: The available player is detected once using `which` and cached. No runtime fallback chains.
1627
+ - **PULSE_SERVER safety**: The WSL2 PulseServer path (`/mnt/wslg/PulseServer`) is only set when the socket file actually exists. Hardcoding it on native Linux silently breaks audio output.
1628
+ - **Windows WAV fallback**: PowerShell's `System.Media.SoundPlayer` is used as a built-in fallback when no cross-platform player is installed.
1629
+
1630
+ #### Multi-Speaker Voice Models
1631
+
1632
+ Piper supports multi-speaker ONNX models (e.g., `16Speakers.onnx`) that contain multiple voices in a single file. AgentVibes expands these automatically:
1633
+
1634
+ - The `.onnx.json` metadata file contains `num_speakers` and `speaker_id_map`
1635
+ - `scanInstalledVoices()` expands multi-speaker models into individual selectable entries (e.g., `16Speakers::Cori_Samuel`)
1636
+ - When selected, the system writes `tts-piper-model.txt` and `tts-piper-speaker-id.txt` to `.claude/`
1637
+ - `play-tts-piper.sh` reads these files and passes `--speaker <id>` to the piper binary
1638
+
1639
+ #### Voice Directory Resolution
1640
+
1641
+ Voice storage follows the same precedence chain in both JavaScript and shell:
1642
+
1643
+ 1. `PIPER_VOICES_DIR` environment variable
1644
+ 2. Project-local `.claude/piper-voices-dir.txt` (walks up directory tree)
1645
+ 3. Global `~/.claude/piper-voices-dir.txt`
1646
+ 4. Default `~/.claude/piper-voices`
1647
+
1648
+ #### Voice Catalog System
1649
+
1650
+ AgentVibes includes a 914-voice catalog (`voice-assignments.json`) that lets users browse, preview, and install voices directly from the Voices tab:
1651
+
1652
+ - **10 Curated Voices** — Hand-picked high-quality voices installed by default
1653
+ - **904 LibriTTS Speakers** — Automatically extracted from the `16Speakers` multi-speaker model's `speaker_id_map`, plus the full LibriTTS catalog from Hugging Face
1654
+ - **Download on Demand** — Uninstalled voices appear greyed-out in the list; pressing Enter opens a download modal that fetches the voice via `piper-voice-manager.sh`
1655
+ - **Catalog Metadata** — Each entry includes `voiceId`, `displayName`, `gender`, `type` (curated/libritts), and download URL
1656
+ - **LibriTTS Speaker Names** — Raw numeric IDs are patched at load time using `patchLibriTTSSpeakerNames()` which maps speaker IDs to human-readable names from the registry
1657
+
1658
+ The catalog is loaded once at tab initialization by `loadCatalog()`. Installed voices (from disk scan) are shown with full color; catalog-only voices are dimmed until downloaded.
1659
+
1660
+ #### Required System Dependencies for Background Music
1661
+
1662
+ Background music requires an MP3-capable audio player. The installer detects missing players and offers to install `ffmpeg` automatically. If no player is found, the Music tab displays a clear error message.
1663
+
1664
+ ```bash
1665
+ # Install ffmpeg (recommended - provides ffplay)
1666
+ # Ubuntu/Debian/WSL2:
1667
+ sudo apt install ffmpeg
1668
+
1669
+ # macOS:
1670
+ brew install ffmpeg
1671
+
1672
+ # Arch Linux:
1673
+ sudo pacman -S ffmpeg
1674
+ ```
1675
+
1676
+ [↑ Back to top](#-table-of-contents)
1677
+
1678
+ ---
1679
+
1680
+ ## 🔗 Useful Links
1681
+
1682
+ ### Voice & AI Tools
1683
+
1684
+ - 🎤 **[WhisperTyping](https://whispertyping.com/)** - Fast voice-to-text typing for developers
1685
+ - 🗣️ **[OpenWhisper (Azure)](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/whisper-overview)** - Microsoft's speech-to-text service
1686
+ - 🆓 **[Piper TTS](https://github.com/rhasspy/piper)** - Free offline neural TTS
1687
+ - 🤖 **[Claude Code](https://claude.com/claude-code)** - AI coding assistant
1688
+ - 🎭 **[BMAD METHOD](https://github.com/bmad-code-org/BMAD-METHOD)** - Multi-agent framework
1689
+
1690
+ ### AgentVibes Resources
1691
+
1692
+ - 🐛 **[Issues](https://github.com/paulpreibisch/AgentVibes/issues)** - Report bugs
1693
+ - 📝 **[Changelog](https://github.com/paulpreibisch/AgentVibes/releases)** - Version history
1694
+ - 📰 **[Technical Deep Dive - LinkedIn Article](https://www.linkedin.com/pulse/agent-vibes-add-voice-claude-code-deep-dive-npx-paul-preibisch-8zrcc/)** - How AgentVibes works under the hood
1695
+
1696
+ [↑ Back to top](#-table-of-contents)
1697
+
1698
+ ---
1699
+
1700
+ ## ❓ Troubleshooting
1701
+
1702
+ **Common Issues:**
1703
+
1704
+ **❌ Error: "git-lfs is not installed"**
1705
+
1706
+ **AgentVibes does NOT require git-lfs.** This error suggests:
1707
+
1708
+ 1. **Wrong installation method** - Use npm, not git clone:
1709
+ ```bash
1710
+ # ✅ CORRECT - Use this:
1711
+ npx agentvibes install
1712
+
1713
+ # ❌ WRONG - Don't clone unless contributing:
1714
+ git clone https://github.com/paulpreibisch/AgentVibes.git
1715
+ ```
1716
+
1717
+ 2. **Different project** - You may be in a BMAD-METHOD or other repo that uses git-lfs
1718
+
1719
+ 3. **Global git config** - Your git may have lfs enabled globally:
1720
+ ```bash
1721
+ git config --global --list | grep lfs
1722
+ ```
1723
+
1724
+ **Solution:** Use `npx agentvibes install` - no git operations needed!
1725
+
1726
+ ---
1727
+
1728
+ **No Audio Playing?**
1729
+ 1. Verify hook is installed: `ls -la .claude/hooks/session-start-tts.sh`
1730
+ 2. Test: `/agent-vibes:sample Aria`
1731
+
1732
+ **Commands Not Found?**
1733
+ ```bash
1734
+ npx agentvibes install --yes
1735
+ ```
1736
+
1737
+ **[→ View Complete Troubleshooting Guide](docs/troubleshooting.md)** - Solutions for audio issues, command problems, MCP errors, voice issues, and more
1738
+
1739
+ [↑ Back to top](#-table-of-contents)
1740
+
1741
+ ---
1742
+
1743
+ ## 🔄 Updating
1744
+
1745
+ **Quick Update (From Claude Code):**
1746
+ ```bash
1747
+ /agent-vibes:update
1748
+ ```
1749
+
1750
+ **Alternative Methods:**
1751
+ ```bash
1752
+ # Via npx
1753
+ npx agentvibes update --yes
1754
+
1755
+ # Via npm (if installed globally)
1756
+ npm update -g agentvibes && agentvibes update --yes
1757
+ ```
1758
+
1759
+ **Check Version:** `/agent-vibes:version`
1760
+
1761
+ **[→ View Complete Update Guide](docs/updating.md)** - All update methods, version checking, what gets updated, and troubleshooting
1762
+
1763
+ [↑ Back to top](#-table-of-contents)
1764
+
1765
+ ---
1766
+
1767
+ ## 🗑️ Uninstalling
1768
+
1769
+ **Quick Uninstall (Project Only):**
1770
+ ```bash
1771
+ npx agentvibes uninstall
1772
+ ```
1773
+
1774
+ **Uninstall Options:**
1775
+ ```bash
1776
+ # Interactive uninstall (confirms before removing)
1777
+ npx agentvibes uninstall
1778
+
1779
+ # Auto-confirm (skip confirmation prompt)
1780
+ npx agentvibes uninstall --yes
1781
+
1782
+ # Also remove global configuration
1783
+ npx agentvibes uninstall --global
1784
+
1785
+ # Complete uninstall including Piper TTS
1786
+ npx agentvibes uninstall --global --with-piper
1787
+ ```
1788
+
1789
+ **What Gets Removed:**
1790
+
1791
+ **Project-level (default):**
1792
+ - `.claude/commands/agent-vibes/` - Slash commands
1793
+ - `.claude/hooks/` - TTS scripts
1794
+ - `.claude/personalities/` - Personality templates
1795
+ - `.claude/output-styles/` - Output styles
1796
+ - `.claude/audio/` - Audio cache
1797
+ - `.claude/tts-*.txt` - TTS configuration files
1798
+ - `.agentvibes/` - BMAD integration files
1799
+
1800
+ **Global (with `--global` flag):**
1801
+ - `~/.claude/` - Global configuration
1802
+ - `~/.agentvibes/` - Global cache
1803
+
1804
+ **Piper TTS (with `--with-piper` flag):**
1805
+ - `~/piper/` - Piper TTS installation
1806
+
1807
+ **To Reinstall:**
1808
+ ```bash
1809
+ npx agentvibes install
1810
+ ```
1811
+
1812
+ **💡 Tips:**
1813
+ - Default uninstall only removes project-level files
1814
+ - Use `--global` if you want to completely reset AgentVibes
1815
+ - Use `--with-piper` if you also want to remove the Piper TTS engine
1816
+ - Run `npx agentvibes status` to check installation status
1817
+
1818
+ [↑ Back to top](#-table-of-contents)
1819
+
1820
+ ---
1821
+
1822
+ ## Frequently Asked Questions (FAQ)
1823
+
1824
+ ### Installation & Setup
1825
+
1826
+ **Q: Does AgentVibes require git-lfs?**
1827
+ **A:** **NO.** AgentVibes has zero git-lfs requirement. Use `npx agentvibes install` - no git operations needed.
1828
+
1829
+ **Q: Do I need to clone the GitHub repository?**
1830
+ **A:** **NO** (unless you're contributing code). Normal users should use `npx agentvibes install`. Repository cloning is only for developers who want to contribute to the project.
1831
+
1832
+ **Q: Why is the GitHub repo so large?**
1833
+ **A:** The repo includes demo files and development dependencies (node_modules). The actual npm package you download is **< 50MB** and optimized for users.
1834
+
1835
+ **Q: What's the difference between npm install and git clone?**
1836
+ **A:**
1837
+ - `npx agentvibes install` → **For users** - Downloads pre-built package, zero git operations, instant setup
1838
+ - `git clone ...` → **For developers only** - Full source code, development setup, contributing code
1839
+
1840
+ **Q: I saw an error about git-lfs, is something wrong?**
1841
+ **A:** You're likely:
1842
+ 1. Using the wrong installation method (use `npx`, not `git clone`)
1843
+ 2. In a different project directory that uses git-lfs
1844
+ 3. Using a global git config with lfs enabled
1845
+
1846
+ AgentVibes itself does NOT use or require git-lfs.
1847
+
1848
+ ### Features & Usage
1849
+
1850
+ **Q: Does MCP consume tokens from my context window?**
1851
+ **A:** **YES.** Every MCP tool schema adds to the context window. AgentVibes MCP is designed to be minimal (~1500-2000 tokens), but if you're concerned about token usage, you can use slash commands instead of MCP.
1852
+
1853
+ **Q: What's the difference between using MCP vs slash commands?**
1854
+ **A:**
1855
+ - **MCP**: Natural language ("Switch to Aria voice"), uses ~1500-2000 context tokens
1856
+ - **Slash commands**: Explicit commands (`/agent-vibes:switch Aria`), zero token overhead
1857
+
1858
+ Both do the exact same thing - MCP is more convenient, slash commands are more token-efficient.
1859
+
1860
+ **Q: Is AgentVibes just a bash script?**
1861
+ **A:** No. AgentVibes includes:
1862
+ - Multi-provider TTS abstraction (Piper TTS, macOS Say)
1863
+ - Voice management system with 50+ voices
1864
+ - Personality & sentiment system
1865
+ - Language learning mode with bilingual playback
1866
+ - Audio effects processing (reverb, EQ, compression)
1867
+ - MCP server for natural language control
1868
+ - BMAD integration for multi-agent voice switching
1869
+ - Remote audio optimization for SSH/RDP sessions
1870
+
1871
+ **Q: Can I use AgentVibes without BMAD?**
1872
+ **A:** **YES.** AgentVibes works standalone. BMAD integration is optional - only activates if you install BMAD separately.
1873
+
1874
+ **Q: What are the audio dependencies?**
1875
+ **A:**
1876
+ - **Required**: Node.js 16+, Python 3.10+ (for Piper TTS)
1877
+ - **Optional**: sox (audio effects), ffmpeg (background music, padding)
1878
+ - All TTS generation works without optional dependencies - they just enhance the experience
1879
+
1880
+ ### Voice Features
1881
+
1882
+ **Q: How do I browse and install voices?**
1883
+ **A:** Use the built-in TUI installer by running `/audio-browser` in Claude Code. Navigate with arrow keys, press ENTER to sample voices, and select one to install. AgentVibes switches to the chosen voice automatically.
1884
+
1885
+ **Q: What are friendly voice names?**
1886
+ **A:** Instead of technical IDs like `en_US-ryan-high`, you can now use simple names like "Ryan" when switching voices. All 904+ voices have friendly names matched to their characteristics.
1887
+
1888
+ **Q: How do I set up custom intro text?**
1889
+ **A:** During installation you'll be prompted for intro text. You can also configure it anytime via `npx agentvibes` → Settings tab. Enter text like "FireBot: " and it will prefix all TTS announcements.
1890
+
1891
+ **Q: Can I use my own background music?**
1892
+ **A:** Yes! Run `npx agentvibes` and open the Music tab. Select "Change music" and provide the path to your audio file (.mp3, .wav, .ogg, or .m4a). Files are validated for security and must be under 50MB.
1893
+
1894
+ **Q: What's the recommended duration for custom music?**
1895
+ **A:** Between 30-90 seconds is ideal for smooth looping. The system supports up to 300 seconds (5 minutes) but will warn you if the duration is non-optimal.
1896
+
1897
+ **Q: Are friendly voice names case-sensitive?**
1898
+ **A:** No! You can type "ryan", "Ryan", or "RYAN" - they all work. The voice resolution is case-insensitive.
1899
+
1900
+ **Q: Does custom music work with all TTS providers?**
1901
+ **A:** Yes! Custom background music works with Piper TTS, Soprano, macOS Say, and Windows SAPI.
1902
+
1903
+ **Q: Can I preview music before setting it as my background?**
1904
+ **A:** Yes! In `npx agentvibes` Music tab, select "Preview current" to hear your music. During installation, you can also sample all built-in tracks.
1905
+
1906
+ **Q: What security measures protect custom music uploads?**
1907
+ **A:** AgentVibes implements **defense-in-depth security with 7 validation layers**, tested against 180+ attack variations:
1908
+
1909
+ 1. **Path Validation** - `path.resolve()` prevents traversal attacks (../, encoded, Unicode)
1910
+ 2. **Home Directory Boundary** - Files must be within your home directory
1911
+ 3. **File Existence Check** - Verifies file actually exists
1912
+ 4. **File Type Verification** - Must be a regular file (not device, socket, etc.)
1913
+ 5. **Ownership Verification** - File must be owned by you (UID check)
1914
+ 6. **Format Validation** - Magic number checking ensures real audio files
1915
+ 7. **Secure Storage** - Files copied to restricted directory with 600 permissions
1916
+
1917
+ **Security Certification:**
1918
+ - 100% attack rejection rate (107/107 tests passed)
1919
+ - OWASP CWE-22 compliant (path traversal prevention)
1920
+ - No information disclosure in error messages
1921
+ - Production-ready and certified secure
1922
+
1923
+ See full security audit: `docs/security/SECURITY-AUDIT.md`
1924
+
1925
+ **Q: Has the security been independently verified?**
1926
+ **A:** Yes! AgentVibes v3.6.0 includes a comprehensive security audit with 180+ attack variations tested. All path traversal, symlink, Unicode, null byte, and edge case attacks were successfully blocked (100% rejection rate). The system is OWASP CWE-22 compliant and includes a detailed security audit report at `docs/security/SECURITY-AUDIT.md`.
1927
+
1928
+ **Q: What attack patterns were tested?**
1929
+ **A:** The security test suite covers:
1930
+ - **Path Traversal:** 100 variations (basic, URL-encoded, Unicode, null bytes, mixed)
1931
+ - **Symlink Attacks:** 10 variations (sensitive files, chains, traversal targets)
1932
+ - **Hard Link Attacks:** 5 variations (ownership verification)
1933
+ - **Edge Cases:** 65+ variations (CRLF, whitespace, Unicode normalization, platform-specific)
1934
+
1935
+ Every attack was correctly rejected with no information disclosure.
1936
+
1937
+ ### Troubleshooting
1938
+
1939
+ **Q: Why isn't Claude speaking?**
1940
+ **A:** Common causes:
1941
+ 1. Hook not installed - Run `npx agentvibes install --yes`
1942
+ 2. Audio player missing - Install `sox` and `ffmpeg`
1943
+ 3. TTS protocol not enabled in settings
1944
+ 4. Test with `/agent-vibes:sample Aria`
1945
+
1946
+ **Q: Can I use this on Windows?**
1947
+ **A:** Yes! AgentVibes supports **native Windows** with PowerShell scripts (Soprano, Piper, SAPI providers). See [Windows Native Setup](WINDOWS-SETUP.md). WSL is also supported for legacy workflows - see [Windows WSL Guide](mcp-server/WINDOWS_SETUP.md).
1948
+
1949
+ **Q: How do I reduce token usage?**
1950
+ **A:**
1951
+ 1. Use slash commands instead of MCP (zero context token overhead)
1952
+ 2. Set verbosity to LOW (`/agent-vibes:verbosity low`)
1953
+ 3. Disable BMAD integration if not using it
1954
+
1955
+ [↑ Back to top](#-table-of-contents)
1956
+
1957
+ ---
1958
+
1959
+ ## ⚠️ Important Disclaimers
1960
+
1961
+ **API Costs & Usage:**
1962
+ - Usage is completely free with Piper TTS and macOS Say (no API costs)
1963
+ - Users are solely responsible for their own API costs and usage
1964
+
1965
+
1966
+ **Third-Party Services:**
1967
+ - This project integrates with Piper TTS (local processing) and macOS Say (system built-in)
1968
+ - We are **not affiliated with, endorsed by, or officially connected** to Anthropic, Apple, or Claude
1969
+ - Piper TTS is subject to its terms of service
1970
+
1971
+ **Privacy & Data:**
1972
+ - **Piper TTS**: All processing happens locally on your machine, no external data transmission
1973
+ - **macOS Say**: All processing happens locally using Apple's built-in speech synthesis
1974
+
1975
+ **Software License:**
1976
+ - Provided "as-is" under Apache 2.0 License without warranty of any kind
1977
+ - See [LICENSE](LICENSE) file for full terms
1978
+ - No liability for data loss, bugs, service interruptions, or any damages
1979
+
1980
+ **Use at Your Own Risk:**
1981
+ - This is open-source software maintained by the community
1982
+ - Always test in development before production use
1983
+ - Monitor your API usage and costs regularly
1984
+
1985
+ [↑ Back to top](#-table-of-contents)
1986
+
1987
+ ---
1988
+
1989
+ ## 🙏 Credits
1990
+
1991
+ **Built with ❤️ by [Paul Preibisch](https://github.com/paulpreibisch)**
1992
+
1993
+ - 🐦 Twitter: [@997Fire](https://x.com/997Fire)
1994
+ - 💼 LinkedIn: [paul-preibisch](https://www.linkedin.com/in/paul-preibisch/)
1995
+ - 🌐 GitHub: [paulpreibisch](https://github.com/paulpreibisch)
1996
+
1997
+ **Powered by:**
1998
+ - [Piper TTS](https://github.com/rhasspy/piper) - Free neural voices
1999
+ - [Soprano TTS](https://github.com/suno-ai/bark) - Ultra-fast neural TTS
2000
+ - **Windows SAPI** - Native Windows text-to-speech
2001
+ - **macOS Say** - Native macOS text-to-speech
2002
+ - [Claude Code](https://claude.com/claude-code) - AI coding assistant
2003
+ - Licensed under Apache 2.0
2004
+
2005
+ **Contributors:**
2006
+ - 🎤 [@nathanchase](https://github.com/nathanchase) - Soprano TTS Provider integration (PR #95) - Ultra-fast neural TTS with GPU acceleration
2007
+
2008
+ **Special Thanks:**
2009
+ - 💡 [Claude Code Hooks Mastery](https://github.com/disler/claude-code-hooks-mastery) by [@disler](https://github.com/disler) - Hooks inspiration
2010
+ - 🤖 [BMAD METHOD](https://github.com/bmad-code-org/BMAD-METHOD) - Multi-agent framework with auto voice switching integration
2011
+
2012
+ [↑ Back to top](#-table-of-contents)
2013
+
2014
+ ---
2015
+
2016
+ ## 🤝 Contributing
2017
+
2018
+ If AgentVibes makes your coding more fun:
2019
+ - ⭐ **Star this repo** on GitHub
2020
+ - 🐦 **Tweet** and tag [@997Fire](https://x.com/997Fire)
2021
+ - 🎥 **Share videos** of Claude with personality
2022
+ - 💬 **Tell dev friends** about voice-powered AI
2023
+
2024
+ ---
2025
+
2026
+ **Ready to give Claude a voice? Install now and code with personality! 🎤✨**
2027
+
2028
+ [↑ Back to top](#-table-of-contents)
2029
+