agentvibes 4.0.1 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/.agentvibes/bmad/bmad-voices.md +69 -69
  2. package/.agentvibes/config.json +12 -0
  3. package/.claude/activation-instructions +54 -54
  4. package/.claude/audio/tracks/README.md +52 -52
  5. package/.claude/commands/agent-vibes/add.md +21 -21
  6. package/.claude/commands/agent-vibes/agent-vibes.md +101 -101
  7. package/.claude/commands/agent-vibes/agent.md +79 -79
  8. package/.claude/commands/agent-vibes/background-music.md +111 -111
  9. package/.claude/commands/agent-vibes/bmad.md +198 -198
  10. package/.claude/commands/agent-vibes/clean.md +18 -18
  11. package/.claude/commands/agent-vibes/cleanup.md +18 -18
  12. package/.claude/commands/agent-vibes/commands.json +145 -145
  13. package/.claude/commands/agent-vibes/effects.md +97 -97
  14. package/.claude/commands/agent-vibes/get.md +9 -9
  15. package/.claude/commands/agent-vibes/hide.md +91 -91
  16. package/.claude/commands/agent-vibes/language.md +23 -23
  17. package/.claude/commands/agent-vibes/learn.md +67 -67
  18. package/.claude/commands/agent-vibes/list.md +13 -13
  19. package/.claude/commands/agent-vibes/mute.md +37 -37
  20. package/.claude/commands/agent-vibes/preview.md +17 -17
  21. package/.claude/commands/agent-vibes/provider.md +68 -68
  22. package/.claude/commands/agent-vibes/replay-target.md +14 -14
  23. package/.claude/commands/agent-vibes/sample.md +12 -12
  24. package/.claude/commands/agent-vibes/set-favorite-voice.md +84 -84
  25. package/.claude/commands/agent-vibes/set-pretext.md +65 -65
  26. package/.claude/commands/agent-vibes/set-speed.md +41 -41
  27. package/.claude/commands/agent-vibes/show.md +84 -84
  28. package/.claude/commands/agent-vibes/switch.md +87 -87
  29. package/.claude/commands/agent-vibes/target-voice.md +26 -26
  30. package/.claude/commands/agent-vibes/target.md +30 -30
  31. package/.claude/commands/agent-vibes/translate.md +68 -68
  32. package/.claude/commands/agent-vibes/unmute.md +45 -45
  33. package/.claude/commands/agent-vibes/verbosity.md +89 -89
  34. package/.claude/commands/agent-vibes/whoami.md +7 -7
  35. package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
  36. package/.claude/commands/agent-vibes-rdp.md +24 -24
  37. package/.claude/config/agentvibes.json +1 -0
  38. package/.claude/config/audio-effects.cfg +3 -2
  39. package/.claude/config/audio-effects.cfg.sample +52 -52
  40. package/.claude/config/background-music-volume.txt +1 -0
  41. package/.claude/config/intro-text.txt +1 -0
  42. package/.claude/config/piper-speech-rate.txt +4 -0
  43. package/.claude/config/piper-target-speech-rate.txt +1 -0
  44. package/.claude/config/reverb-level.txt +1 -0
  45. package/.claude/config/tts-speech-rate.txt +4 -0
  46. package/.claude/config/tts-target-speech-rate.txt +1 -0
  47. package/.claude/docs/TERMUX_SETUP.md +408 -408
  48. package/.claude/github-star-reminder.txt +1 -1
  49. package/.claude/hooks/README-TTS-QUEUE.md +135 -135
  50. package/.claude/hooks/audio-cache-utils.sh +246 -246
  51. package/.claude/hooks/audio-processor.sh +433 -389
  52. package/.claude/hooks/background-music-manager.sh +404 -404
  53. package/.claude/hooks/bmad-speak-enhanced.sh +165 -165
  54. package/.claude/hooks/bmad-speak.sh +269 -112
  55. package/.claude/hooks/bmad-tts-injector.sh +568 -568
  56. package/.claude/hooks/bmad-voice-manager.sh +928 -928
  57. package/.claude/hooks/clawdbot-receiver-SECURE.sh +129 -129
  58. package/.claude/hooks/clawdbot-receiver.sh +107 -107
  59. package/.claude/hooks/clean-audio-cache.sh +22 -22
  60. package/.claude/hooks/cleanup-cache.sh +106 -106
  61. package/.claude/hooks/configure-rdp-mode.sh +137 -137
  62. package/.claude/hooks/download-extra-voices.sh +244 -244
  63. package/.claude/hooks/effects-manager.sh +268 -268
  64. package/.claude/hooks/github-star-reminder.sh +154 -154
  65. package/.claude/hooks/language-manager.sh +362 -362
  66. package/.claude/hooks/learn-manager.sh +492 -492
  67. package/.claude/hooks/macos-voice-manager.sh +205 -205
  68. package/.claude/hooks/migrate-background-music.sh +125 -125
  69. package/.claude/hooks/migrate-to-agentvibes.sh +161 -161
  70. package/.claude/hooks/optimize-background-music.sh +87 -87
  71. package/.claude/hooks/path-resolver.sh +60 -60
  72. package/.claude/hooks/personality-manager.sh +448 -448
  73. package/.claude/hooks/piper-download-voices.sh +225 -225
  74. package/.claude/hooks/piper-installer.sh +292 -292
  75. package/.claude/hooks/piper-multispeaker-registry.sh +171 -171
  76. package/.claude/hooks/piper-voice-manager.sh +24 -3
  77. package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +90 -90
  78. package/.claude/hooks/play-tts-enhanced.sh +105 -70
  79. package/.claude/hooks/play-tts-macos.sh +368 -345
  80. package/.claude/hooks/play-tts-piper.sh +679 -578
  81. package/.claude/hooks/play-tts-soprano.sh +356 -320
  82. package/.claude/hooks/play-tts-ssh-remote.sh +167 -88
  83. package/.claude/hooks/play-tts-termux-ssh.sh +169 -169
  84. package/.claude/hooks/play-tts.sh +301 -298
  85. package/.claude/hooks/prepare-release.sh +54 -54
  86. package/.claude/hooks/provider-commands.sh +617 -617
  87. package/.claude/hooks/provider-manager.sh +399 -399
  88. package/.claude/hooks/replay-target-audio.sh +95 -95
  89. package/.claude/hooks/requirements.txt +6 -6
  90. package/.claude/hooks/sentiment-manager.sh +201 -201
  91. package/.claude/hooks/session-start-tts.sh +81 -71
  92. package/.claude/hooks/soprano-gradio-synth.py +139 -139
  93. package/.claude/hooks/speed-manager.sh +291 -291
  94. package/.claude/hooks/stop-tts.sh +84 -0
  95. package/.claude/hooks/termux-installer.sh +261 -261
  96. package/.claude/hooks/translate-manager.sh +341 -341
  97. package/.claude/hooks/translator.py +237 -237
  98. package/.claude/hooks/tts-queue-worker.sh +145 -114
  99. package/.claude/hooks/tts-queue.sh +165 -136
  100. package/.claude/hooks/verbosity-manager.sh +178 -178
  101. package/.claude/hooks/voice-manager.sh +548 -544
  102. package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
  103. package/.claude/hooks-windows/background-music-manager.ps1 +348 -0
  104. package/.claude/hooks-windows/clean-audio-cache.ps1 +53 -0
  105. package/.claude/hooks-windows/download-extra-voices.ps1 +185 -0
  106. package/.claude/hooks-windows/effects-manager.ps1 +294 -0
  107. package/.claude/hooks-windows/language-manager.ps1 +193 -0
  108. package/.claude/hooks-windows/learn-manager.ps1 +241 -0
  109. package/.claude/hooks-windows/personality-manager.ps1 +266 -0
  110. package/.claude/hooks-windows/play-tts-piper.ps1 +209 -0
  111. package/.claude/hooks-windows/play-tts-sapi.ps1 +108 -0
  112. package/.claude/hooks-windows/play-tts-soprano.ps1 +159 -158
  113. package/.claude/hooks-windows/play-tts-windows-piper.ps1 +50 -5
  114. package/.claude/hooks-windows/play-tts-windows-sapi.ps1 +108 -108
  115. package/.claude/hooks-windows/play-tts.ps1 +344 -266
  116. package/.claude/hooks-windows/provider-manager.ps1 +29 -10
  117. package/.claude/hooks-windows/session-start-tts.ps1 +124 -124
  118. package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
  119. package/.claude/hooks-windows/speed-manager.ps1 +166 -0
  120. package/.claude/hooks-windows/verbosity-manager.ps1 +119 -0
  121. package/.claude/hooks-windows/voice-manager-windows.ps1 +92 -8
  122. package/.claude/output-styles/agent-vibes.md +202 -202
  123. package/.claude/personalities/angry.md +14 -14
  124. package/.claude/personalities/annoying.md +14 -14
  125. package/.claude/personalities/crass.md +14 -14
  126. package/.claude/personalities/dramatic.md +14 -14
  127. package/.claude/personalities/dry-humor.md +50 -50
  128. package/.claude/personalities/flirty.md +20 -20
  129. package/.claude/personalities/funny.md +14 -14
  130. package/.claude/personalities/grandpa.md +32 -32
  131. package/.claude/personalities/millennial.md +14 -14
  132. package/.claude/personalities/moody.md +14 -14
  133. package/.claude/personalities/normal.md +16 -16
  134. package/.claude/personalities/pirate.md +14 -14
  135. package/.claude/personalities/poetic.md +14 -14
  136. package/.claude/personalities/professional.md +14 -14
  137. package/.claude/personalities/rapper.md +55 -55
  138. package/.claude/personalities/robot.md +14 -14
  139. package/.claude/personalities/sarcastic.md +38 -38
  140. package/.claude/personalities/sassy.md +14 -14
  141. package/.claude/personalities/surfer-dude.md +14 -14
  142. package/.claude/personalities/zen.md +14 -14
  143. package/.claude/settings.json +15 -15
  144. package/.claude/verbosity.txt +1 -1
  145. package/.clawdbot/README.md +105 -105
  146. package/.clawdbot/skill/SKILL.md +241 -241
  147. package/.mcp.json +12 -0
  148. package/CLAUDE.md +170 -181
  149. package/README.md +2029 -1909
  150. package/RELEASE_NOTES.md +1310 -66
  151. package/WINDOWS-SETUP.md +208 -208
  152. package/bin/agent-vibes +39 -39
  153. package/bin/agentvibes-voice-browser.js +1840 -1826
  154. package/bin/agentvibes.js +48 -2
  155. package/bin/mcp-server.js +121 -121
  156. package/bin/mcp-server.sh +206 -206
  157. package/bin/test-bmad-pr +78 -78
  158. package/mcp-server/QUICK_START.md +203 -203
  159. package/mcp-server/README.md +345 -345
  160. package/mcp-server/WINDOWS_SETUP.md +260 -260
  161. package/mcp-server/docs/troubleshooting-audio.md +313 -313
  162. package/mcp-server/examples/claude_desktop_config.json +11 -11
  163. package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
  164. package/mcp-server/examples/custom_instructions.md +169 -169
  165. package/mcp-server/install-deps.js +130 -130
  166. package/mcp-server/pyproject.toml +52 -52
  167. package/mcp-server/requirements.txt +2 -2
  168. package/mcp-server/server.py +1465 -1417
  169. package/mcp-server/test_server.py +395 -395
  170. package/mcp-server/test_windows_script_parity.py +336 -0
  171. package/package.json +110 -112
  172. package/setup-windows.ps1 +815 -815
  173. package/src/bmad-detector.js +71 -71
  174. package/src/cli/list-personalities.js +110 -110
  175. package/src/cli/list-voices.js +114 -114
  176. package/src/commands/bmad-voices.js +394 -394
  177. package/src/commands/install-mcp.js +476 -476
  178. package/src/console/app.js +824 -806
  179. package/src/console/audio-env.js +20 -1
  180. package/src/console/brand-colors.js +13 -13
  181. package/src/console/constants/personalities.js +44 -0
  182. package/src/console/footer-config.js +50 -46
  183. package/src/console/modals/modal-overlay.js +247 -247
  184. package/src/console/navigation.js +62 -61
  185. package/src/console/tabs/agents-tab.js +1684 -369
  186. package/src/console/tabs/help-tab.js +261 -261
  187. package/src/console/tabs/install-tab.js +1007 -991
  188. package/src/console/tabs/music-tab.js +22 -8
  189. package/src/console/tabs/placeholder-tab.js +53 -46
  190. package/src/console/tabs/readme-tab.js +267 -267
  191. package/src/console/tabs/receiver-tab.js +1472 -0
  192. package/src/console/tabs/settings-tab.js +185 -402
  193. package/src/console/tabs/voices-tab.js +100 -21
  194. package/src/console/widgets/destroy-list.js +25 -0
  195. package/src/console/widgets/format-utils.js +89 -0
  196. package/src/console/widgets/notice.js +55 -0
  197. package/src/console/widgets/personality-picker.js +185 -0
  198. package/src/console/widgets/reverb-picker.js +94 -0
  199. package/src/console/widgets/track-picker.js +285 -0
  200. package/src/installer/music-file-input.js +304 -304
  201. package/src/installer.js +5882 -5777
  202. package/src/services/agent-voice-store.js +423 -163
  203. package/src/services/config-service.js +264 -264
  204. package/src/services/navigation-service.js +123 -123
  205. package/src/services/provider-service.js +132 -132
  206. package/src/services/verbosity-service.js +157 -157
  207. package/src/utils/audio-duration-validator.js +298 -298
  208. package/src/utils/audio-format-validator.js +277 -277
  209. package/src/utils/dependency-checker.js +469 -466
  210. package/src/utils/file-ownership-verifier.js +358 -358
  211. package/src/utils/list-formatter.js +194 -194
  212. package/src/utils/music-file-validator.js +285 -275
  213. package/src/utils/preview-list-prompt.js +136 -136
  214. package/src/utils/provider-validator.js +96 -12
  215. package/src/utils/secure-music-storage.js +412 -412
  216. package/templates/agentvibes-receiver.sh +482 -162
  217. package/templates/audio/welcome-music.mp3 +0 -0
  218. package/voice-assignments.json +8244 -8244
  219. package/.claude/config/background-music-position.txt +0 -1
@@ -1,578 +1,679 @@
1
- #!/usr/bin/env bash
2
- #
3
- # File: .claude/hooks/play-tts-piper.sh
4
- #
5
- # AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
6
- # Website: https://agentvibes.org
7
- # Repository: https://github.com/paulpreibisch/AgentVibes
8
- #
9
- # Co-created by Paul Preibisch with Claude AI
10
- # Copyright (c) 2025 Paul Preibisch
11
- #
12
- # Licensed under the Apache License, Version 2.0 (the "License");
13
- # you may not use this file except in compliance with the License.
14
- # You may obtain a copy of the License at
15
- #
16
- # http://www.apache.org/licenses/LICENSE-2.0
17
- #
18
- # Unless required by applicable law or agreed to in writing, software
19
- # distributed under the License is distributed on an "AS IS" BASIS,
20
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
- # See the License for the specific language governing permissions and
22
- # limitations under the License.
23
- #
24
- # DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
- # express or implied. Use at your own risk. See the Apache License for details.
26
- #
27
- # ---
28
- #
29
- # @fileoverview Piper TTS Provider Implementation - Free, offline neural TTS
30
- # @context Provides local, privacy-first TTS alternative to cloud services for WSL/Linux
31
- # @architecture Implements provider interface contract for Piper binary integration
32
- # @dependencies piper (pipx), piper-voice-manager.sh, mpv/aplay, ffmpeg (optional padding)
33
- # @entrypoints Called by play-tts.sh router when provider=piper
34
- # @patterns Provider contract: text/voice → audio file path, voice auto-download, language-aware synthesis
35
- # @related play-tts.sh, piper-voice-manager.sh, language-manager.sh, GitHub Issue #25
36
- #
37
-
38
- set -eo pipefail
39
- # Note: -u (nounset) omitted because sourced scripts (piper-voice-manager.sh,
40
- # language-manager.sh, audio-cache-utils.sh) use unset variables freely.
41
- # Variables in THIS script use ${VAR:-} defaults for safety.
42
-
43
- # Cleanup handler for temp files (preserves final output in $TEMP_FILE)
44
- _CLEANUP_FILES=()
45
- cleanup() {
46
- local f
47
- for f in "${_CLEANUP_FILES[@]+"${_CLEANUP_FILES[@]}"}"; do
48
- [[ "$f" == "${TEMP_FILE:-}" ]] && continue
49
- rm -f "$f"
50
- done
51
- }
52
- trap cleanup EXIT
53
-
54
- # Fix locale warnings
55
- export LC_ALL=C
56
-
57
- TEXT="${1:-}"
58
- VOICE_OVERRIDE="${2:-}" # Optional: voice model name
59
-
60
- # Source voice manager and language manager
61
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
62
- source "$SCRIPT_DIR/piper-voice-manager.sh"
63
- source "$SCRIPT_DIR/language-manager.sh"
64
- source "$SCRIPT_DIR/audio-cache-utils.sh"
65
-
66
- # Default voice for Piper
67
- DEFAULT_VOICE="en_US-lessac-medium"
68
-
69
- # @function determine_voice_model
70
- # @intent Resolve voice name to Piper model name with language support
71
- # @why Support voice override, language-specific voices, and default fallback
72
- # @param Uses global: $VOICE_OVERRIDE
73
- # @returns Sets $VOICE_MODEL global variable
74
- # @sideeffects None
75
- VOICE_MODEL=""
76
-
77
- # Get current language setting
78
- CURRENT_LANGUAGE=$(get_language_code)
79
-
80
- if [[ -n "$VOICE_OVERRIDE" ]]; then
81
- # Use override if provided
82
- VOICE_MODEL="$VOICE_OVERRIDE"
83
- echo "🎤 Using voice: $VOICE_OVERRIDE (session-specific)"
84
- else
85
- # Try to get voice from voice file (check CLAUDE_PROJECT_DIR first for MCP context)
86
- VOICE_FILE=""
87
-
88
- # Priority order:
89
- # 1. CLAUDE_PROJECT_DIR env var (set by MCP for project-specific settings)
90
- # 2. Script location (for direct slash command usage)
91
- # 3. Global ~/.claude (fallback)
92
-
93
- if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]] && [[ "${CLAUDE_PROJECT_DIR:-}" != *".."* ]] && [[ -f "$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt" ]]; then
94
- # MCP context: Use the project directory where MCP was invoked
95
- VOICE_FILE="$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt"
96
- elif [[ -f "$SCRIPT_DIR/../tts-voice.txt" ]]; then
97
- # Direct usage: Use script location
98
- VOICE_FILE="$SCRIPT_DIR/../tts-voice.txt"
99
- elif [[ -f "$HOME/.claude/tts-voice.txt" ]]; then
100
- # Fallback: Use global
101
- VOICE_FILE="$HOME/.claude/tts-voice.txt"
102
- fi
103
-
104
- if [[ -n "$VOICE_FILE" ]]; then
105
- FILE_VOICE=$(cat "$VOICE_FILE" 2>/dev/null)
106
-
107
- # Check for multi-speaker voice (model + speaker ID stored separately)
108
- # Use same directory as VOICE_FILE for consistency
109
- VOICE_DIR=$(dirname "$VOICE_FILE")
110
- MODEL_FILE="$VOICE_DIR/tts-piper-model.txt"
111
- SPEAKER_ID_FILE="$VOICE_DIR/tts-piper-speaker-id.txt"
112
-
113
- if [[ -f "$MODEL_FILE" ]] && [[ -f "$SPEAKER_ID_FILE" ]]; then
114
- # Multi-speaker voice config found locally
115
- VOICE_MODEL=$(cat "$MODEL_FILE" 2>/dev/null)
116
- SPEAKER_ID=$(cat "$SPEAKER_ID_FILE" 2>/dev/null)
117
- # Validate speaker ID is numeric
118
- if [[ -n "$SPEAKER_ID" ]] && ! [[ "$SPEAKER_ID" =~ ^[0-9]+$ ]]; then
119
- echo "Warning: Invalid speaker ID '$SPEAKER_ID', ignoring" >&2
120
- SPEAKER_ID=""
121
- fi
122
- echo "🎭 Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-none})"
123
- # Fallback: check global ~/.claude/ for multi-speaker config files
124
- elif [[ -f "$HOME/.claude/tts-piper-model.txt" ]] && [[ -f "$HOME/.claude/tts-piper-speaker-id.txt" ]]; then
125
- VOICE_MODEL=$(cat "$HOME/.claude/tts-piper-model.txt" 2>/dev/null)
126
- SPEAKER_ID=$(cat "$HOME/.claude/tts-piper-speaker-id.txt" 2>/dev/null)
127
- if [[ -n "$SPEAKER_ID" ]] && ! [[ "$SPEAKER_ID" =~ ^[0-9]+$ ]]; then
128
- echo "Warning: Invalid speaker ID '$SPEAKER_ID', ignoring" >&2
129
- SPEAKER_ID=""
130
- fi
131
- echo "🎭 Using multi-speaker voice (global): $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-none})"
132
- # Single-speaker voice or custom voice name
133
- elif [[ -n "$FILE_VOICE" ]]; then
134
- # Strip multi-speaker suffix if present (model::SpeakerName-Label)
135
- if [[ "$FILE_VOICE" == *"::"* ]]; then
136
- VOICE_MODEL="${FILE_VOICE%%::*}"
137
- else
138
- VOICE_MODEL="$FILE_VOICE"
139
- fi
140
- fi
141
- fi
142
-
143
- # If no Piper voice from file, try language-specific voice
144
- if [[ -z "$VOICE_MODEL" ]]; then
145
- LANG_VOICE=$(get_voice_for_language "$CURRENT_LANGUAGE" "piper" 2>/dev/null)
146
-
147
- if [[ -n "$LANG_VOICE" ]]; then
148
- VOICE_MODEL="$LANG_VOICE"
149
- echo "๐ŸŒ Using $CURRENT_LANGUAGE voice: $LANG_VOICE (Piper)"
150
- else
151
- # Use default voice
152
- VOICE_MODEL="$DEFAULT_VOICE"
153
- fi
154
- fi
155
- fi
156
-
157
- # @function validate_inputs
158
- # @intent Check required parameters
159
- # @why Fail fast with clear errors if inputs missing
160
- # @exitcode 1=missing text, 2=missing piper binary
161
- if [[ -z "$TEXT" ]]; then
162
- echo "Usage: $0 \"text to speak\" [voice_model_name]"
163
- exit 1
164
- fi
165
-
166
- # Check if Piper is installed
167
- if ! command -v piper &> /dev/null; then
168
- echo "❌ Error: Piper TTS not installed"
169
- echo "Install with: pipx install piper-tts"
170
- echo "Or run: .claude/hooks/piper-installer.sh"
171
- exit 2
172
- fi
173
-
174
- # @function ensure_voice_downloaded
175
- # @intent Download voice model if not cached
176
- # @why Provide seamless experience with automatic downloads
177
- # @param Uses global: $VOICE_MODEL
178
- # @sideeffects Downloads voice model files
179
- # @edgecases Prompts user for consent before downloading, skipped in test mode
180
- if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && ! verify_voice "$VOICE_MODEL"; then
181
- echo "📥 Voice model not found: $VOICE_MODEL"
182
- echo " File size: ~25MB"
183
- echo " Preview: https://huggingface.co/rhasspy/piper-voices"
184
- echo ""
185
- read -p " Download this voice model? [y/N]: " -n 1 -r
186
- echo
187
-
188
- if [[ $REPLY =~ ^[Yy]$ ]]; then
189
- if ! download_voice "$VOICE_MODEL"; then
190
- echo "❌ Failed to download voice model"
191
- echo "Fix: Download manually or choose different voice"
192
- exit 3
193
- fi
194
- else
195
- echo "❌ Voice download cancelled"
196
- exit 3
197
- fi
198
- fi
199
-
200
- # Get voice model path
201
- # In test mode, use a fake path since we have mock piper that doesn't need real files
202
- if [[ "${AGENTVIBES_TEST_MODE:-false}" == "true" ]]; then
203
- VOICE_PATH="/tmp/mock-voice-${VOICE_MODEL}.onnx"
204
- else
205
- VOICE_PATH=$(get_voice_path "$VOICE_MODEL")
206
- if [[ $? -ne 0 ]]; then
207
- echo "❌ Voice model path not found: $VOICE_MODEL"
208
- exit 3
209
- fi
210
- fi
211
-
212
- # @function determine_audio_directory
213
- # @intent Find appropriate directory for audio file storage
214
- # @why Supports project-local and global storage
215
- # @returns Sets $AUDIO_DIR global variable
216
- if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
217
- AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
218
- else
219
- # Fallback: try to find .claude directory in current path
220
- CURRENT_DIR="$PWD"
221
- while [[ "$CURRENT_DIR" != "/" ]]; do
222
- if [[ -d "$CURRENT_DIR/.claude" ]]; then
223
- AUDIO_DIR="$CURRENT_DIR/.claude/audio"
224
- break
225
- fi
226
- CURRENT_DIR=$(dirname "$CURRENT_DIR")
227
- done
228
- # Final fallback to global if no project .claude found
229
- if [[ -z "${AUDIO_DIR:-}" ]]; then
230
- AUDIO_DIR="$HOME/.claude/audio"
231
- fi
232
- fi
233
-
234
- mkdir -p "$AUDIO_DIR"
235
- TEMP_FILE=$(mktemp "$AUDIO_DIR/tts-XXXXXX.wav")
236
-
237
- # @function get_speech_rate
238
- # @intent Determine speech rate for Piper synthesis
239
- # @why Convert user-facing speed (0.5=slower, 2.0=faster) to Piper length-scale (inverted)
240
- # @returns Piper length-scale value (inverted from user scale)
241
- # @note Piper uses length-scale where higher=slower, opposite of user expectation
242
- get_speech_rate() {
243
- local target_config=""
244
- local main_config=""
245
-
246
- # Check for target-specific config first (new and legacy paths)
247
- if [[ -f "$SCRIPT_DIR/../config/tts-target-speech-rate.txt" ]]; then
248
- target_config="$SCRIPT_DIR/../config/tts-target-speech-rate.txt"
249
- elif [[ -f "$HOME/.claude/config/tts-target-speech-rate.txt" ]]; then
250
- target_config="$HOME/.claude/config/tts-target-speech-rate.txt"
251
- elif [[ -f "$SCRIPT_DIR/../config/piper-target-speech-rate.txt" ]]; then
252
- target_config="$SCRIPT_DIR/../config/piper-target-speech-rate.txt"
253
- elif [[ -f "$HOME/.claude/config/piper-target-speech-rate.txt" ]]; then
254
- target_config="$HOME/.claude/config/piper-target-speech-rate.txt"
255
- fi
256
-
257
- # Check for main config (new and legacy paths)
258
- if [[ -f "$SCRIPT_DIR/../config/tts-speech-rate.txt" ]]; then
259
- main_config="$SCRIPT_DIR/../config/tts-speech-rate.txt"
260
- elif [[ -f "$HOME/.claude/config/tts-speech-rate.txt" ]]; then
261
- main_config="$HOME/.claude/config/tts-speech-rate.txt"
262
- elif [[ -f "$SCRIPT_DIR/../config/piper-speech-rate.txt" ]]; then
263
- main_config="$SCRIPT_DIR/../config/piper-speech-rate.txt"
264
- elif [[ -f "$HOME/.claude/config/piper-speech-rate.txt" ]]; then
265
- main_config="$HOME/.claude/config/piper-speech-rate.txt"
266
- fi
267
-
268
- # If this is a non-English voice and target config exists, use it
269
- if [[ "$CURRENT_LANGUAGE" != "english" ]] && [[ -n "$target_config" ]]; then
270
- local user_speed=$(cat "$target_config" 2>/dev/null)
271
- # Validate speed is a positive number
272
- if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
273
- echo "1.0"
274
- return
275
- fi
276
- # Convert user speed to Piper length-scale (invert)
277
- # User: 0.5=slower, 1.0=normal, 2.0=faster
278
- # Piper: 2.0=slower, 1.0=normal, 0.5=faster
279
- # Formula: piper_length_scale = 1.0 / user_speed
280
- echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
281
- return
282
- fi
283
-
284
- # Otherwise use main config if available
285
- if [[ -n "$main_config" ]]; then
286
- local user_speed=$(grep -v '^#' "$main_config" 2>/dev/null | grep -v '^$' | tail -1)
287
- # Validate speed is a positive number
288
- if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
289
- echo "1.0"
290
- return
291
- fi
292
- echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
293
- return
294
- fi
295
-
296
- # Default: 1.0 (normal) for English, 2.0 (slower) for learning
297
- if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
298
- echo "2.0"
299
- else
300
- echo "1.0"
301
- fi
302
- }
303
-
304
- SPEECH_RATE=$(get_speech_rate)
305
-
306
- # @function synthesize_with_piper
307
- # @intent Generate speech using Piper TTS
308
- # @why Provides free, offline TTS alternative
309
- # @param Uses globals: $TEXT, $VOICE_PATH, $SPEECH_RATE, $SPEAKER_ID (optional)
310
- # @returns Creates WAV file at $TEMP_FILE
311
- # @exitcode 0=success, 4=synthesis error
312
- # @sideeffects Creates audio file
313
- # @edgecases Handles piper errors, invalid models, multi-speaker voices
314
- if [[ -n "${SPEAKER_ID:-}" ]]; then
315
- # Multi-speaker voice: Pass speaker ID
316
- # Add 2-second pause between sentences for better pacing
317
- echo "$TEXT" | piper --model "$VOICE_PATH" --speaker "$SPEAKER_ID" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
318
- else
319
- # Single-speaker voice
320
- # Add 2-second pause between sentences for better pacing
321
- echo "$TEXT" | piper --model "$VOICE_PATH" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
322
- fi
323
-
324
- if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
325
- echo "❌ Failed to synthesize speech with Piper"
326
- echo "Voice model: $VOICE_MODEL"
327
- echo "Check that voice model is valid"
328
- exit 4
329
- fi
330
-
331
- # @function detect_remote_session
332
- # @intent Auto-detect SSH/RDP sessions and enable audio compression
333
- # @why Remote desktop audio is choppy without compression
334
- # @returns Sets AGENTVIBES_RDP_MODE environment variable
335
- # @detection Checks SSH_CLIENT, SSH_TTY, and DISPLAY variables
336
- if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]]; then
337
- # Auto-detect remote session
338
- if [[ -n "${SSH_CLIENT:-}" ]] || [[ -n "${SSH_TTY:-}" ]] || [[ "${DISPLAY:-}" =~ ^localhost:.* ]]; then
339
- export AGENTVIBES_RDP_MODE=true
340
- echo "๐ŸŒ Remote session detected - enabling audio compression"
341
- fi
342
- fi
343
-
344
- # @function compress_for_remote
345
- # @intent Compress TTS audio for remote sessions (SSH/RDP)
346
- # @why Reduces bandwidth and prevents choppy playback
347
- # @param Uses global: $TEMP_FILE, $AGENTVIBES_RDP_MODE
348
- # @returns Updates $TEMP_FILE to compressed version
349
- # @sideeffects Converts to mono 22kHz for lower bandwidth
350
- if [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] && command -v ffmpeg &> /dev/null; then
351
- COMPRESSED_FILE=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX.wav")
352
- _CLEANUP_FILES+=("$COMPRESSED_FILE")
353
- # Convert to mono, 22kHz, 64kbps for remote sessions
354
- ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null
355
-
356
- if [[ -f "$COMPRESSED_FILE" ]]; then
357
- rm -f "$TEMP_FILE"
358
- TEMP_FILE="$COMPRESSED_FILE"
359
- fi
360
- fi
361
-
362
- # @function add_silence_padding
363
- # @intent Add silence to prevent WSL audio static
364
- # @why WSL audio subsystem cuts off first ~200ms
365
- # @param Uses global: $TEMP_FILE
366
- # @returns Updates $TEMP_FILE to padded version
367
- # @sideeffects Modifies audio file
368
- # AI NOTE: Use ffmpeg if available, otherwise skip padding (degraded experience)
369
- if command -v ffmpeg &> /dev/null; then
370
- PADDED_FILE=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX.wav")
371
- _CLEANUP_FILES+=("$PADDED_FILE")
372
- # Add 200ms of silence at the beginning
373
- ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
374
- -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
375
- -map "[out]" -y "$PADDED_FILE" 2>/dev/null
376
-
377
- if [[ -f "$PADDED_FILE" ]]; then
378
- rm -f "$TEMP_FILE"
379
- TEMP_FILE="$PADDED_FILE"
380
- fi
381
- fi
382
-
383
- # @function apply_audio_effects
384
- # @intent Apply sox effects and background music via audio-processor.sh
385
- # @param Uses global: $TEMP_FILE
386
- # @returns Updates $TEMP_FILE to processed version, sets $BACKGROUND_MUSIC if used
387
- # @sideeffects Applies audio effects and background music
388
- BACKGROUND_MUSIC=""
389
- if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
390
- PROCESSED_FILE=$(mktemp "$AUDIO_DIR/tts-processed-XXXXXX.wav")
391
- _CLEANUP_FILES+=("$PROCESSED_FILE")
392
- # audio-processor.sh returns: FILE_PATH|BACKGROUND_FILE
393
- PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" 2>/dev/null) || {
394
- echo "Warning: Audio processing failed, using unprocessed audio" >&2
395
- PROCESSED_FILE="$TEMP_FILE"
396
- PROCESSOR_OUTPUT="$TEMP_FILE|"
397
- }
398
-
399
- # Parse output: FILE|BACKGROUND
400
- PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
401
- BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
402
-
403
- if [[ -f "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
404
- rm -f "$TEMP_FILE"
405
- TEMP_FILE="$PROCESSED_FILE"
406
- fi
407
- fi
408
-
409
- # @function play_audio
410
- # @intent Play generated audio using available player with sequential playback
411
- # @why Support multiple audio players and prevent overlapping audio in learning mode
412
- # @param Uses global: $TEMP_FILE, $CURRENT_LANGUAGE
413
- # @sideeffects Plays audio with lock mechanism for sequential playback
414
- _LOCK_DIR="${XDG_RUNTIME_DIR:-/tmp/agentvibes-$(id -u)}"
415
- mkdir -p "$_LOCK_DIR"
416
- chmod 700 "$_LOCK_DIR"
417
- LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"
418
-
419
- # Wait for previous audio to finish (max 2 seconds to prevent blocking)
420
- for i in {1..4}; do
421
- if [ ! -f "$LOCK_FILE" ]; then
422
- break
423
- fi
424
- sleep 0.5
425
- done
426
-
427
- # If still locked after 2 seconds, skip this TTS to prevent blocking Claude
428
- if [ -f "$LOCK_FILE" ]; then
429
- echo "⏭️ Skipping TTS (previous audio still playing)" >&2
430
- exit 0
431
- fi
432
-
433
- # Track last target language audio for replay command
434
- if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
435
- TARGET_AUDIO_FILE="${CLAUDE_PROJECT_DIR:-${HOME}}/.claude/last-target-audio.txt"
436
- echo "$TEMP_FILE" > "$TARGET_AUDIO_FILE"
437
- fi
438
-
439
- # Create lock and play audio
440
- touch "$LOCK_FILE"
441
-
442
- # Create write lock file in audio directory to signal file is in-use (prevents race condition in cleanup)
443
- _TEMP_DIR="${TEMP_FILE%/*}"
444
- WRITE_LOCK_FILE="$_TEMP_DIR/$(basename "$TEMP_FILE" .wav).lock"
445
- touch "$WRITE_LOCK_FILE"
446
- _CLEANUP_FILES+=("$LOCK_FILE" "$WRITE_LOCK_FILE")
447
-
448
- # Get audio duration for proper lock timing
449
- DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null || true)
450
- DURATION=${DURATION%.*} # Round to integer
451
- DURATION=${DURATION:-1} # Default to 1 second if detection fails
452
-
453
- # Play audio in background (skip if in test mode or no-playback mode)
454
- # AGENTVIBES_NO_PLAYBACK: Set to "true" to generate audio without playing (for post-processing)
455
- if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && [[ "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
456
- # Detect platform and use appropriate audio player
457
- if [[ "$(uname -s)" == "Darwin" ]]; then
458
- # macOS: Use afplay (native macOS audio player)
459
- afplay "$TEMP_FILE" >/dev/null 2>&1 &
460
- PLAYER_PID=$!
461
- elif [[ -n "${TERMUX_VERSION:-}" ]] || [[ -d "/data/data/com.termux" ]]; then
462
- # Android/Termux: Use termux-media-player
463
- termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
464
- PLAYER_PID=$!
465
- else
466
- # Linux/WSL: Prefer paplay (PulseAudio) for best WSL audio quality
467
- (paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
468
- PLAYER_PID=$!
469
- fi
470
- fi
471
-
472
- # Wait for audio to finish, then release locks (both global and write lock)
473
- (sleep $DURATION; rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE") &
474
- disown
475
-
476
- # Get audio cache path
477
- AUDIO_DIR_PATH=$(get_audio_dir)
478
-
479
- # Color codes
480
- BLUE='\033[0;34m'
481
- YELLOW='\033[1;33m'
482
- PURPLE='\033[0;35m'
483
- LIGHT_PURPLE='\033[1;35m'
484
- RED='\033[0;31m'
485
- GREEN='\033[0;32m'
486
- ORANGE='\033[0;33m'
487
- WHITE='\033[1;37m'
488
- MAGENTA='\033[0;35m'
489
- CYAN='\033[0;36m'
490
- GOLD='\033[38;5;226m'
491
- NC='\033[0m'
492
-
493
- # CRITICAL: Run auto-cleanup FIRST (before calculating size)
494
- # This ensures we display the POST-cleanup size, not pre-cleanup size
495
- AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
496
- INITIAL_SIZE=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
497
- if [[ $INITIAL_SIZE -gt $((AUTO_CLEAN_THRESHOLD * 1048576)) ]]; then
498
- DELETED=$(auto_clean_old_files "$AUDIO_DIR_PATH" "$AUTO_CLEAN_THRESHOLD")
499
- if [[ $DELETED -gt 0 ]]; then
500
- echo -e "${ORANGE}๐Ÿงน Auto-cleaned $DELETED old files${NC}"
501
- fi
502
- fi
503
-
504
- # NOW calculate cache stats after cleanup
505
- FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
506
- SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
507
- SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")
508
-
509
- # Dynamic color coding based on cache size
510
- # Green: < 500MB (small)
511
- # Yellow: 500MB - 3GB (lots)
512
- # Red: > 3GB (extreme)
513
- CACHE_COLOR=$GREEN
514
- if [[ $SIZE_BYTES -gt 3221225472 ]]; then # > 3GB
515
- CACHE_COLOR=$RED
516
- elif [[ $SIZE_BYTES -gt 524288000 ]]; then # > 500MB
517
- CACHE_COLOR=$YELLOW
518
- fi
519
-
520
- # Display with file count (now showing accurate post-cleanup size)
521
- echo -e "${WHITE}๐Ÿ’พ Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}๐Ÿ—„๏ธ${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}๐Ÿงน${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"
522
-
523
- if [[ -n "$BACKGROUND_MUSIC" ]]; then
524
- # Extract just the filename to save space
525
- MUSIC_FILENAME=$(basename "$BACKGROUND_MUSIC")
526
- echo -e "${WHITE}๐ŸŽต Background music:${NC} ${PURPLE}$MUSIC_FILENAME${NC}"
527
- fi
528
- # Show speaker name for multi-speaker voices, otherwise show model name
529
- if [[ -n "${SPEAKER_ID:-}" ]] && [[ -n "${FILE_VOICE:-}" ]]; then
530
- echo -e "${WHITE}๐ŸŽค Voice used:${NC} ${BLUE}$FILE_VOICE${NC} ${WHITE}(Piper TTS)${NC}"
531
- else
532
- echo -e "${WHITE}๐ŸŽค Voice used:${NC} ${BLUE}$VOICE_MODEL${NC} ${WHITE}(Piper TTS)${NC}"
533
- fi
534
-
535
- # Show personality if configured
536
- PERSONALITY=$(cat "${PROJECT_ROOT:-/nonexistent}/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
537
- if [[ -n "$PERSONALITY" ]] && [[ "$PERSONALITY" != "none" ]] && [[ "$PERSONALITY" != "normal" ]]; then
538
- echo -e "${WHITE}๐Ÿ’ซ Personality:${NC} ${YELLOW}$PERSONALITY${NC}"
539
- fi
540
-
541
- # Check audio folder size and warn if getting large
542
- if [[ -d "$AUDIO_DIR_PATH" ]]; then
543
- AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
544
- if [[ -n "$AUDIO_SIZE" ]] && [[ "$AUDIO_SIZE" -gt 100 ]]; then
545
- echo -e "\033[0;31mโš ๏ธ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
546
- fi
547
- fi
548
-
549
- # Show status indicators
550
- GLOBAL_MUTE_FILE="$HOME/.agentvibes-muted"
551
- PROJECT_MUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-muted"
552
- PROJECT_UNMUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-unmuted"
553
- BACKGROUND_ENABLED_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/config/background-music-enabled.txt"
554
- GLOBAL_BACKGROUND_ENABLED_FILE="$HOME/.claude/config/background-music-enabled.txt"
555
-
556
- # Mute status indicator
557
- if [[ -f "$PROJECT_UNMUTE_FILE" ]] && [[ -f "$GLOBAL_MUTE_FILE" ]]; then
558
- echo "๐Ÿ”Š Status: Unmuted (project overrides global mute)"
559
- elif [[ -f "$PROJECT_MUTE_FILE" ]]; then
560
- echo "๐Ÿ”‡ Status: Muted (project)"
561
- elif [[ -f "$GLOBAL_MUTE_FILE" ]]; then
562
- echo "๐Ÿ”‡ Status: Would be muted (global) - but this project is speaking"
563
- fi
564
-
565
- # Background music status indicator
566
- if [[ -z "$BACKGROUND_MUSIC" ]]; then
567
- _bg_enabled=false
568
- if [[ -f "$BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$BACKGROUND_ENABLED_FILE" 2>/dev/null; then
569
- _bg_enabled=true
570
- elif [[ -f "$GLOBAL_BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$GLOBAL_BACKGROUND_ENABLED_FILE" 2>/dev/null; then
571
- _bg_enabled=true
572
- fi
573
- if [[ "$_bg_enabled" == "true" ]]; then
574
- echo -e "${WHITE}๐ŸŽต Background music:${NC} ${PURPLE}Enabled but not playing (check config)${NC}"
575
- else
576
- echo -e "${WHITE}๐ŸŽต Background music:${NC} ${PURPLE}Disabled${NC}"
577
- fi
578
- fi
1
+ #!/usr/bin/env bash
2
+ #
3
+ # File: .claude/hooks/play-tts-piper.sh
4
+ #
5
+ # AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
6
+ # Website: https://agentvibes.org
7
+ # Repository: https://github.com/paulpreibisch/AgentVibes
8
+ #
9
+ # Co-created by Paul Preibisch with Claude AI
10
+ # Copyright (c) 2025 Paul Preibisch
11
+ #
12
+ # Licensed under the Apache License, Version 2.0 (the "License");
13
+ # you may not use this file except in compliance with the License.
14
+ # You may obtain a copy of the License at
15
+ #
16
+ # http://www.apache.org/licenses/LICENSE-2.0
17
+ #
18
+ # Unless required by applicable law or agreed to in writing, software
19
+ # distributed under the License is distributed on an "AS IS" BASIS,
20
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ # See the License for the specific language governing permissions and
22
+ # limitations under the License.
23
+ #
24
+ # DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
+ # express or implied. Use at your own risk. See the Apache License for details.
26
+ #
27
+ # ---
28
+ #
29
+ # @fileoverview Piper TTS Provider Implementation - Free, offline neural TTS
30
+ # @context Provides local, privacy-first TTS alternative to cloud services for WSL/Linux
31
+ # @architecture Implements provider interface contract for Piper binary integration
32
+ # @dependencies piper (pipx), piper-voice-manager.sh, mpv/aplay, ffmpeg (optional padding)
33
+ # @entrypoints Called by play-tts.sh router when provider=piper
34
+ # @patterns Provider contract: text/voice → audio file path, voice auto-download, language-aware synthesis
35
+ # @related play-tts.sh, piper-voice-manager.sh, language-manager.sh, GitHub Issue #25
36
+ #
37
+
38
+ set -eo pipefail
39
+ # Note: -u (nounset) omitted because sourced scripts (piper-voice-manager.sh,
40
+ # language-manager.sh, audio-cache-utils.sh) use unset variables freely.
41
+ # Variables in THIS script use ${VAR:-} defaults for safety.
42
+
43
+ # Cleanup handler for temp files (preserves final output in $TEMP_FILE)
44
+ _CLEANUP_FILES=()
45
+ cleanup() {
46
+ local f
47
+ for f in "${_CLEANUP_FILES[@]+"${_CLEANUP_FILES[@]}"}"; do
48
+ [[ "$f" == "${TEMP_FILE:-}" ]] && continue
49
+ rm -f "$f"
50
+ done
51
+ }
52
+ trap cleanup EXIT
53
+
54
+ # Fix locale warnings
55
+ export LC_ALL=C
56
+
57
+ TEXT="${1:-}"
58
+ VOICE_OVERRIDE="${2:-}" # Optional: voice model name
59
+ AGENT_PROFILE_FILE="${3:-}" # Optional: path to per-agent profile JSON (from bmad-speak.sh)
60
+
61
+ # Strip emojis, asterisks, and markdown formatting that Piper would speak literally
62
+ TEXT=$(printf '%s' "$TEXT" | perl -CSD -pe '
63
+ s/[\x{1F300}-\x{1F9FF}]//g; # emoticons, symbols, pictographs
64
+ s/[\x{2600}-\x{27BF}]//g; # misc symbols, dingbats
65
+ s/[\x{FE00}-\x{FE0F}]//g; # variation selectors
66
+ s/[\x{200D}]//g; # zero-width joiner
67
+ s/[\x{2500}-\x{257F}]//g; # box drawing (โ”€โ” etc)
68
+ s/[\x{2580}-\x{259F}]//g; # block elements
69
+ s/\*+//g; # asterisks (bold/italic markdown)
70
+ s/#+\s*//g; # heading markers
71
+ s/`//g; # backticks
72
+ s/~+//g; # strikethrough
73
+ s/^\s*[-]\s*//g; # list dashes
74
+ ')
75
+
76
+ # Source voice manager and language manager
77
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
78
+ source "$SCRIPT_DIR/piper-voice-manager.sh"
79
+ source "$SCRIPT_DIR/language-manager.sh"
80
+ source "$SCRIPT_DIR/audio-cache-utils.sh"
81
+
82
+ # Default voice for Piper
83
+ DEFAULT_VOICE="en_US-lessac-medium"
84
+
85
+ # @function determine_voice_model
86
+ # @intent Resolve voice name to Piper model name with language support
87
+ # @why Support voice override, language-specific voices, and default fallback
88
+ # @param Uses global: $VOICE_OVERRIDE
89
+ # @returns Sets $VOICE_MODEL global variable
90
+ # @sideeffects None
91
+ VOICE_MODEL=""
92
+
93
+ # Get current language setting
94
+ CURRENT_LANGUAGE=$(get_language_code)
95
+
96
+ if [[ -n "$VOICE_OVERRIDE" ]]; then
97
+ # Use override if provided
98
+ # Handle multi-speaker format: "Model::SpeakerName" → split into model + speaker lookup
99
+ if [[ "$VOICE_OVERRIDE" == *"::"* ]]; then
100
+ VOICE_MODEL="${VOICE_OVERRIDE%%::*}"
101
+ _SPEAKER_NAME="${VOICE_OVERRIDE#*::}"
102
+ # Look up speaker ID from the model's .onnx.json speaker_id_map
103
+ voice_dir=$(get_voice_storage_dir)
104
+ _JSON_FILE="$voice_dir/${VOICE_MODEL}.onnx.json"
105
+ if [[ -f "$_JSON_FILE" ]]; then
106
+ # SECURITY: Pass values via env vars to prevent shell injection
107
+ SPEAKER_ID=$(_JSON="$_JSON_FILE" _SPKR="$_SPEAKER_NAME" node -e "
108
+ try {
109
+ const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
110
+ const map = j.speaker_id_map || {};
111
+ const id = map[process.env._SPKR];
112
+ if (id !== undefined) process.stdout.write(String(id));
113
+ } catch {}
114
+ " 2>/dev/null || true)
115
+ fi
116
+ echo "๐ŸŽญ Using multi-speaker voice: $VOICE_OVERRIDE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
117
+ else
118
+ VOICE_MODEL="$VOICE_OVERRIDE"
119
+ echo "๐ŸŽค Using voice: $VOICE_OVERRIDE (session-specific)"
120
+ fi
121
+ else
122
+ # Try to get voice from voice file (check CLAUDE_PROJECT_DIR first for MCP context)
123
+ VOICE_FILE=""
124
+
125
+ # Priority order:
126
+ # 1. CLAUDE_PROJECT_DIR env var (set by MCP for project-specific settings)
127
+ # 2. Script location (for direct slash command usage)
128
+ # 3. Global ~/.claude (fallback)
129
+
130
+ # SECURITY: Canonicalize path to prevent traversal (#128)
131
+ if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
132
+ CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
133
+ fi
134
+ if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]] && [[ -f "$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt" ]]; then
135
+ # MCP context: Use the project directory where MCP was invoked
136
+ VOICE_FILE="$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt"
137
+ elif [[ -f "$SCRIPT_DIR/../tts-voice.txt" ]]; then
138
+ # Direct usage: Use script location
139
+ VOICE_FILE="$SCRIPT_DIR/../tts-voice.txt"
140
+ elif [[ -f "$HOME/.claude/tts-voice.txt" ]]; then
141
+ # Fallback: Use global
142
+ VOICE_FILE="$HOME/.claude/tts-voice.txt"
143
+ fi
144
+
145
+ if [[ -n "$VOICE_FILE" ]]; then
146
+ FILE_VOICE=$(cat "$VOICE_FILE" 2>/dev/null)
147
+
148
+ # Check for multi-speaker voice (model + speaker ID stored separately)
149
+ # Use same directory as VOICE_FILE for consistency
150
+ VOICE_DIR=$(dirname "$VOICE_FILE")
151
+ MODEL_FILE="$VOICE_DIR/tts-piper-model.txt"
152
+ SPEAKER_ID_FILE="$VOICE_DIR/tts-piper-speaker-id.txt"
153
+
154
+ if [[ -f "$MODEL_FILE" ]] && [[ -f "$SPEAKER_ID_FILE" ]]; then
155
+ # Multi-speaker voice config found locally
156
+ VOICE_MODEL=$(cat "$MODEL_FILE" 2>/dev/null)
157
+ SPEAKER_ID=$(cat "$SPEAKER_ID_FILE" 2>/dev/null)
158
+ # Validate speaker ID is numeric
159
+ if [[ -n "$SPEAKER_ID" ]] && ! [[ "$SPEAKER_ID" =~ ^[0-9]+$ ]]; then
160
+ echo "Warning: Invalid speaker ID '$SPEAKER_ID', ignoring" >&2
161
+ SPEAKER_ID=""
162
+ fi
163
+ echo "๐ŸŽญ Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-none})"
164
+ # Check if voice uses Model::SpeakerName format (from AgentVibes config)
165
+ elif [[ -n "$FILE_VOICE" ]] && [[ "$FILE_VOICE" == *"::"* ]]; then
166
+ VOICE_MODEL="${FILE_VOICE%%::*}"
167
+ _SPEAKER_NAME="${FILE_VOICE#*::}"
168
+ voice_dir=$(get_voice_storage_dir)
169
+ _JSON_FILE="$voice_dir/${VOICE_MODEL}.onnx.json"
170
+ if [[ -f "$_JSON_FILE" ]]; then
171
+ # SECURITY: Pass values via env vars to prevent shell injection
172
+ SPEAKER_ID=$(_JSON="$_JSON_FILE" _SPKR="$_SPEAKER_NAME" node -e "
173
+ try {
174
+ const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
175
+ const map = j.speaker_id_map || {};
176
+ const id = map[process.env._SPKR];
177
+ if (id !== undefined) process.stdout.write(String(id));
178
+ } catch {}
179
+ " 2>/dev/null || true)
180
+ fi
181
+ echo "๐ŸŽญ Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
182
+ # Standard Piper model name or custom voice (just use as-is)
183
+ elif [[ -n "$FILE_VOICE" ]]; then
184
+ # Strip multi-speaker suffix if present (model::SpeakerName-Label)
185
+ if [[ "$FILE_VOICE" == *"::"* ]]; then
186
+ VOICE_MODEL="${FILE_VOICE%%::*}"
187
+ else
188
+ VOICE_MODEL="$FILE_VOICE"
189
+ fi
190
+ fi
191
+ fi
192
+
193
+ # If no Piper voice from file, try language-specific voice
194
+ if [[ -z "$VOICE_MODEL" ]]; then
195
+ LANG_VOICE=$(get_voice_for_language "$CURRENT_LANGUAGE" "piper" 2>/dev/null)
196
+
197
+ if [[ -n "$LANG_VOICE" ]]; then
198
+ VOICE_MODEL="$LANG_VOICE"
199
+ echo "๐ŸŒ Using $CURRENT_LANGUAGE voice: $LANG_VOICE (Piper)"
200
+ else
201
+ # Use default voice
202
+ VOICE_MODEL="$DEFAULT_VOICE"
203
+ fi
204
+ fi
205
+ fi
206
+
207
+ # @function validate_inputs
208
+ # @intent Check required parameters
209
+ # @why Fail fast with clear errors if inputs missing
210
+ # @exitcode 1=missing text, 2=missing piper binary
211
+ if [[ -z "$TEXT" ]]; then
212
+ echo "Usage: $0 \"text to speak\" [voice_model_name]"
213
+ exit 1
214
+ fi
215
+
216
+ # Check if Piper is installed
217
+ if ! command -v piper &> /dev/null; then
218
+ echo "โŒ Error: Piper TTS not installed"
219
+ echo "Install with: pipx install piper-tts"
220
+ echo "Or run: .claude/hooks/piper-installer.sh"
221
+ exit 2
222
+ fi
223
+
224
+ # @function ensure_voice_downloaded
225
+ # @intent Download voice model if not cached
226
+ # @why Provide seamless experience with automatic downloads
227
+ # @param Uses global: $VOICE_MODEL
228
+ # @sideeffects Downloads voice model files
229
+ # @edgecases Prompts user for consent before downloading, skipped in test mode
230
+ if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && ! verify_voice "$VOICE_MODEL"; then
231
+ echo "๐Ÿ“ฅ Voice model not found: $VOICE_MODEL"
232
+ echo " File size: ~25MB"
233
+ echo " Preview: https://huggingface.co/rhasspy/piper-voices"
234
+ echo ""
235
+ read -p " Download this voice model? [y/N]: " -n 1 -r
236
+ echo
237
+
238
+ if [[ $REPLY =~ ^[Yy]$ ]]; then
239
+ if ! download_voice "$VOICE_MODEL"; then
240
+ echo "โŒ Failed to download voice model"
241
+ echo "Fix: Download manually or choose different voice"
242
+ exit 3
243
+ fi
244
+ else
245
+ echo "โŒ Voice download cancelled"
246
+ exit 3
247
+ fi
248
+ fi
249
+
250
+ # Get voice model path
251
+ # In test mode, use a fake path since we have mock piper that doesn't need real files
252
+ if [[ "${AGENTVIBES_TEST_MODE:-false}" == "true" ]]; then
253
+ VOICE_PATH="/tmp/mock-voice-${VOICE_MODEL}.onnx"
254
+ else
255
+ VOICE_PATH=$(get_voice_path "$VOICE_MODEL")
256
+ if [[ $? -ne 0 ]]; then
257
+ echo "โŒ Voice model path not found: $VOICE_MODEL"
258
+ exit 3
259
+ fi
260
+ fi
261
+
262
+ # @function determine_audio_directory
263
+ # @intent Find appropriate directory for audio file storage
264
+ # @why Supports project-local and global storage
265
+ # @returns Sets $AUDIO_DIR global variable
266
+ if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
267
+ AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
268
+ else
269
+ # Fallback: try to find .claude directory in current path
270
+ CURRENT_DIR="$PWD"
271
+ while [[ "$CURRENT_DIR" != "/" ]]; do
272
+ if [[ -d "$CURRENT_DIR/.claude" ]]; then
273
+ AUDIO_DIR="$CURRENT_DIR/.claude/audio"
274
+ break
275
+ fi
276
+ CURRENT_DIR=$(dirname "$CURRENT_DIR")
277
+ done
278
+ # Final fallback to global if no project .claude found
279
+ if [[ -z "${AUDIO_DIR:-}" ]]; then
280
+ AUDIO_DIR="$HOME/.claude/audio"
281
+ fi
282
+ fi
283
+
284
+ mkdir -p "$AUDIO_DIR"
285
+ TEMP_FILE=$(mktemp "$AUDIO_DIR/tts-XXXXXX.wav")
286
+
287
+ # @function get_speech_rate
288
+ # @intent Determine speech rate for Piper synthesis
289
+ # @why Convert user-facing speed (0.5=slower, 2.0=faster) to Piper length-scale (inverted)
290
+ # @returns Piper length-scale value (inverted from user scale)
291
+ # @note Piper uses length-scale where higher=slower, opposite of user expectation
292
+ get_speech_rate() {
293
+ local target_config=""
294
+ local main_config=""
295
+
296
+ # Check for target-specific config first (new and legacy paths)
297
+ if [[ -f "$SCRIPT_DIR/../config/tts-target-speech-rate.txt" ]]; then
298
+ target_config="$SCRIPT_DIR/../config/tts-target-speech-rate.txt"
299
+ elif [[ -f "$HOME/.claude/config/tts-target-speech-rate.txt" ]]; then
300
+ target_config="$HOME/.claude/config/tts-target-speech-rate.txt"
301
+ elif [[ -f "$SCRIPT_DIR/../config/piper-target-speech-rate.txt" ]]; then
302
+ target_config="$SCRIPT_DIR/../config/piper-target-speech-rate.txt"
303
+ elif [[ -f "$HOME/.claude/config/piper-target-speech-rate.txt" ]]; then
304
+ target_config="$HOME/.claude/config/piper-target-speech-rate.txt"
305
+ fi
306
+
307
+ # Check for main config (new and legacy paths)
308
+ if [[ -f "$SCRIPT_DIR/../config/tts-speech-rate.txt" ]]; then
309
+ main_config="$SCRIPT_DIR/../config/tts-speech-rate.txt"
310
+ elif [[ -f "$HOME/.claude/config/tts-speech-rate.txt" ]]; then
311
+ main_config="$HOME/.claude/config/tts-speech-rate.txt"
312
+ elif [[ -f "$SCRIPT_DIR/../config/piper-speech-rate.txt" ]]; then
313
+ main_config="$SCRIPT_DIR/../config/piper-speech-rate.txt"
314
+ elif [[ -f "$HOME/.claude/config/piper-speech-rate.txt" ]]; then
315
+ main_config="$HOME/.claude/config/piper-speech-rate.txt"
316
+ fi
317
+
318
+ # If this is a non-English voice and target config exists, use it
319
+ if [[ "$CURRENT_LANGUAGE" != "english" ]] && [[ -n "$target_config" ]]; then
320
+ local user_speed=$(cat "$target_config" 2>/dev/null)
321
+ # Validate speed is a positive number
322
+ if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
323
+ echo "1.0"
324
+ return
325
+ fi
326
+ # Convert user speed to Piper length-scale (invert)
327
+ # User: 0.5=slower, 1.0=normal, 2.0=faster
328
+ # Piper: 2.0=slower, 1.0=normal, 0.5=faster
329
+ # Formula: piper_length_scale = 1.0 / user_speed
330
+ echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
331
+ return
332
+ fi
333
+
334
+ # Otherwise use main config if available
335
+ if [[ -n "$main_config" ]]; then
336
+ local user_speed=$(grep -v '^#' "$main_config" 2>/dev/null | grep -v '^$' | tail -1)
337
+ # Validate speed is a positive number
338
+ if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
339
+ echo "1.0"
340
+ return
341
+ fi
342
+ echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
343
+ return
344
+ fi
345
+
346
+ # Default: 1.0 (normal) for English, 2.0 (slower) for learning
347
+ if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
348
+ echo "2.0"
349
+ else
350
+ echo "1.0"
351
+ fi
352
+ }
353
+
354
+ SPEECH_RATE=$(get_speech_rate)
355
+
356
+ # @function synthesize_with_piper
357
+ # @intent Generate speech using Piper TTS
358
+ # @why Provides free, offline TTS alternative
359
+ # @param Uses globals: $TEXT, $VOICE_PATH, $SPEECH_RATE, $SPEAKER_ID (optional)
360
+ # @returns Creates WAV file at $TEMP_FILE
361
+ # @exitcode 0=success, 4=synthesis error
362
+ # @sideeffects Creates audio file
363
+ # @edgecases Handles piper errors, invalid models, multi-speaker voices
364
+ if [[ -n "${SPEAKER_ID:-}" ]]; then
365
+ # Multi-speaker voice: Pass speaker ID
366
+ # SECURITY: Use printf instead of echo for pipe safety (#134)
367
+ printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --speaker "$SPEAKER_ID" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
368
+ else
369
+ # Single-speaker voice
370
+ printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
371
+ fi
372
+
373
+ if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
374
+ echo "โŒ Failed to synthesize speech with Piper"
375
+ echo "Voice model: $VOICE_MODEL"
376
+ echo "Check that voice model is valid"
377
+ exit 4
378
+ fi
379
+
380
+ # @function detect_remote_session
381
+ # @intent Auto-detect SSH/RDP sessions and enable audio compression
382
+ # @why Remote desktop audio is choppy without compression
383
+ # @returns Sets AGENTVIBES_RDP_MODE environment variable
384
+ # @detection Checks SSH_CLIENT, SSH_TTY, and DISPLAY variables
385
+ if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]]; then
386
+ # Auto-detect remote session
387
+ if [[ -n "${SSH_CLIENT:-}" ]] || [[ -n "${SSH_TTY:-}" ]] || [[ "${DISPLAY:-}" =~ ^localhost:.* ]]; then
388
+ export AGENTVIBES_RDP_MODE=true
389
+ echo "๐ŸŒ Remote session detected - enabling audio compression"
390
+ fi
391
+ fi
392
+
393
+ # @function compress_for_remote
394
+ # @intent Compress TTS audio for remote sessions (SSH/RDP)
395
+ # @why Reduces bandwidth and prevents choppy playback
396
+ # @param Uses global: $TEMP_FILE, $AGENTVIBES_RDP_MODE
397
+ # @returns Updates $TEMP_FILE to compressed version
398
+ # @sideeffects Converts to mono 22kHz for lower bandwidth
399
+ if [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] && command -v ffmpeg &> /dev/null; then
400
+ COMPRESSED_FILE=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX.wav")
401
+ _CLEANUP_FILES+=("$COMPRESSED_FILE")
402
+ # Convert to mono, 22kHz, 64kbps for remote sessions
403
+ ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null
404
+
405
+ if [[ -f "$COMPRESSED_FILE" ]]; then
406
+ rm -f "$TEMP_FILE"
407
+ TEMP_FILE="$COMPRESSED_FILE"
408
+ fi
409
+ fi
410
+
411
+ # @function add_silence_padding
412
+ # @intent Add silence to prevent WSL audio static
413
+ # @why WSL audio subsystem cuts off first ~200ms
414
+ # @param Uses global: $TEMP_FILE
415
+ # @returns Updates $TEMP_FILE to padded version
416
+ # @sideeffects Modifies audio file
417
+ # AI NOTE: Use ffmpeg if available, otherwise skip padding (degraded experience)
418
+ if command -v ffmpeg &> /dev/null; then
419
+ PADDED_FILE=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX.wav")
420
+ _CLEANUP_FILES+=("$PADDED_FILE")
421
+ # Add 200ms of silence at the beginning
422
+ ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
423
+ -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
424
+ -map "[out]" -y "$PADDED_FILE" 2>/dev/null
425
+
426
+ if [[ -f "$PADDED_FILE" ]]; then
427
+ rm -f "$TEMP_FILE"
428
+ TEMP_FILE="$PADDED_FILE"
429
+ fi
430
+ fi
431
+
432
+ # @function apply_audio_effects
433
+ # @intent Apply sox effects and background music via audio-processor.sh
434
+ # @param Uses global: $TEMP_FILE
435
+ # @returns Updates $TEMP_FILE to processed version, sets $BACKGROUND_MUSIC if used
436
+ # @sideeffects Applies audio effects and background music
437
+ BACKGROUND_MUSIC=""
438
+ if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
439
+ PROCESSED_FILE=$(mktemp "$AUDIO_DIR/tts-processed-XXXXXX.wav")
440
+ _CLEANUP_FILES+=("$PROCESSED_FILE")
441
+ # audio-processor.sh returns: FILE_PATH|BACKGROUND_FILE
442
+ PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" "$AGENT_PROFILE_FILE" 2>/dev/null) || {
443
+ echo "Warning: Audio processing failed, using unprocessed audio" >&2
444
+ PROCESSED_FILE="$TEMP_FILE"
445
+ PROCESSOR_OUTPUT="$TEMP_FILE|"
446
+ }
447
+
448
+ # Parse output: FILE|BACKGROUND
449
+ PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
450
+ BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
451
+
452
+ if [[ -f "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
453
+ rm -f "$TEMP_FILE"
454
+ TEMP_FILE="$PROCESSED_FILE"
455
+ fi
456
+ fi
457
+
458
+ # @function play_audio
459
+ # @intent Play generated audio using available player with sequential playback
460
+ # @why Support multiple audio players and prevent overlapping audio in learning mode
461
+ # @param Uses global: $TEMP_FILE, $CURRENT_LANGUAGE
462
+ # @sideeffects Plays audio with lock mechanism for sequential playback
463
+ _LOCK_DIR="${XDG_RUNTIME_DIR:-/tmp/agentvibes-$(id -u)}"
464
+ mkdir -p "$_LOCK_DIR"
465
+ chmod 700 "$_LOCK_DIR"
466
+ LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"
467
+
468
+ # Auto-remove stale lock files (older than 30 seconds) to prevent permanent blocking
469
+ # This handles cases where the background cleanup process was killed mid-playback
470
+ if [ -f "$LOCK_FILE" ]; then
471
+ _lock_age=0
472
+ if [[ "$(uname)" == "Darwin" ]]; then
473
+ _lock_mtime=$(stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0)
474
+ else
475
+ _lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || echo 0)
476
+ fi
477
+ _now=$(date +%s)
478
+ _lock_age=$((_now - _lock_mtime))
479
+ if [[ $_lock_age -gt 30 ]]; then
480
+ rm -f "$LOCK_FILE"
481
+ fi
482
+ fi
483
+
484
+ # Wait for previous audio to finish (max 2 seconds to prevent blocking)
485
+ for i in {1..4}; do
486
+ if [ ! -f "$LOCK_FILE" ]; then
487
+ break
488
+ fi
489
+ sleep 0.5
490
+ done
491
+
492
+ # If still locked after 2 seconds, skip this TTS to prevent blocking Claude
493
+ if [ -f "$LOCK_FILE" ]; then
494
+ echo "โญ๏ธ Skipping TTS (previous audio still playing)" >&2
495
+ exit 0
496
+ fi
497
+
498
+ # Track last target language audio for replay command
499
+ if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
500
+ TARGET_AUDIO_FILE="${CLAUDE_PROJECT_DIR:-${HOME}}/.claude/last-target-audio.txt"
501
+ echo "$TEMP_FILE" > "$TARGET_AUDIO_FILE"
502
+ fi
503
+
504
+ # Create lock and play audio
505
+ touch "$LOCK_FILE"
506
+
507
+ # Create write lock file in audio directory to signal file is in-use (prevents race condition in cleanup)
508
+ _TEMP_DIR="${TEMP_FILE%/*}"
509
+ WRITE_LOCK_FILE="$_TEMP_DIR/$(basename "$TEMP_FILE" .wav).lock"
510
+ touch "$WRITE_LOCK_FILE"
511
+ _CLEANUP_FILES+=("$LOCK_FILE" "$WRITE_LOCK_FILE")
512
+
513
+ # Get audio duration for proper lock timing
514
+ DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null || true)
515
+ DURATION=${DURATION%.*} # Round to integer
516
+ # SECURITY: Validate duration is numeric (#134)
517
+ if ! [[ "${DURATION:-}" =~ ^[0-9]+$ ]]; then
518
+ DURATION=1
519
+ fi
520
+
521
+ # Play audio (skip if in test mode or no-playback mode)
522
+ # AGENTVIBES_NO_PLAYBACK: Set to "true" to generate audio without playing (for post-processing)
523
+ PLAYER_PID=""
524
+ if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && [[ "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
525
+ # Detect platform and use appropriate audio player
526
+ if [[ "$(uname -s)" == "Darwin" ]]; then
527
+ # macOS: Use afplay (native macOS audio player)
528
+ afplay "$TEMP_FILE" >/dev/null 2>&1 &
529
+ PLAYER_PID=$!
530
+ elif [[ -n "${TERMUX_VERSION:-}" ]] || [[ -d "/data/data/com.termux" ]]; then
531
+ # Android/Termux: Use termux-media-player
532
+ termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
533
+ PLAYER_PID=$!
534
+ else
535
+ # Linux/WSL: Prefer paplay (PulseAudio) for best WSL audio quality
536
+ (paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
537
+ PLAYER_PID=$!
538
+ fi
539
+ fi
540
+
541
+ # Wait for audio to finish, then release locks (both global and write lock)
542
+ (sleep $DURATION; rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE") &
543
+ disown
544
+
545
+ # Get audio cache path
546
+ AUDIO_DIR_PATH=$(get_audio_dir)
547
+
548
+ # Color codes (safe to use — WAV path is passed via AGENTVIBES_WAV_OUTPATH, not parsed from stdout)
549
+ BLUE='\033[0;34m'
550
+ YELLOW='\033[1;33m'
551
+ PURPLE='\033[0;35m'
552
+ RED='\033[0;31m'
553
+ GREEN='\033[0;32m'
554
+ ORANGE='\033[0;33m'
555
+ WHITE='\033[1;37m'
556
+ CYAN='\033[0;36m'
557
+ GOLD='\033[38;5;226m'
558
+ NC='\033[0m'
559
+
560
+ # Check if banner is enabled (default: on)
561
+ _BANNER_ENABLED=true
562
+ if [[ -f "$HOME/.agentvibes/banner-disabled" ]]; then
563
+ _BANNER_ENABLED=false
564
+ elif [[ -f "${PROJECT_ROOT:-/nonexistent}/.agentvibes/banner-disabled" ]]; then
565
+ _BANNER_ENABLED=false
566
+ fi
567
+
568
+ # Run auto-cleanup off the critical path: only every 10th call, in background after playback starts.
569
+ # Counter file lives in the secure lock dir (user-specific, already created above).
570
+ AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
571
+ _CALL_COUNTER_FILE="$_LOCK_DIR/agentvibes-tts-call-count"
572
+ _CALL_COUNT=$(cat "$_CALL_COUNTER_FILE" 2>/dev/null || echo "0")
573
+ # SECURITY: Validate counter is numeric before arithmetic
574
+ if ! [[ "$_CALL_COUNT" =~ ^[0-9]+$ ]]; then _CALL_COUNT=0; fi
575
+ _CALL_COUNT=$((_CALL_COUNT + 1))
576
+ echo "$_CALL_COUNT" > "$_CALL_COUNTER_FILE"
577
+
578
+ if (( _CALL_COUNT % 10 == 0 )); then
579
+ # Capture values needed inside the subshell before forking
580
+ _CLEANUP_AUDIO_DIR="$AUDIO_DIR_PATH"
581
+ _CLEANUP_THRESHOLD="$AUTO_CLEAN_THRESHOLD"
582
+ _CLEANUP_BANNER="$_BANNER_ENABLED"
583
+ # Source the utils inside the subshell (functions are not exported)
584
+ _CLEANUP_UTILS="$SCRIPT_DIR/audio-cache-utils.sh"
585
+ (
586
+ source "$_CLEANUP_UTILS" 2>/dev/null || exit 0
587
+ _INITIAL_SIZE=$(calculate_tts_size_bytes "$_CLEANUP_AUDIO_DIR")
588
+ if [[ $_INITIAL_SIZE -gt $((_CLEANUP_THRESHOLD * 1048576)) ]]; then
589
+ _DELETED=$(auto_clean_old_files "$_CLEANUP_AUDIO_DIR" "$_CLEANUP_THRESHOLD")
590
+ if [[ ${_DELETED:-0} -gt 0 ]] && [[ "$_CLEANUP_BANNER" == "true" ]]; then
591
+ echo -e "\033[0;33m๐Ÿงน Auto-cleaned $_DELETED old files\033[0m"
592
+ fi
593
+ fi
594
+ ) &
595
+ disown
596
+ fi
597
+
598
+ # Write output path for play-tts-enhanced.sh (avoids stdout parsing — colors are safe)
599
+ if [[ -n "${AGENTVIBES_WAV_OUTPATH:-}" ]]; then
600
+ echo "$TEMP_FILE" > "$AGENTVIBES_WAV_OUTPATH"
601
+ fi
602
+
603
# Banner: summary of the file that was just generated and of the audio cache.
# Printed only when the banner is enabled.
if [[ "$_BANNER_ENABLED" == "true" ]]; then
  FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
  SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
  SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")

  # Dynamic color coding based on cache size:
  # red above 3221225472 bytes (3 GiB), yellow above 524288000 bytes (500 MiB),
  # green otherwise.
  CACHE_COLOR=$GREEN
  if [[ $SIZE_BYTES -gt 3221225472 ]]; then
    CACHE_COLOR=$RED
  elif [[ $SIZE_BYTES -gt 524288000 ]]; then
    CACHE_COLOR=$YELLOW
  fi

  echo -e "${WHITE}💾 Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}🗄️${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}🧹${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"

  # Defaulted like SPEAKER_ID / FILE_VOICE below, so an unset variable reads
  # as "no background music" instead of being an unbound-variable error.
  if [[ -n "${BACKGROUND_MUSIC:-}" ]]; then
    echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}$(basename "$BACKGROUND_MUSIC")${NC}"
  fi
  # Show FILE_VOICE when both a speaker id and a file voice are set,
  # otherwise fall back to the voice model name.
  if [[ -n "${SPEAKER_ID:-}" ]] && [[ -n "${FILE_VOICE:-}" ]]; then
    echo -e "${WHITE}🎤 Voice used:${NC} ${BLUE}$FILE_VOICE${NC} ${WHITE}(Piper TTS)${NC}"
  else
    echo -e "${WHITE}🎤 Voice used:${NC} ${BLUE}$VOICE_MODEL${NC} ${WHITE}(Piper TTS)${NC}"
  fi

  # Personality: the project-level file wins over the one in $HOME; empty if
  # neither can be read. "none" and "normal" are not displayed.
  PERSONALITY=$(cat "${PROJECT_ROOT:-/nonexistent}/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
  if [[ -n "$PERSONALITY" ]] && [[ "$PERSONALITY" != "none" ]] && [[ "$PERSONALITY" != "normal" ]]; then
    echo -e "${WHITE}💫 Personality:${NC} ${YELLOW}$PERSONALITY${NC}"
  fi

  echo -e "\033[38;5;240mSay: \"Turn off banner\" to hide this output\033[0m"
fi
634
+
635
# Check audio folder size and warn if it is getting large (more than 100 MB as
# reported by `du -sm`). Only runs when the banner is enabled and the audio
# directory exists.
if [[ "$_BANNER_ENABLED" == "true" ]] && [[ -d "$AUDIO_DIR_PATH" ]]; then
  AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
  # Compare only when du produced a plain decimal number; an empty or
  # unexpected value is ignored rather than evaluated arithmetically.
  if [[ "$AUDIO_SIZE" =~ ^[0-9]+$ ]] && (( 10#$AUDIO_SIZE > 100 )); then
    echo -e "\033[0;31m⚠️ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
  fi
fi
642
+
643
# Show status indicators.
# Marker and config files consulted below; project-level paths resolve under
# /nonexistent when PROJECT_ROOT is unset, so their -f tests simply fail.
GLOBAL_MUTE_FILE="$HOME/.agentvibes-muted"
PROJECT_MUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-muted"
PROJECT_UNMUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-unmuted"
BACKGROUND_ENABLED_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/config/background-music-enabled.txt"
GLOBAL_BACKGROUND_ENABLED_FILE="$HOME/.claude/config/background-music-enabled.txt"

# Mute status indicator. The first matching case wins; nothing is printed when
# none of the marker files exist.
if [[ -f "$PROJECT_UNMUTE_FILE" ]] && [[ -f "$GLOBAL_MUTE_FILE" ]]; then
  echo "🔊 Status: Unmuted (project overrides global mute)"
elif [[ -f "$PROJECT_MUTE_FILE" ]]; then
  echo "🔇 Status: Muted (project)"
elif [[ -f "$GLOBAL_MUTE_FILE" ]]; then
  echo "🔇 Status: Would be muted (global) - but this project is speaking"
fi

# Background music status indicator, shown only when BACKGROUND_MUSIC is empty
# or unset.
if [[ -z "${BACKGROUND_MUSIC:-}" ]]; then
  _bg_enabled=false
  # The project-level flag file is checked first, then the global one; the
  # first file that exists and contains "true" marks music as enabled.
  for _bg_flag_file in "$BACKGROUND_ENABLED_FILE" "$GLOBAL_BACKGROUND_ENABLED_FILE"; do
    if [[ -f "$_bg_flag_file" ]] && grep -q "true" "$_bg_flag_file" 2>/dev/null; then
      _bg_enabled=true
      break
    fi
  done
  if [[ "$_bg_enabled" == "true" ]]; then
    echo "🎵 Background music: Enabled but not playing (check config)"
  else
    echo "🎵 Background music: Disabled"
  fi
fi
673
+
674
# Block until the audio player has exited before returning.
# bmad-speak.sh holds its speech lock for as long as this script runs, so
# returning only after playback is done keeps party-mode agents from talking
# over each other. A failing wait is ignored.
[[ -z "$PLAYER_PID" ]] || wait "$PLAYER_PID" 2>/dev/null || true