agentvibes 4.6.3 → 4.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,679 +1,679 @@
1
- #!/usr/bin/env bash
2
- #
3
- # File: .claude/hooks/play-tts-piper.sh
4
- #
5
- # AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
6
- # Website: https://agentvibes.org
7
- # Repository: https://github.com/paulpreibisch/AgentVibes
8
- #
9
- # Co-created by Paul Preibisch with Claude AI
10
- # Copyright (c) 2025 Paul Preibisch
11
- #
12
- # Licensed under the Apache License, Version 2.0 (the "License");
13
- # you may not use this file except in compliance with the License.
14
- # You may obtain a copy of the License at
15
- #
16
- # http://www.apache.org/licenses/LICENSE-2.0
17
- #
18
- # Unless required by applicable law or agreed to in writing, software
19
- # distributed under the License is distributed on an "AS IS" BASIS,
20
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
- # See the License for the specific language governing permissions and
22
- # limitations under the License.
23
- #
24
- # DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
- # express or implied. Use at your own risk. See the Apache License for details.
26
- #
27
- # ---
28
- #
29
- # @fileoverview Piper TTS Provider Implementation - Free, offline neural TTS
30
- # @context Provides local, privacy-first TTS alternative to cloud services for WSL/Linux
31
- # @architecture Implements provider interface contract for Piper binary integration
32
- # @dependencies piper (pipx), piper-voice-manager.sh, mpv/aplay, ffmpeg (optional padding)
33
- # @entrypoints Called by play-tts.sh router when provider=piper
34
- # @patterns Provider contract: text/voice → audio file path, voice auto-download, language-aware synthesis
35
- # @related play-tts.sh, piper-voice-manager.sh, language-manager.sh, GitHub Issue #25
36
- #
37
-
38
- set -eo pipefail
39
- # Note: -u (nounset) omitted because sourced scripts (piper-voice-manager.sh,
40
- # language-manager.sh, audio-cache-utils.sh) use unset variables freely.
41
- # Variables in THIS script use ${VAR:-} defaults for safety.
42
-
43
- # Cleanup handler for temp files (preserves final output in $TEMP_FILE)
44
- _CLEANUP_FILES=()
45
- cleanup() {
46
- local f
47
- for f in "${_CLEANUP_FILES[@]+"${_CLEANUP_FILES[@]}"}"; do
48
- [[ "$f" == "${TEMP_FILE:-}" ]] && continue
49
- rm -f "$f"
50
- done
51
- }
52
- trap cleanup EXIT
53
-
54
- # Fix locale warnings
55
- export LC_ALL=C
56
-
57
- TEXT="${1:-}"
58
- VOICE_OVERRIDE="${2:-}" # Optional: voice model name
59
- AGENT_PROFILE_FILE="${3:-}" # Optional: path to per-agent profile JSON (from bmad-speak.sh)
60
-
61
- # Strip emojis, asterisks, and markdown formatting that Piper would speak literally
62
- TEXT=$(printf '%s' "$TEXT" | perl -CSD -pe '
63
- s/[\x{1F300}-\x{1F9FF}]//g; # emoticons, symbols, pictographs
64
- s/[\x{2600}-\x{27BF}]//g; # misc symbols, dingbats
65
- s/[\x{FE00}-\x{FE0F}]//g; # variation selectors
66
- s/[\x{200D}]//g; # zero-width joiner
67
- s/[\x{2500}-\x{257F}]//g; # box drawing (─━ etc)
68
- s/[\x{2580}-\x{259F}]//g; # block elements
69
- s/\*+//g; # asterisks (bold/italic markdown)
70
- s/#+\s*//g; # heading markers
71
- s/`//g; # backticks
72
- s/~+//g; # strikethrough
73
- s/^\s*[-]\s*//g; # list dashes
74
- ')
75
-
76
- # Source voice manager and language manager
77
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
78
- source "$SCRIPT_DIR/piper-voice-manager.sh"
79
- source "$SCRIPT_DIR/language-manager.sh"
80
- source "$SCRIPT_DIR/audio-cache-utils.sh"
81
-
82
- # Default voice for Piper
83
- DEFAULT_VOICE="en_US-lessac-medium"
84
-
85
- # @function determine_voice_model
86
- # @intent Resolve voice name to Piper model name with language support
87
- # @why Support voice override, language-specific voices, and default fallback
88
- # @param Uses global: $VOICE_OVERRIDE
89
- # @returns Sets $VOICE_MODEL global variable
90
- # @sideeffects None
91
- VOICE_MODEL=""
92
-
93
- # Get current language setting
94
- CURRENT_LANGUAGE=$(get_language_code)
95
-
96
- if [[ -n "$VOICE_OVERRIDE" ]]; then
97
- # Use override if provided
98
- # Handle multi-speaker format: "Model::SpeakerName" → split into model + speaker lookup
99
- if [[ "$VOICE_OVERRIDE" == *"::"* ]]; then
100
- VOICE_MODEL="${VOICE_OVERRIDE%%::*}"
101
- _SPEAKER_NAME="${VOICE_OVERRIDE#*::}"
102
- # Look up speaker ID from the model's .onnx.json speaker_id_map
103
- voice_dir=$(get_voice_storage_dir)
104
- _JSON_FILE="$voice_dir/${VOICE_MODEL}.onnx.json"
105
- if [[ -f "$_JSON_FILE" ]]; then
106
- # SECURITY: Pass values via env vars to prevent shell injection
107
- SPEAKER_ID=$(_JSON="$_JSON_FILE" _SPKR="$_SPEAKER_NAME" node -e "
108
- try {
109
- const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
110
- const map = j.speaker_id_map || {};
111
- const id = map[process.env._SPKR];
112
- if (id !== undefined) process.stdout.write(String(id));
113
- } catch {}
114
- " 2>/dev/null || true)
115
- fi
116
- echo "🎭 Using multi-speaker voice: $VOICE_OVERRIDE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
117
- else
118
- VOICE_MODEL="$VOICE_OVERRIDE"
119
- echo "🎤 Using voice: $VOICE_OVERRIDE (session-specific)"
120
- fi
121
- else
122
- # Try to get voice from voice file (check CLAUDE_PROJECT_DIR first for MCP context)
123
- VOICE_FILE=""
124
-
125
- # Priority order:
126
- # 1. CLAUDE_PROJECT_DIR env var (set by MCP for project-specific settings)
127
- # 2. Script location (for direct slash command usage)
128
- # 3. Global ~/.claude (fallback)
129
-
130
- # SECURITY: Canonicalize path to prevent traversal (#128)
131
- if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
132
- CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
133
- fi
134
- if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]] && [[ -f "$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt" ]]; then
135
- # MCP context: Use the project directory where MCP was invoked
136
- VOICE_FILE="$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt"
137
- elif [[ -f "$SCRIPT_DIR/../tts-voice.txt" ]]; then
138
- # Direct usage: Use script location
139
- VOICE_FILE="$SCRIPT_DIR/../tts-voice.txt"
140
- elif [[ -f "$HOME/.claude/tts-voice.txt" ]]; then
141
- # Fallback: Use global
142
- VOICE_FILE="$HOME/.claude/tts-voice.txt"
143
- fi
144
-
145
- if [[ -n "$VOICE_FILE" ]]; then
146
- FILE_VOICE=$(cat "$VOICE_FILE" 2>/dev/null)
147
-
148
- # Check for multi-speaker voice (model + speaker ID stored separately)
149
- # Use same directory as VOICE_FILE for consistency
150
- VOICE_DIR=$(dirname "$VOICE_FILE")
151
- MODEL_FILE="$VOICE_DIR/tts-piper-model.txt"
152
- SPEAKER_ID_FILE="$VOICE_DIR/tts-piper-speaker-id.txt"
153
-
154
- if [[ -f "$MODEL_FILE" ]] && [[ -f "$SPEAKER_ID_FILE" ]]; then
155
- # Multi-speaker voice config found locally
156
- VOICE_MODEL=$(cat "$MODEL_FILE" 2>/dev/null)
157
- SPEAKER_ID=$(cat "$SPEAKER_ID_FILE" 2>/dev/null)
158
- # Validate speaker ID is numeric
159
- if [[ -n "$SPEAKER_ID" ]] && ! [[ "$SPEAKER_ID" =~ ^[0-9]+$ ]]; then
160
- echo "Warning: Invalid speaker ID '$SPEAKER_ID', ignoring" >&2
161
- SPEAKER_ID=""
162
- fi
163
- echo "🎭 Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-none})"
164
- # Check if voice uses Model::SpeakerName format (from AgentVibes config)
165
- elif [[ -n "$FILE_VOICE" ]] && [[ "$FILE_VOICE" == *"::"* ]]; then
166
- VOICE_MODEL="${FILE_VOICE%%::*}"
167
- _SPEAKER_NAME="${FILE_VOICE#*::}"
168
- voice_dir=$(get_voice_storage_dir)
169
- _JSON_FILE="$voice_dir/${VOICE_MODEL}.onnx.json"
170
- if [[ -f "$_JSON_FILE" ]]; then
171
- # SECURITY: Pass values via env vars to prevent shell injection
172
- SPEAKER_ID=$(_JSON="$_JSON_FILE" _SPKR="$_SPEAKER_NAME" node -e "
173
- try {
174
- const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
175
- const map = j.speaker_id_map || {};
176
- const id = map[process.env._SPKR];
177
- if (id !== undefined) process.stdout.write(String(id));
178
- } catch {}
179
- " 2>/dev/null || true)
180
- fi
181
- echo "🎭 Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
182
- # Standard Piper model name or custom voice (just use as-is)
183
- elif [[ -n "$FILE_VOICE" ]]; then
184
- # Strip multi-speaker suffix if present (model::SpeakerName-Label)
185
- if [[ "$FILE_VOICE" == *"::"* ]]; then
186
- VOICE_MODEL="${FILE_VOICE%%::*}"
187
- else
188
- VOICE_MODEL="$FILE_VOICE"
189
- fi
190
- fi
191
- fi
192
-
193
- # If no Piper voice from file, try language-specific voice
194
- if [[ -z "$VOICE_MODEL" ]]; then
195
- LANG_VOICE=$(get_voice_for_language "$CURRENT_LANGUAGE" "piper" 2>/dev/null)
196
-
197
- if [[ -n "$LANG_VOICE" ]]; then
198
- VOICE_MODEL="$LANG_VOICE"
199
- echo "🌍 Using $CURRENT_LANGUAGE voice: $LANG_VOICE (Piper)"
200
- else
201
- # Use default voice
202
- VOICE_MODEL="$DEFAULT_VOICE"
203
- fi
204
- fi
205
- fi
206
-
207
- # @function validate_inputs
208
- # @intent Check required parameters
209
- # @why Fail fast with clear errors if inputs missing
210
- # @exitcode 1=missing text, 2=missing piper binary
211
- if [[ -z "$TEXT" ]]; then
212
- echo "Usage: $0 \"text to speak\" [voice_model_name]"
213
- exit 1
214
- fi
215
-
216
- # Check if Piper is installed
217
- if ! command -v piper &> /dev/null; then
218
- echo "❌ Error: Piper TTS not installed"
219
- echo "Install with: pipx install piper-tts"
220
- echo "Or run: .claude/hooks/piper-installer.sh"
221
- exit 2
222
- fi
223
-
224
- # @function ensure_voice_downloaded
225
- # @intent Download voice model if not cached
226
- # @why Provide seamless experience with automatic downloads
227
- # @param Uses global: $VOICE_MODEL
228
- # @sideeffects Downloads voice model files
229
- # @edgecases Prompts user for consent before downloading, skipped in test mode
230
- if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && ! verify_voice "$VOICE_MODEL"; then
231
- echo "📥 Voice model not found: $VOICE_MODEL"
232
- echo " File size: ~25MB"
233
- echo " Preview: https://huggingface.co/rhasspy/piper-voices"
234
- echo ""
235
- read -p " Download this voice model? [y/N]: " -n 1 -r
236
- echo
237
-
238
- if [[ $REPLY =~ ^[Yy]$ ]]; then
239
- if ! download_voice "$VOICE_MODEL"; then
240
- echo "❌ Failed to download voice model"
241
- echo "Fix: Download manually or choose different voice"
242
- exit 3
243
- fi
244
- else
245
- echo "❌ Voice download cancelled"
246
- exit 3
247
- fi
248
- fi
249
-
250
- # Get voice model path
251
- # In test mode, use a fake path since we have mock piper that doesn't need real files
252
- if [[ "${AGENTVIBES_TEST_MODE:-false}" == "true" ]]; then
253
- VOICE_PATH="/tmp/mock-voice-${VOICE_MODEL}.onnx"
254
- else
255
- VOICE_PATH=$(get_voice_path "$VOICE_MODEL")
256
- if [[ $? -ne 0 ]]; then
257
- echo "❌ Voice model path not found: $VOICE_MODEL"
258
- exit 3
259
- fi
260
- fi
261
-
262
- # @function determine_audio_directory
263
- # @intent Find appropriate directory for audio file storage
264
- # @why Supports project-local and global storage
265
- # @returns Sets $AUDIO_DIR global variable
266
- if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
267
- AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
268
- else
269
- # Fallback: try to find .claude directory in current path
270
- CURRENT_DIR="$PWD"
271
- while [[ "$CURRENT_DIR" != "/" ]]; do
272
- if [[ -d "$CURRENT_DIR/.claude" ]]; then
273
- AUDIO_DIR="$CURRENT_DIR/.claude/audio"
274
- break
275
- fi
276
- CURRENT_DIR=$(dirname "$CURRENT_DIR")
277
- done
278
- # Final fallback to global if no project .claude found
279
- if [[ -z "${AUDIO_DIR:-}" ]]; then
280
- AUDIO_DIR="$HOME/.claude/audio"
281
- fi
282
- fi
283
-
284
- mkdir -p "$AUDIO_DIR"
285
- TEMP_FILE=$(mktemp "$AUDIO_DIR/tts-XXXXXX.wav")
286
-
287
- # @function get_speech_rate
288
- # @intent Determine speech rate for Piper synthesis
289
- # @why Convert user-facing speed (0.5=slower, 2.0=faster) to Piper length-scale (inverted)
290
- # @returns Piper length-scale value (inverted from user scale)
291
- # @note Piper uses length-scale where higher=slower, opposite of user expectation
292
- get_speech_rate() {
293
- local target_config=""
294
- local main_config=""
295
-
296
- # Check for target-specific config first (new and legacy paths)
297
- if [[ -f "$SCRIPT_DIR/../config/tts-target-speech-rate.txt" ]]; then
298
- target_config="$SCRIPT_DIR/../config/tts-target-speech-rate.txt"
299
- elif [[ -f "$HOME/.claude/config/tts-target-speech-rate.txt" ]]; then
300
- target_config="$HOME/.claude/config/tts-target-speech-rate.txt"
301
- elif [[ -f "$SCRIPT_DIR/../config/piper-target-speech-rate.txt" ]]; then
302
- target_config="$SCRIPT_DIR/../config/piper-target-speech-rate.txt"
303
- elif [[ -f "$HOME/.claude/config/piper-target-speech-rate.txt" ]]; then
304
- target_config="$HOME/.claude/config/piper-target-speech-rate.txt"
305
- fi
306
-
307
- # Check for main config (new and legacy paths)
308
- if [[ -f "$SCRIPT_DIR/../config/tts-speech-rate.txt" ]]; then
309
- main_config="$SCRIPT_DIR/../config/tts-speech-rate.txt"
310
- elif [[ -f "$HOME/.claude/config/tts-speech-rate.txt" ]]; then
311
- main_config="$HOME/.claude/config/tts-speech-rate.txt"
312
- elif [[ -f "$SCRIPT_DIR/../config/piper-speech-rate.txt" ]]; then
313
- main_config="$SCRIPT_DIR/../config/piper-speech-rate.txt"
314
- elif [[ -f "$HOME/.claude/config/piper-speech-rate.txt" ]]; then
315
- main_config="$HOME/.claude/config/piper-speech-rate.txt"
316
- fi
317
-
318
- # If this is a non-English voice and target config exists, use it
319
- if [[ "$CURRENT_LANGUAGE" != "english" ]] && [[ -n "$target_config" ]]; then
320
- local user_speed=$(cat "$target_config" 2>/dev/null)
321
- # Validate speed is a positive number
322
- if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
323
- echo "1.0"
324
- return
325
- fi
326
- # Convert user speed to Piper length-scale (invert)
327
- # User: 0.5=slower, 1.0=normal, 2.0=faster
328
- # Piper: 2.0=slower, 1.0=normal, 0.5=faster
329
- # Formula: piper_length_scale = 1.0 / user_speed
330
- echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
331
- return
332
- fi
333
-
334
- # Otherwise use main config if available
335
- if [[ -n "$main_config" ]]; then
336
- local user_speed=$(grep -v '^#' "$main_config" 2>/dev/null | grep -v '^$' | tail -1)
337
- # Validate speed is a positive number
338
- if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
339
- echo "1.0"
340
- return
341
- fi
342
- echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
343
- return
344
- fi
345
-
346
- # Default: 1.0 (normal) for English, 2.0 (slower) for learning
347
- if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
348
- echo "2.0"
349
- else
350
- echo "1.0"
351
- fi
352
- }
353
-
354
- SPEECH_RATE=$(get_speech_rate)
355
-
356
- # @function synthesize_with_piper
357
- # @intent Generate speech using Piper TTS
358
- # @why Provides free, offline TTS alternative
359
- # @param Uses globals: $TEXT, $VOICE_PATH, $SPEECH_RATE, $SPEAKER_ID (optional)
360
- # @returns Creates WAV file at $TEMP_FILE
361
- # @exitcode 0=success, 4=synthesis error
362
- # @sideeffects Creates audio file
363
- # @edgecases Handles piper errors, invalid models, multi-speaker voices
364
- if [[ -n "${SPEAKER_ID:-}" ]]; then
365
- # Multi-speaker voice: Pass speaker ID
366
- # SECURITY: Use printf instead of echo for pipe safety (#134)
367
- printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --speaker "$SPEAKER_ID" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
368
- else
369
- # Single-speaker voice
370
- printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
371
- fi
372
-
373
- if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
374
- echo "❌ Failed to synthesize speech with Piper"
375
- echo "Voice model: $VOICE_MODEL"
376
- echo "Check that voice model is valid"
377
- exit 4
378
- fi
379
-
380
- # @function detect_remote_session
381
- # @intent Auto-detect SSH/RDP sessions and enable audio compression
382
- # @why Remote desktop audio is choppy without compression
383
- # @returns Sets AGENTVIBES_RDP_MODE environment variable
384
- # @detection Checks SSH_CLIENT, SSH_TTY, and DISPLAY variables
385
- if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]]; then
386
- # Auto-detect remote session
387
- if [[ -n "${SSH_CLIENT:-}" ]] || [[ -n "${SSH_TTY:-}" ]] || [[ "${DISPLAY:-}" =~ ^localhost:.* ]]; then
388
- export AGENTVIBES_RDP_MODE=true
389
- echo "🌐 Remote session detected - enabling audio compression"
390
- fi
391
- fi
392
-
393
- # @function compress_for_remote
394
- # @intent Compress TTS audio for remote sessions (SSH/RDP)
395
- # @why Reduces bandwidth and prevents choppy playback
396
- # @param Uses global: $TEMP_FILE, $AGENTVIBES_RDP_MODE
397
- # @returns Updates $TEMP_FILE to compressed version
398
- # @sideeffects Converts to mono 22kHz for lower bandwidth
399
- if [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] && command -v ffmpeg &> /dev/null; then
400
- COMPRESSED_FILE=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX.wav")
401
- _CLEANUP_FILES+=("$COMPRESSED_FILE")
402
- # Convert to mono, 22kHz, 64kbps for remote sessions
403
- ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null
404
-
405
- if [[ -f "$COMPRESSED_FILE" ]]; then
406
- rm -f "$TEMP_FILE"
407
- TEMP_FILE="$COMPRESSED_FILE"
408
- fi
409
- fi
410
-
411
- # @function add_silence_padding
412
- # @intent Add silence to prevent WSL audio static
413
- # @why WSL audio subsystem cuts off first ~200ms
414
- # @param Uses global: $TEMP_FILE
415
- # @returns Updates $TEMP_FILE to padded version
416
- # @sideeffects Modifies audio file
417
- # AI NOTE: Use ffmpeg if available, otherwise skip padding (degraded experience)
418
- if command -v ffmpeg &> /dev/null; then
419
- PADDED_FILE=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX.wav")
420
- _CLEANUP_FILES+=("$PADDED_FILE")
421
- # Add 200ms of silence at the beginning
422
- ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
423
- -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
424
- -map "[out]" -y "$PADDED_FILE" 2>/dev/null
425
-
426
- if [[ -f "$PADDED_FILE" ]]; then
427
- rm -f "$TEMP_FILE"
428
- TEMP_FILE="$PADDED_FILE"
429
- fi
430
- fi
431
-
432
- # @function apply_audio_effects
433
- # @intent Apply sox effects and background music via audio-processor.sh
434
- # @param Uses global: $TEMP_FILE
435
- # @returns Updates $TEMP_FILE to processed version, sets $BACKGROUND_MUSIC if used
436
- # @sideeffects Applies audio effects and background music
437
- BACKGROUND_MUSIC=""
438
- if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
439
- PROCESSED_FILE=$(mktemp "$AUDIO_DIR/tts-processed-XXXXXX.wav")
440
- _CLEANUP_FILES+=("$PROCESSED_FILE")
441
- # audio-processor.sh returns: FILE_PATH|BACKGROUND_FILE
442
- PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" "$AGENT_PROFILE_FILE" 2>/dev/null) || {
443
- echo "Warning: Audio processing failed, using unprocessed audio" >&2
444
- PROCESSED_FILE="$TEMP_FILE"
445
- PROCESSOR_OUTPUT="$TEMP_FILE|"
446
- }
447
-
448
- # Parse output: FILE|BACKGROUND
449
- PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
450
- BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
451
-
452
- if [[ -f "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
453
- rm -f "$TEMP_FILE"
454
- TEMP_FILE="$PROCESSED_FILE"
455
- fi
456
- fi
457
-
458
- # @function play_audio
459
- # @intent Play generated audio using available player with sequential playback
460
- # @why Support multiple audio players and prevent overlapping audio in learning mode
461
- # @param Uses global: $TEMP_FILE, $CURRENT_LANGUAGE
462
- # @sideeffects Plays audio with lock mechanism for sequential playback
463
- _LOCK_DIR="${XDG_RUNTIME_DIR:-/tmp/agentvibes-$(id -u)}"
464
- mkdir -p "$_LOCK_DIR"
465
- chmod 700 "$_LOCK_DIR"
466
- LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"
467
-
468
- # Auto-remove stale lock files (older than 30 seconds) to prevent permanent blocking
469
- # This handles cases where the background cleanup process was killed mid-playback
470
- if [ -f "$LOCK_FILE" ]; then
471
- _lock_age=0
472
- if [[ "$(uname)" == "Darwin" ]]; then
473
- _lock_mtime=$(stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0)
474
- else
475
- _lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || echo 0)
476
- fi
477
- _now=$(date +%s)
478
- _lock_age=$((_now - _lock_mtime))
479
- if [[ $_lock_age -gt 30 ]]; then
480
- rm -f "$LOCK_FILE"
481
- fi
482
- fi
483
-
484
- # Wait for previous audio to finish (max 2 seconds to prevent blocking)
485
- for i in {1..4}; do
486
- if [ ! -f "$LOCK_FILE" ]; then
487
- break
488
- fi
489
- sleep 0.5
490
- done
491
-
492
- # If still locked after 2 seconds, skip this TTS to prevent blocking Claude
493
- if [ -f "$LOCK_FILE" ]; then
494
- echo "⏭️ Skipping TTS (previous audio still playing)" >&2
495
- exit 0
496
- fi
497
-
498
- # Track last target language audio for replay command
499
- if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
500
- TARGET_AUDIO_FILE="${CLAUDE_PROJECT_DIR:-${HOME}}/.claude/last-target-audio.txt"
501
- echo "$TEMP_FILE" > "$TARGET_AUDIO_FILE"
502
- fi
503
-
504
- # Create lock and play audio
505
- touch "$LOCK_FILE"
506
-
507
- # Create write lock file in audio directory to signal file is in-use (prevents race condition in cleanup)
508
- _TEMP_DIR="${TEMP_FILE%/*}"
509
- WRITE_LOCK_FILE="$_TEMP_DIR/$(basename "$TEMP_FILE" .wav).lock"
510
- touch "$WRITE_LOCK_FILE"
511
- _CLEANUP_FILES+=("$LOCK_FILE" "$WRITE_LOCK_FILE")
512
-
513
- # Get audio duration for proper lock timing
514
- DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null || true)
515
- DURATION=${DURATION%.*} # Round to integer
516
- # SECURITY: Validate duration is numeric (#134)
517
- if ! [[ "${DURATION:-}" =~ ^[0-9]+$ ]]; then
518
- DURATION=1
519
- fi
520
-
521
- # Play audio (skip if in test mode or no-playback mode)
522
- # AGENTVIBES_NO_PLAYBACK: Set to "true" to generate audio without playing (for post-processing)
523
- PLAYER_PID=""
524
- if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && [[ "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
525
- # Detect platform and use appropriate audio player
526
- if [[ "$(uname -s)" == "Darwin" ]]; then
527
- # macOS: Use afplay (native macOS audio player)
528
- afplay "$TEMP_FILE" >/dev/null 2>&1 &
529
- PLAYER_PID=$!
530
- elif [[ -n "${TERMUX_VERSION:-}" ]] || [[ -d "/data/data/com.termux" ]]; then
531
- # Android/Termux: Use termux-media-player
532
- termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
533
- PLAYER_PID=$!
534
- else
535
- # Linux/WSL: Prefer paplay (PulseAudio) for best WSL audio quality
536
- (paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
537
- PLAYER_PID=$!
538
- fi
539
- fi
540
-
541
- # Wait for audio to finish, then release locks (both global and write lock)
542
- (sleep $DURATION; rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE") &
543
- disown
544
-
545
- # Get audio cache path
546
- AUDIO_DIR_PATH=$(get_audio_dir)
547
-
548
- # Color codes (safe to use — WAV path is passed via AGENTVIBES_WAV_OUTPATH, not parsed from stdout)
549
- BLUE='\033[0;34m'
550
- YELLOW='\033[1;33m'
551
- PURPLE='\033[0;35m'
552
- RED='\033[0;31m'
553
- GREEN='\033[0;32m'
554
- ORANGE='\033[0;33m'
555
- WHITE='\033[1;37m'
556
- CYAN='\033[0;36m'
557
- GOLD='\033[38;5;226m'
558
- NC='\033[0m'
559
-
560
- # Check if banner is enabled (default: on)
561
- _BANNER_ENABLED=true
562
- if [[ -f "$HOME/.agentvibes/banner-disabled" ]]; then
563
- _BANNER_ENABLED=false
564
- elif [[ -f "${PROJECT_ROOT:-/nonexistent}/.agentvibes/banner-disabled" ]]; then
565
- _BANNER_ENABLED=false
566
- fi
567
-
568
- # Run auto-cleanup off the critical path: only every 10th call, in background after playback starts.
569
- # Counter file lives in the secure lock dir (user-specific, already created above).
570
- AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
571
- _CALL_COUNTER_FILE="$_LOCK_DIR/agentvibes-tts-call-count"
572
- _CALL_COUNT=$(cat "$_CALL_COUNTER_FILE" 2>/dev/null || echo "0")
573
- # SECURITY: Validate counter is numeric before arithmetic
574
- if ! [[ "$_CALL_COUNT" =~ ^[0-9]+$ ]]; then _CALL_COUNT=0; fi
575
- _CALL_COUNT=$((_CALL_COUNT + 1))
576
- echo "$_CALL_COUNT" > "$_CALL_COUNTER_FILE"
577
-
578
- if (( _CALL_COUNT % 10 == 0 )); then
579
- # Capture values needed inside the subshell before forking
580
- _CLEANUP_AUDIO_DIR="$AUDIO_DIR_PATH"
581
- _CLEANUP_THRESHOLD="$AUTO_CLEAN_THRESHOLD"
582
- _CLEANUP_BANNER="$_BANNER_ENABLED"
583
- # Source the utils inside the subshell (functions are not exported)
584
- _CLEANUP_UTILS="$SCRIPT_DIR/audio-cache-utils.sh"
585
- (
586
- source "$_CLEANUP_UTILS" 2>/dev/null || exit 0
587
- _INITIAL_SIZE=$(calculate_tts_size_bytes "$_CLEANUP_AUDIO_DIR")
588
- if [[ $_INITIAL_SIZE -gt $((_CLEANUP_THRESHOLD * 1048576)) ]]; then
589
- _DELETED=$(auto_clean_old_files "$_CLEANUP_AUDIO_DIR" "$_CLEANUP_THRESHOLD")
590
- if [[ ${_DELETED:-0} -gt 0 ]] && [[ "$_CLEANUP_BANNER" == "true" ]]; then
591
- echo -e "\033[0;33m🧹 Auto-cleaned $_DELETED old files\033[0m"
592
- fi
593
- fi
594
- ) &
595
- disown
596
- fi
597
-
598
- # Write output path for play-tts-enhanced.sh (avoids stdout parsing — colors are safe)
599
- if [[ -n "${AGENTVIBES_WAV_OUTPATH:-}" ]]; then
600
- echo "$TEMP_FILE" > "$AGENTVIBES_WAV_OUTPATH"
601
- fi
602
-
603
- if [[ "$_BANNER_ENABLED" == "true" ]]; then
604
- FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
605
- SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
606
- SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")
607
-
608
- # Dynamic color coding based on cache size
609
- CACHE_COLOR=$GREEN
610
- if [[ $SIZE_BYTES -gt 3221225472 ]]; then
611
- CACHE_COLOR=$RED
612
- elif [[ $SIZE_BYTES -gt 524288000 ]]; then
613
- CACHE_COLOR=$YELLOW
614
- fi
615
-
616
- echo -e "${WHITE}💾 Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}🗄️${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}🧹${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"
617
-
618
- if [[ -n "$BACKGROUND_MUSIC" ]]; then
619
- echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}$(basename "$BACKGROUND_MUSIC")${NC}"
620
- fi
621
- if [[ -n "${SPEAKER_ID:-}" ]] && [[ -n "${FILE_VOICE:-}" ]]; then
622
- echo -e "${WHITE}🎤 Voice used:${NC} ${BLUE}$FILE_VOICE${NC} ${WHITE}(Piper TTS)${NC}"
623
- else
624
- echo -e "${WHITE}🎤 Voice used:${NC} ${BLUE}$VOICE_MODEL${NC} ${WHITE}(Piper TTS)${NC}"
625
- fi
626
-
627
- PERSONALITY=$(cat "${PROJECT_ROOT:-/nonexistent}/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
628
- if [[ -n "$PERSONALITY" ]] && [[ "$PERSONALITY" != "none" ]] && [[ "$PERSONALITY" != "normal" ]]; then
629
- echo -e "${WHITE}💫 Personality:${NC} ${YELLOW}$PERSONALITY${NC}"
630
- fi
631
-
632
- echo -e "\033[38;5;240mSay: \"Turn off banner\" to hide this output\033[0m"
633
- fi
634
-
635
- # Check audio folder size and warn if getting large
636
- if [[ "$_BANNER_ENABLED" == "true" ]] && [[ -d "$AUDIO_DIR_PATH" ]]; then
637
- AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
638
- if [[ -n "$AUDIO_SIZE" ]] && [[ "$AUDIO_SIZE" -gt 100 ]]; then
639
- echo -e "\033[0;31m⚠️ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
640
- fi
641
- fi
642
-
643
- # Show status indicators
644
- GLOBAL_MUTE_FILE="$HOME/.agentvibes-muted"
645
- PROJECT_MUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-muted"
646
- PROJECT_UNMUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-unmuted"
647
- BACKGROUND_ENABLED_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/config/background-music-enabled.txt"
648
- GLOBAL_BACKGROUND_ENABLED_FILE="$HOME/.claude/config/background-music-enabled.txt"
649
-
650
- # Mute status indicator
651
- if [[ -f "$PROJECT_UNMUTE_FILE" ]] && [[ -f "$GLOBAL_MUTE_FILE" ]]; then
652
- echo "🔊 Status: Unmuted (project overrides global mute)"
653
- elif [[ -f "$PROJECT_MUTE_FILE" ]]; then
654
- echo "🔇 Status: Muted (project)"
655
- elif [[ -f "$GLOBAL_MUTE_FILE" ]]; then
656
- echo "🔇 Status: Would be muted (global) - but this project is speaking"
657
- fi
658
-
659
- # Background music status indicator
660
- if [[ -z "$BACKGROUND_MUSIC" ]]; then
661
- _bg_enabled=false
662
- if [[ -f "$BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$BACKGROUND_ENABLED_FILE" 2>/dev/null; then
663
- _bg_enabled=true
664
- elif [[ -f "$GLOBAL_BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$GLOBAL_BACKGROUND_ENABLED_FILE" 2>/dev/null; then
665
- _bg_enabled=true
666
- fi
667
- if [[ "$_bg_enabled" == "true" ]]; then
668
- echo "🎵 Background music: Enabled but not playing (check config)"
669
- else
670
- echo "🎵 Background music: Disabled"
671
- fi
672
- fi
673
-
674
- # Wait for audio player to finish before returning.
675
- # This keeps the bmad-speak.sh speech lock held until playback is actually done,
676
- # preventing party-mode agents from talking over each other.
677
- if [[ -n "$PLAYER_PID" ]]; then
678
- wait "$PLAYER_PID" 2>/dev/null || true
679
- fi
1
+ #!/usr/bin/env bash
2
+ #
3
+ # File: .claude/hooks/play-tts-piper.sh
4
+ #
5
+ # AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
6
+ # Website: https://agentvibes.org
7
+ # Repository: https://github.com/paulpreibisch/AgentVibes
8
+ #
9
+ # Co-created by Paul Preibisch with Claude AI
10
+ # Copyright (c) 2025 Paul Preibisch
11
+ #
12
+ # Licensed under the Apache License, Version 2.0 (the "License");
13
+ # you may not use this file except in compliance with the License.
14
+ # You may obtain a copy of the License at
15
+ #
16
+ # http://www.apache.org/licenses/LICENSE-2.0
17
+ #
18
+ # Unless required by applicable law or agreed to in writing, software
19
+ # distributed under the License is distributed on an "AS IS" BASIS,
20
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ # See the License for the specific language governing permissions and
22
+ # limitations under the License.
23
+ #
24
+ # DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
25
+ # express or implied. Use at your own risk. See the Apache License for details.
26
+ #
27
+ # ---
28
+ #
29
+ # @fileoverview Piper TTS Provider Implementation - Free, offline neural TTS
30
+ # @context Provides local, privacy-first TTS alternative to cloud services for WSL/Linux
31
+ # @architecture Implements provider interface contract for Piper binary integration
32
+ # @dependencies piper (pipx), piper-voice-manager.sh, mpv/aplay, ffmpeg (optional padding)
33
+ # @entrypoints Called by play-tts.sh router when provider=piper
34
+ # @patterns Provider contract: text/voice → audio file path, voice auto-download, language-aware synthesis
35
+ # @related play-tts.sh, piper-voice-manager.sh, language-manager.sh, GitHub Issue #25
36
+ #
37
+
38
# Strict mode: abort on the first failing command or failing pipeline stage.
# nounset (-u) is intentionally left off because the sourced helpers
# (piper-voice-manager.sh, language-manager.sh, audio-cache-utils.sh) read
# unset variables freely. Code in THIS script guards its own expansions with
# ${VAR:-} defaults instead.
set -eo pipefail

# Intermediate files registered for removal when the script exits.
_CLEANUP_FILES=()

# @function cleanup
# @intent EXIT handler that deletes every registered intermediate file
# @why The file $TEMP_FILE currently points at is the final output and must survive
# @param Uses globals: $_CLEANUP_FILES, $TEMP_FILE
cleanup() {
  local path
  # The ${arr[@]+...} form expands to nothing when the array is empty.
  for path in "${_CLEANUP_FILES[@]+"${_CLEANUP_FILES[@]}"}"; do
    if [[ "$path" != "${TEMP_FILE:-}" ]]; then
      rm -f "$path"
    fi
  done
}
trap cleanup EXIT

# Fix locale warnings
export LC_ALL=C
56
+
57
TEXT="${1:-}"
VOICE_OVERRIDE="${2:-}"      # Optional: voice model name
AGENT_PROFILE_FILE="${3:-}"  # Optional: path to per-agent profile JSON (from bmad-speak.sh)

# @function _sanitize_tts_text
# @intent Strip emojis, asterisks, and markdown formatting that Piper would speak literally
# @why perl is not a declared dependency; without it the raw text is still speakable
# @param $1 Text to clean
# @returns Cleaned text on stdout (the unchanged text when perl is unavailable)
_sanitize_tts_text() {
  if ! command -v perl > /dev/null 2>&1; then
    printf '%s' "$1"
    return 0
  fi
  printf '%s' "$1" | perl -CSD -pe '
    s/[\x{1F300}-\x{1F9FF}]//g;  # emoticons, symbols, pictographs
    s/[\x{2600}-\x{27BF}]//g;    # misc symbols, dingbats
    s/[\x{FE00}-\x{FE0F}]//g;    # variation selectors
    s/[\x{200D}]//g;             # zero-width joiner
    s/[\x{2500}-\x{257F}]//g;    # box drawing (─━ etc)
    s/[\x{2580}-\x{259F}]//g;    # block elements
    s/\*+//g;                    # asterisks (bold/italic markdown)
    s/#+\s*//g;                  # heading markers
    s/`//g;                      # backticks
    s/~+//g;                     # strikethrough
    s/^\s*[-]\s*//g;             # list dashes
  '
}

# Best effort: if sanitising fails, keep the raw text instead of letting
# `set -e` terminate the script before any usage/error message is printed.
if _SANITIZED_TEXT=$(_sanitize_tts_text "$TEXT"); then
  TEXT="$_SANITIZED_TEXT"
fi
75
+
76
+ # Source voice manager and language manager
77
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
78
+ source "$SCRIPT_DIR/piper-voice-manager.sh"
79
+ source "$SCRIPT_DIR/language-manager.sh"
80
+ source "$SCRIPT_DIR/audio-cache-utils.sh"
81
+
82
# Default voice for Piper
DEFAULT_VOICE="en_US-lessac-medium"

# @function _lookup_speaker_id
# @intent Translate a speaker name into the numeric ID from a model's speaker_id_map
# @why "Model::SpeakerName" voices store the name; piper's --speaker is passed the ID
# @param $1 Piper model name (without the .onnx.json suffix)
# @param $2 Speaker name to look up
# @returns Numeric speaker ID on stdout, or nothing when it cannot be resolved
# @sideeffects None (best effort: any lookup failure yields empty output)
_lookup_speaker_id() {
  local model="$1" speaker="$2"
  local storage_dir json_file id=""

  storage_dir=$(get_voice_storage_dir) || return 0
  json_file="$storage_dir/${model}.onnx.json"
  [[ -f "$json_file" ]] || return 0

  # SECURITY: Pass values via env vars to prevent shell injection
  id=$(_JSON="$json_file" _SPKR="$speaker" node -e "
    try {
      const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
      const map = j.speaker_id_map || {};
      const id = map[process.env._SPKR];
      if (id !== undefined) process.stdout.write(String(id));
    } catch {}
  " 2>/dev/null || true)

  # Only hand back something that is safe to pass to `piper --speaker`.
  if [[ "$id" =~ ^[0-9]+$ ]]; then
    printf '%s' "$id"
  fi
}

# @function determine_voice_model
# @intent Resolve voice name to Piper model name with language support
# @why Support voice override, language-specific voices, and default fallback
# @param Uses global: $VOICE_OVERRIDE
# @returns Sets $VOICE_MODEL and, for multi-speaker voices, $SPEAKER_ID
# @sideeffects Canonicalizes $CLAUDE_PROJECT_DIR (cleared when it is not a directory)
VOICE_MODEL=""
# Start from a known state so a SPEAKER_ID inherited from the environment can
# never be passed to a single-speaker model.
SPEAKER_ID=""

# Get current language setting
CURRENT_LANGUAGE=$(get_language_code)

# SECURITY: Canonicalize path to prevent traversal (#128)
# Done before the override check because the audio directory is derived from
# CLAUDE_PROJECT_DIR later on regardless of how the voice was chosen.
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
  CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
fi

if [[ -n "$VOICE_OVERRIDE" ]]; then
  # Use override if provided
  # Handle multi-speaker format: "Model::SpeakerName" → split into model + speaker lookup
  if [[ "$VOICE_OVERRIDE" == *"::"* ]]; then
    VOICE_MODEL="${VOICE_OVERRIDE%%::*}"
    SPEAKER_ID=$(_lookup_speaker_id "$VOICE_MODEL" "${VOICE_OVERRIDE#*::}")
    echo "🎭 Using multi-speaker voice: $VOICE_OVERRIDE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
  else
    VOICE_MODEL="$VOICE_OVERRIDE"
    echo "🎤 Using voice: $VOICE_OVERRIDE (session-specific)"
  fi
else
  # Try to get voice from voice file (check CLAUDE_PROJECT_DIR first for MCP context)
  VOICE_FILE=""

  # Priority order:
  # 1. CLAUDE_PROJECT_DIR env var (set by MCP for project-specific settings)
  # 2. Script location (for direct slash command usage)
  # 3. Global ~/.claude (fallback)
  if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]] && [[ -f "$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt" ]]; then
    # MCP context: Use the project directory where MCP was invoked
    VOICE_FILE="$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt"
  elif [[ -f "$SCRIPT_DIR/../tts-voice.txt" ]]; then
    # Direct usage: Use script location
    VOICE_FILE="$SCRIPT_DIR/../tts-voice.txt"
  elif [[ -f "$HOME/.claude/tts-voice.txt" ]]; then
    # Fallback: Use global
    VOICE_FILE="$HOME/.claude/tts-voice.txt"
  fi

  if [[ -n "$VOICE_FILE" ]]; then
    # An unreadable file is treated like an empty one rather than aborting
    # the whole script under `set -e`.
    FILE_VOICE=$(cat "$VOICE_FILE" 2>/dev/null) || FILE_VOICE=""

    # Check for multi-speaker voice (model + speaker ID stored separately)
    # Use same directory as VOICE_FILE for consistency
    VOICE_DIR=$(dirname "$VOICE_FILE")
    MODEL_FILE="$VOICE_DIR/tts-piper-model.txt"
    SPEAKER_ID_FILE="$VOICE_DIR/tts-piper-speaker-id.txt"

    if [[ -f "$MODEL_FILE" ]] && [[ -f "$SPEAKER_ID_FILE" ]]; then
      # Multi-speaker voice config found locally
      VOICE_MODEL=$(cat "$MODEL_FILE" 2>/dev/null) || VOICE_MODEL=""
      SPEAKER_ID=$(cat "$SPEAKER_ID_FILE" 2>/dev/null) || SPEAKER_ID=""
      # Validate speaker ID is numeric
      if [[ -n "$SPEAKER_ID" ]] && ! [[ "$SPEAKER_ID" =~ ^[0-9]+$ ]]; then
        echo "Warning: Invalid speaker ID '$SPEAKER_ID', ignoring" >&2
        SPEAKER_ID=""
      fi
      echo "🎭 Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-none})"
    elif [[ "$FILE_VOICE" == *"::"* ]]; then
      # Voice uses Model::SpeakerName format (from AgentVibes config)
      VOICE_MODEL="${FILE_VOICE%%::*}"
      SPEAKER_ID=$(_lookup_speaker_id "$VOICE_MODEL" "${FILE_VOICE#*::}")
      echo "🎭 Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
    elif [[ -n "$FILE_VOICE" ]]; then
      # Standard Piper model name or custom voice (just use as-is)
      VOICE_MODEL="$FILE_VOICE"
    fi
  fi

  # If no Piper voice from file, try language-specific voice
  if [[ -z "$VOICE_MODEL" ]]; then
    # A speaker ID read next to an empty model file must not be applied to
    # whatever voice is chosen below.
    SPEAKER_ID=""
    # NOTE(review): presumably the lookup can return non-zero for a language
    # with no mapping; treat that as "no voice" so the default branch is
    # reachable under `set -e` — confirm against language-manager.sh.
    LANG_VOICE=$(get_voice_for_language "$CURRENT_LANGUAGE" "piper" 2>/dev/null) || LANG_VOICE=""

    if [[ -n "$LANG_VOICE" ]]; then
      VOICE_MODEL="$LANG_VOICE"
      echo "🌍 Using $CURRENT_LANGUAGE voice: $LANG_VOICE (Piper)"
    else
      # Use default voice
      VOICE_MODEL="$DEFAULT_VOICE"
    fi
  fi
fi
206
+
207
# @function validate_inputs
# @intent Reject the call early when there is nothing to speak or no engine to speak it
# @why A clear message up front beats a confusing failure further down
# @exitcode 1=missing text, 2=missing piper binary
[[ -n "$TEXT" ]] || {
  echo "Usage: $0 \"text to speak\" [voice_model_name]"
  exit 1
}

# The piper binary must be on PATH before anything else is attempted.
command -v piper &> /dev/null || {
  echo "❌ Error: Piper TTS not installed"
  echo "Install with: pipx install piper-tts"
  echo "Or run: .claude/hooks/piper-installer.sh"
  exit 2
}
223
+
224
# @function ensure_voice_downloaded
# @intent Download voice model if not cached
# @why Provide seamless experience with automatic downloads
# @param Uses global: $VOICE_MODEL
# @sideeffects Downloads voice model files
# @exitcode 3=download declined/failed or model path not found
# @edgecases Prompts user for consent before downloading, skipped in test mode
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && ! verify_voice "$VOICE_MODEL"; then
  echo "📥 Voice model not found: $VOICE_MODEL"
  echo " File size: ~25MB"
  echo " Preview: https://huggingface.co/rhasspy/piper-voices"
  echo ""
  # `read` returns non-zero at end-of-input (e.g. stdin is not a terminal).
  # Under `set -e` that would end the script with no explanation, so treat it
  # as "no" and fall through to the cancellation message.
  REPLY=""
  read -p " Download this voice model? [y/N]: " -n 1 -r || REPLY=""
  echo

  if [[ "$REPLY" =~ ^[Yy]$ ]]; then
    if ! download_voice "$VOICE_MODEL"; then
      echo "❌ Failed to download voice model"
      echo "Fix: Download manually or choose different voice"
      exit 3
    fi
  else
    echo "❌ Voice download cancelled"
    exit 3
  fi
fi

# Get voice model path
# In test mode, use a fake path since we have mock piper that doesn't need real files
if [[ "${AGENTVIBES_TEST_MODE:-false}" == "true" ]]; then
  VOICE_PATH="/tmp/mock-voice-${VOICE_MODEL}.onnx"
# Test the assignment directly: a separate `$?` check afterwards is never
# reached under `set -e`, because the failed assignment already exits.
elif ! VOICE_PATH=$(get_voice_path "$VOICE_MODEL"); then
  echo "❌ Voice model path not found: $VOICE_MODEL"
  exit 3
fi
261
+
262
# @function determine_audio_directory
# @intent Pick the directory that receives generated audio and reserve an output file in it
# @why Audio can live with a project (.claude/audio) or in the global ~/.claude/audio
# @returns Sets $AUDIO_DIR and $TEMP_FILE (a freshly reserved .wav path)
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
  AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
else
  # Walk upwards from the working directory looking for a project .claude folder.
  CURRENT_DIR="$PWD"
  until [[ "$CURRENT_DIR" == "/" ]]; do
    if [[ -d "$CURRENT_DIR/.claude" ]]; then
      AUDIO_DIR="$CURRENT_DIR/.claude/audio"
      break
    fi
    CURRENT_DIR=$(dirname "$CURRENT_DIR")
  done
  # Nothing found on the way up: use the global location.
  : "${AUDIO_DIR:=$HOME/.claude/audio}"
fi

mkdir -p "$AUDIO_DIR"

# mktemp reserves a unique name; the file is then renamed to carry a .wav suffix.
_tmp=$(mktemp "$AUDIO_DIR/tts-XXXXXX")
TEMP_FILE="${_tmp}.wav"
mv "$_tmp" "$TEMP_FILE"
286
+
287
# @function _read_speed_setting
# @intent Read the effective value from a speech-rate config file
# @param $1 Path to the config file
# @returns Last line that is neither blank nor a '#' comment, trimmed of
#          surrounding whitespace (including a trailing CR); empty if none
_read_speed_setting() {
  local line value=""
  [[ -r "$1" ]] || return 0
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Trim leading, then trailing whitespace.
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    if [[ -n "$line" && "$line" != \#* ]]; then
      value="$line"
    fi
  done < "$1"
  printf '%s' "$value"
}

# @function _piper_length_scale
# @intent Convert a user-facing speed into Piper's length-scale
# @why User: 0.5=slower, 1.0=normal, 2.0=faster
#      Piper: 2.0=slower, 1.0=normal, 0.5=faster
#      Formula: piper_length_scale = 1.0 / user_speed
# @param $1 User speed
# @returns Length-scale on stdout; "1.0" whenever the speed is not a positive
#          number or the division does not produce a usable positive result
_piper_length_scale() {
  local speed="$1" scale=""
  local number_re='^[0-9]*\.?[0-9]+$'
  local zero_re='^0*\.?0+$'   # every spelling of zero: 0, 00, 0.0, 0.00, .0

  if ! [[ "$speed" =~ $number_re ]] || [[ "$speed" =~ $zero_re ]]; then
    echo "1.0"
    return
  fi

  scale=$(echo "scale=2; 1.0 / $speed" | bc -l 2>/dev/null) || scale=""
  # bc may be missing, or may produce nothing / zero (very large speeds
  # round down to 0 at scale=2); never hand piper an unusable value.
  if ! [[ "$scale" =~ $number_re ]] || [[ "$scale" =~ $zero_re ]]; then
    scale="1.0"
  fi
  echo "$scale"
}

# @function get_speech_rate
# @intent Determine speech rate for Piper synthesis
# @why Convert user-facing speed (0.5=slower, 2.0=faster) to Piper length-scale (inverted)
# @param Uses globals: $SCRIPT_DIR, $HOME, $CURRENT_LANGUAGE
# @returns Piper length-scale value (inverted from user scale)
# @note Piper uses length-scale where higher=slower, opposite of user expectation
get_speech_rate() {
  local target_config="" main_config="" candidate

  # Check for target-specific config first (new and legacy paths)
  for candidate in \
    "$SCRIPT_DIR/../config/tts-target-speech-rate.txt" \
    "$HOME/.claude/config/tts-target-speech-rate.txt" \
    "$SCRIPT_DIR/../config/piper-target-speech-rate.txt" \
    "$HOME/.claude/config/piper-target-speech-rate.txt"; do
    if [[ -f "$candidate" ]]; then
      target_config="$candidate"
      break
    fi
  done

  # Check for main config (new and legacy paths)
  for candidate in \
    "$SCRIPT_DIR/../config/tts-speech-rate.txt" \
    "$HOME/.claude/config/tts-speech-rate.txt" \
    "$SCRIPT_DIR/../config/piper-speech-rate.txt" \
    "$HOME/.claude/config/piper-speech-rate.txt"; do
    if [[ -f "$candidate" ]]; then
      main_config="$candidate"
      break
    fi
  done

  # If this is a non-English voice and target config exists, use it
  if [[ "${CURRENT_LANGUAGE:-}" != "english" ]] && [[ -n "$target_config" ]]; then
    _piper_length_scale "$(_read_speed_setting "$target_config")"
    return
  fi

  # Otherwise use main config if available
  if [[ -n "$main_config" ]]; then
    _piper_length_scale "$(_read_speed_setting "$main_config")"
    return
  fi

  # Default: 1.0 (normal) for English, 2.0 (slower) for learning
  if [[ "${CURRENT_LANGUAGE:-}" != "english" ]]; then
    echo "2.0"
  else
    echo "1.0"
  fi
}
353
+
354
SPEECH_RATE=$(get_speech_rate)

# @function synthesize_with_piper
# @intent Generate speech using Piper TTS
# @why Provides free, offline TTS alternative
# @param Uses globals: $TEXT, $VOICE_PATH, $SPEECH_RATE, $SPEAKER_ID (optional)
# @returns Creates WAV file at $TEMP_FILE
# @exitcode 0=success, 4=synthesis error
# @sideeffects Creates audio file; removes the placeholder file on failure
# @edgecases Handles piper errors, invalid models, multi-speaker voices
_PIPER_ARGS=(--model "$VOICE_PATH")
if [[ -n "${SPEAKER_ID:-}" ]]; then
  # Multi-speaker voice: Pass speaker ID
  _PIPER_ARGS+=(--speaker "$SPEAKER_ID")
fi
_PIPER_ARGS+=(--length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE")

# Capture the status instead of letting `set -e`/`pipefail` end the script
# here: otherwise a piper failure exits with piper's own code and the
# explanation and exit code 4 below are never reached.
# SECURITY: Use printf instead of echo for pipe safety (#134)
_PIPER_STATUS=0
printf '%s\n' "$TEXT" | piper "${_PIPER_ARGS[@]}" 2>/dev/null || _PIPER_STATUS=$?

# -s is false for a missing file as well as for an empty one.
if ((_PIPER_STATUS != 0)) || [[ ! -s "$TEMP_FILE" ]]; then
  # Do not leave the empty placeholder behind in the audio cache.
  rm -f "$TEMP_FILE"
  echo "❌ Failed to synthesize speech with Piper"
  echo "Voice model: $VOICE_MODEL"
  echo "Check that voice model is valid"
  exit 4
fi
379
+
380
# @function detect_remote_session
# @intent Turn on audio compression automatically for SSH / forwarded-display sessions
# @why Remote desktop audio is choppy without compression
# @returns Exports AGENTVIBES_RDP_MODE=true when a remote session is detected
# @detection Looks at SSH_CLIENT, SSH_TTY, and a DISPLAY starting with "localhost:"
# An explicit AGENTVIBES_RDP_MODE (any non-empty value) is left untouched.
if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]]; then
  if [[ -n "${SSH_CLIENT:-}${SSH_TTY:-}" || "${DISPLAY:-}" == localhost:* ]]; then
    export AGENTVIBES_RDP_MODE=true
    echo "🌐 Remote session detected - enabling audio compression"
  fi
fi
392
+
393
# @function compress_for_remote
# @intent Compress TTS audio for remote sessions (SSH/RDP)
# @why Reduces bandwidth and prevents choppy playback
# @param Uses global: $TEMP_FILE, $AGENTVIBES_RDP_MODE
# @returns Updates $TEMP_FILE to compressed version (unchanged if ffmpeg fails)
# @sideeffects Converts to mono 22kHz for lower bandwidth
if [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] && command -v ffmpeg &> /dev/null; then
  _tmp=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX")
  COMPRESSED_FILE="${_tmp}.wav"
  mv "$_tmp" "$COMPRESSED_FILE"
  _CLEANUP_FILES+=("$COMPRESSED_FILE")

  # Convert to mono, 22kHz, 64kbps for remote sessions.
  # Compression is optional: run ffmpeg inside the condition so a failure
  # does not abort the script under `set -e`, and require a non-empty result
  # (the placeholder from mktemp always exists, so -f alone proves nothing).
  if ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null \
    && [[ -s "$COMPRESSED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$COMPRESSED_FILE"
  else
    # Keep the uncompressed audio and drop the unusable output.
    rm -f "$COMPRESSED_FILE"
  fi
fi
410
+
411
# @function add_silence_padding
# @intent Add silence to prevent WSL audio static
# @why WSL audio subsystem cuts off first ~200ms
# @param Uses global: $TEMP_FILE
# @returns Updates $TEMP_FILE to padded version (unchanged if ffmpeg fails)
# @sideeffects Modifies audio file
# AI NOTE: Use ffmpeg if available, otherwise skip padding (degraded experience)
if command -v ffmpeg &> /dev/null; then
  _tmp=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX")
  PADDED_FILE="${_tmp}.wav"
  mv "$_tmp" "$PADDED_FILE"
  _CLEANUP_FILES+=("$PADDED_FILE")

  # Add 200ms of silence at the beginning.
  # Padding is optional: run ffmpeg inside the condition so a failure does
  # not abort the script under `set -e`, and require a non-empty result
  # (the placeholder from mktemp always exists, so -f alone proves nothing).
  if ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
    -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
    -map "[out]" -y "$PADDED_FILE" 2>/dev/null \
    && [[ -s "$PADDED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$PADDED_FILE"
  else
    # Keep the unpadded audio and drop the unusable output.
    rm -f "$PADDED_FILE"
  fi
fi
431
+
432
# @function apply_audio_effects
# @intent Apply sox effects and background music via audio-processor.sh
# @param Uses global: $TEMP_FILE, $AGENT_PROFILE_FILE
# @returns Updates $TEMP_FILE to processed version, sets $BACKGROUND_MUSIC if used
# @sideeffects Applies audio effects and background music
BACKGROUND_MUSIC=""
if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
  _tmp=$(mktemp "$AUDIO_DIR/tts-processed-XXXXXX")
  PROCESSED_FILE="${_tmp}.wav"
  mv "$_tmp" "$PROCESSED_FILE"
  _CLEANUP_FILES+=("$PROCESSED_FILE")

  # audio-processor.sh returns: FILE_PATH|BACKGROUND_FILE
  PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" "$AGENT_PROFILE_FILE" 2>/dev/null) || {
    echo "Warning: Audio processing failed, using unprocessed audio" >&2
    PROCESSOR_OUTPUT="$TEMP_FILE|"
  }

  # Parse output: FILE|BACKGROUND
  if [[ "$PROCESSOR_OUTPUT" == *"|"* ]]; then
    PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
    BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
  else
    # No separator: the output is only a file path. Without this guard the
    # "##*|" expansion would report that path as the background track.
    PROCESSED_FILE="$PROCESSOR_OUTPUT"
  fi

  # Require a non-empty result: the placeholder from mktemp exists even when
  # nothing was written to it, and must not replace the real audio.
  if [[ -s "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$PROCESSED_FILE"
  fi
fi
457
+
458
# @function play_audio
# @intent Play generated audio using available player with sequential playback
# @why Several players are supported, and clips must not overlap in learning mode
# @param Uses global: $TEMP_FILE, $CURRENT_LANGUAGE
# @sideeffects Plays audio with lock mechanism for sequential playback
_LOCK_DIR="${XDG_RUNTIME_DIR:-/tmp/agentvibes-$(id -u)}"
mkdir -p "$_LOCK_DIR"
chmod 700 "$_LOCK_DIR"
LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"

# A lock older than 30 seconds is considered stale and removed, so a killed
# background release job cannot block playback forever.
if [ -f "$LOCK_FILE" ]; then
  _lock_age=0
  case "$(uname)" in
    Darwin) _lock_mtime=$(stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0) ;;
    *) _lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || echo 0) ;;
  esac
  _now=$(date +%s)
  _lock_age=$((_now - _lock_mtime))
  if ((_lock_age > 30)); then
    rm -f "$LOCK_FILE"
  fi
fi

# Give the previous clip up to 2 seconds (4 x 0.5s) to finish.
_lock_polls=0
while [ -f "$LOCK_FILE" ] && ((_lock_polls < 4)); do
  sleep 0.5
  _lock_polls=$((_lock_polls + 1))
done

# Still locked: drop this clip rather than block Claude any longer.
if [ -f "$LOCK_FILE" ]; then
  echo "⏭️ Skipping TTS (previous audio still playing)" >&2
  exit 0
fi
497
+
498
# Track last target language audio for replay command
if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
  TARGET_AUDIO_FILE="${CLAUDE_PROJECT_DIR:-${HOME}}/.claude/last-target-audio.txt"
  # The .claude directory here is not guaranteed to exist (audio may live in
  # an ancestor project's .claude instead). Replay tracking is a convenience,
  # so a failed write must not abort playback under `set -e`.
  if ! {
    mkdir -p "${TARGET_AUDIO_FILE%/*}" && echo "$TEMP_FILE" > "$TARGET_AUDIO_FILE"
  } 2>/dev/null; then
    echo "Warning: Could not record last target audio in $TARGET_AUDIO_FILE" >&2
  fi
fi

# Create lock and play audio
touch "$LOCK_FILE"

# Create write lock file in audio directory to signal file is in-use (prevents race condition in cleanup)
_TEMP_DIR="${TEMP_FILE%/*}"
WRITE_LOCK_FILE="$_TEMP_DIR/$(basename "$TEMP_FILE" .wav).lock"
touch "$WRITE_LOCK_FILE"
# Both locks are also removed by the EXIT handler if the script ends early.
_CLEANUP_FILES+=("$LOCK_FILE" "$WRITE_LOCK_FILE")
512
+
513
# @function _lock_seconds_from_duration
# @intent Turn an ffprobe duration (e.g. "2.345000") into whole seconds for the lock timer
# @why Truncating would give 0 for sub-second clips and release the playback
#      lock immediately; round up and never return less than 1
# @param $1 Raw duration string (may be empty or non-numeric)
# @returns Whole seconds on stdout; 1 when the input is not a number
_lock_seconds_from_duration() {
  local raw="$1" whole frac=""
  whole="${raw%%.*}"
  [[ "$raw" == *.* ]] && frac="${raw#*.}"

  # SECURITY: Validate duration is numeric (#134)
  if ! [[ "$whole" =~ ^[0-9]+$ ]]; then
    echo 1
    return
  fi

  whole=$((10#$whole))
  # Any non-zero fractional digit means playback runs past the whole second.
  if [[ "$frac" =~ [1-9] ]] || ((whole == 0)); then
    whole=$((whole + 1))
  fi
  echo "$whole"
}

# Get audio duration for proper lock timing
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null || true)
DURATION=$(_lock_seconds_from_duration "$DURATION")
520
+
521
# Start playback in the background unless it is switched off.
# AGENTVIBES_TEST_MODE=true       : tests, nothing is played
# AGENTVIBES_NO_PLAYBACK=true     : generate audio only (for post-processing)
# PLAYER_PID stays empty when no player was started.
PLAYER_PID=""
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" && "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
  # Pick the player by platform; every branch launches exactly one background job.
  if [[ "$(uname -s)" == "Darwin" ]]; then
    # macOS: afplay (native macOS audio player)
    afplay "$TEMP_FILE" > /dev/null 2>&1 &
  elif [[ -n "${TERMUX_VERSION:-}" || -d "/data/data/com.termux" ]]; then
    # Android/Termux: termux-media-player
    termux-media-player play "$TEMP_FILE" > /dev/null 2>&1 &
  else
    # Linux/WSL: paplay (PulseAudio) first for best WSL audio quality,
    # then mpv, then aplay.
    (paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") > /dev/null 2>&1 &
  fi
  PLAYER_PID=$!
fi

# Release both locks (global and write lock) once the clip's duration has
# elapsed; the timer job is detached from this shell.
(
  sleep "$DURATION"
  rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE"
) &
disown
544
+
545
# Location of the audio cache, used for the banner and auto-cleanup below.
AUDIO_DIR_PATH=$(get_audio_dir)

# ANSI colour codes for the banner. Colours are safe on stdout because the
# WAV path is handed over via AGENTVIBES_WAV_OUTPATH, not parsed from stdout.
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
PURPLE='\033[0;35m'
RED='\033[0;31m'
GREEN='\033[0;32m'
ORANGE='\033[0;33m'
WHITE='\033[1;37m'
CYAN='\033[0;36m'
GOLD='\033[38;5;226m'
NC='\033[0m'

# The banner is on by default; a "banner-disabled" marker file in the global
# or project .agentvibes directory switches it off.
_BANNER_ENABLED=true
for _banner_flag in \
  "$HOME/.agentvibes/banner-disabled" \
  "${PROJECT_ROOT:-/nonexistent}/.agentvibes/banner-disabled"; do
  if [[ -f "$_banner_flag" ]]; then
    _BANNER_ENABLED=false
    break
  fi
done
567
+
568
# Run auto-cleanup off the critical path: only every 10th call, in background after playback starts.
# Counter file lives in the secure lock dir (user-specific, already created above).
AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
_CALL_COUNTER_FILE="$_LOCK_DIR/agentvibes-tts-call-count"
_CALL_COUNT=$(cat "$_CALL_COUNTER_FILE" 2>/dev/null || echo "0")
# SECURITY: Validate counter is numeric before arithmetic
if ! [[ "$_CALL_COUNT" =~ ^[0-9]+$ ]]; then _CALL_COUNT=0; fi
# Force base 10: a value such as "08" passes the check above but is rejected
# by $(( )) as invalid octal, which would abort the script under `set -e`.
_CALL_COUNT=$((10#$_CALL_COUNT + 1))
echo "$_CALL_COUNT" > "$_CALL_COUNTER_FILE"

if ((_CALL_COUNT % 10 == 0)); then
  # Capture values needed inside the subshell before forking
  _CLEANUP_AUDIO_DIR="$AUDIO_DIR_PATH"
  _CLEANUP_THRESHOLD="$AUTO_CLEAN_THRESHOLD"
  _CLEANUP_BANNER="$_BANNER_ENABLED"
  # Source the utils inside the subshell (functions are not exported)
  _CLEANUP_UTILS="$SCRIPT_DIR/audio-cache-utils.sh"
  (
    source "$_CLEANUP_UTILS" 2>/dev/null || exit 0
    # A non-numeric threshold or size cannot be compared; skip this round
    # quietly instead of raising an arithmetic error in the background.
    [[ "$_CLEANUP_THRESHOLD" =~ ^[0-9]+$ ]] || exit 0
    _INITIAL_SIZE=$(calculate_tts_size_bytes "$_CLEANUP_AUDIO_DIR")
    [[ "$_INITIAL_SIZE" =~ ^[0-9]+$ ]] || exit 0
    # The threshold is in megabytes (1048576 bytes each).
    if ((10#$_INITIAL_SIZE > 10#$_CLEANUP_THRESHOLD * 1048576)); then
      _DELETED=$(auto_clean_old_files "$_CLEANUP_AUDIO_DIR" "$_CLEANUP_THRESHOLD")
      if [[ ${_DELETED:-0} -gt 0 ]] && [[ "$_CLEANUP_BANNER" == "true" ]]; then
        echo -e "\033[0;33m🧹 Auto-cleaned $_DELETED old files\033[0m"
      fi
    fi
  ) &
  disown
fi
597
+
598
# Hand the output path to play-tts-enhanced.sh through a file, so nothing has
# to be parsed out of the (coloured) stdout.
if [[ -n "${AGENTVIBES_WAV_OUTPATH:-}" ]]; then
  echo "$TEMP_FILE" > "$AGENTVIBES_WAV_OUTPATH"
fi

# Banner: where the audio was saved, cache statistics, voice and personality.
if [[ "$_BANNER_ENABLED" == "true" ]]; then
  FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
  SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
  SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")

  # Cache colour: red above 3221225472 bytes, yellow above 524288000, else green.
  if [[ $SIZE_BYTES -gt 3221225472 ]]; then
    CACHE_COLOR=$RED
  elif [[ $SIZE_BYTES -gt 524288000 ]]; then
    CACHE_COLOR=$YELLOW
  else
    CACHE_COLOR=$GREEN
  fi

  echo -e "${WHITE}💾 Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}🗄️${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}🧹${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"

  if [[ -n "$BACKGROUND_MUSIC" ]]; then
    echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}$(basename "$BACKGROUND_MUSIC")${NC}"
  fi

  # Multi-speaker voices read from the voice file are shown by their
  # configured name; everything else by model name.
  _voice_label="$VOICE_MODEL"
  if [[ -n "${SPEAKER_ID:-}" && -n "${FILE_VOICE:-}" ]]; then
    _voice_label="$FILE_VOICE"
  fi
  echo -e "${WHITE}🎤 Voice used:${NC} ${BLUE}$_voice_label${NC} ${WHITE}(Piper TTS)${NC}"

  # Personality: project setting first, then global; "none"/"normal" are not shown.
  PERSONALITY=$(cat "${PROJECT_ROOT:-/nonexistent}/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
  if [[ -n "$PERSONALITY" && "$PERSONALITY" != "none" && "$PERSONALITY" != "normal" ]]; then
    echo -e "${WHITE}💫 Personality:${NC} ${YELLOW}$PERSONALITY${NC}"
  fi

  echo -e "\033[38;5;240mSay: \"Turn off banner\" to hide this output\033[0m"
fi
634
+
635
# @function _audio_cache_size_mb
# @intent Report a directory's size in whole megabytes
# @why du exits non-zero on partial errors (e.g. a file removed while it is
#      scanning); with pipefail + `set -e` that would abort the script before
#      the final wait on the audio player, so the status is ignored here
# @param $1 Directory to measure
# @returns Size in MB on stdout, or nothing when no numeric size is available
_audio_cache_size_mb() {
  local size
  size=$(du -sm "$1" 2>/dev/null | cut -f1) || true
  if [[ "$size" =~ ^[0-9]+$ ]]; then
    printf '%s' "$size"
  fi
}

# Check audio folder size and warn if getting large
if [[ "${_BANNER_ENABLED:-}" == "true" ]] && [[ -d "${AUDIO_DIR_PATH:-}" ]]; then
  AUDIO_SIZE=$(_audio_cache_size_mb "$AUDIO_DIR_PATH")
  if [[ -n "$AUDIO_SIZE" ]] && [[ "$AUDIO_SIZE" -gt 100 ]]; then
    echo -e "\033[0;31m⚠️ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
  fi
fi
642
+
643
# Status indicators: marker files that describe mute and background-music state.
_project_claude_dir="${PROJECT_ROOT:-/nonexistent}/.claude"
GLOBAL_MUTE_FILE="$HOME/.agentvibes-muted"
PROJECT_MUTE_FILE="$_project_claude_dir/agentvibes-muted"
PROJECT_UNMUTE_FILE="$_project_claude_dir/agentvibes-unmuted"
BACKGROUND_ENABLED_FILE="$_project_claude_dir/config/background-music-enabled.txt"
GLOBAL_BACKGROUND_ENABLED_FILE="$HOME/.claude/config/background-music-enabled.txt"

# Mute status: a project unmute marker wins over a global mute, then the
# project mute marker, then the global one.
if [[ -f "$PROJECT_UNMUTE_FILE" && -f "$GLOBAL_MUTE_FILE" ]]; then
  echo "🔊 Status: Unmuted (project overrides global mute)"
elif [[ -f "$PROJECT_MUTE_FILE" ]]; then
  echo "🔇 Status: Muted (project)"
elif [[ -f "$GLOBAL_MUTE_FILE" ]]; then
  echo "🔇 Status: Would be muted (global) - but this project is speaking"
fi

# Background music status: only reported when no track was mixed into this clip.
if [[ -z "$BACKGROUND_MUSIC" ]]; then
  _bg_enabled=false
  # Project setting is consulted first, then the global one.
  for _bg_flag_file in "$BACKGROUND_ENABLED_FILE" "$GLOBAL_BACKGROUND_ENABLED_FILE"; do
    if [[ -f "$_bg_flag_file" ]] && grep -q "true" "$_bg_flag_file" 2>/dev/null; then
      _bg_enabled=true
      break
    fi
  done

  if [[ "$_bg_enabled" == "true" ]]; then
    echo "🎵 Background music: Enabled but not playing (check config)"
  else
    echo "🎵 Background music: Disabled"
  fi
fi
673
+
674
# Block until the audio player has exited (when one was started).
# Returning only after playback keeps the bmad-speak.sh speech lock held for
# the whole clip, so party-mode agents do not talk over each other.
# The player's exit status is deliberately ignored.
[[ -z "$PLAYER_PID" ]] || wait "$PLAYER_PID" 2>/dev/null || true