agentvibes 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agentvibes/bmad/bmad-voices.md +69 -69
- package/.agentvibes/config.json +12 -0
- package/.claude/activation-instructions +54 -54
- package/.claude/audio/tracks/README.md +52 -52
- package/.claude/commands/agent-vibes/add.md +21 -21
- package/.claude/commands/agent-vibes/agent-vibes.md +101 -101
- package/.claude/commands/agent-vibes/agent.md +79 -79
- package/.claude/commands/agent-vibes/background-music.md +111 -111
- package/.claude/commands/agent-vibes/bmad.md +198 -198
- package/.claude/commands/agent-vibes/clean.md +18 -18
- package/.claude/commands/agent-vibes/cleanup.md +18 -18
- package/.claude/commands/agent-vibes/commands.json +145 -145
- package/.claude/commands/agent-vibes/effects.md +97 -97
- package/.claude/commands/agent-vibes/get.md +9 -9
- package/.claude/commands/agent-vibes/hide.md +91 -91
- package/.claude/commands/agent-vibes/language.md +23 -23
- package/.claude/commands/agent-vibes/learn.md +67 -67
- package/.claude/commands/agent-vibes/list.md +13 -13
- package/.claude/commands/agent-vibes/mute.md +37 -37
- package/.claude/commands/agent-vibes/preview.md +17 -17
- package/.claude/commands/agent-vibes/provider.md +68 -68
- package/.claude/commands/agent-vibes/replay-target.md +14 -14
- package/.claude/commands/agent-vibes/sample.md +12 -12
- package/.claude/commands/agent-vibes/set-favorite-voice.md +84 -84
- package/.claude/commands/agent-vibes/set-pretext.md +65 -65
- package/.claude/commands/agent-vibes/set-speed.md +41 -41
- package/.claude/commands/agent-vibes/show.md +84 -84
- package/.claude/commands/agent-vibes/switch.md +87 -87
- package/.claude/commands/agent-vibes/target-voice.md +26 -26
- package/.claude/commands/agent-vibes/target.md +30 -30
- package/.claude/commands/agent-vibes/translate.md +68 -68
- package/.claude/commands/agent-vibes/unmute.md +45 -45
- package/.claude/commands/agent-vibes/verbosity.md +89 -89
- package/.claude/commands/agent-vibes/whoami.md +7 -7
- package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
- package/.claude/commands/agent-vibes-rdp.md +24 -24
- package/.claude/config/agentvibes.json +1 -0
- package/.claude/config/audio-effects.cfg +2 -2
- package/.claude/config/audio-effects.cfg.sample +52 -52
- package/.claude/config/background-music-volume.txt +1 -0
- package/.claude/config/intro-text.txt +1 -0
- package/.claude/config/piper-speech-rate.txt +4 -0
- package/.claude/config/piper-target-speech-rate.txt +1 -0
- package/.claude/config/reverb-level.txt +1 -0
- package/.claude/config/tts-speech-rate.txt +4 -0
- package/.claude/config/tts-target-speech-rate.txt +1 -0
- package/.claude/docs/TERMUX_SETUP.md +408 -408
- package/.claude/github-star-reminder.txt +1 -1
- package/.claude/hooks/README-TTS-QUEUE.md +135 -135
- package/.claude/hooks/audio-cache-utils.sh +246 -246
- package/.claude/hooks/audio-processor.sh +433 -433
- package/.claude/hooks/background-music-manager.sh +404 -404
- package/.claude/hooks/bmad-speak-enhanced.sh +165 -165
- package/.claude/hooks/bmad-speak.sh +269 -269
- package/.claude/hooks/bmad-tts-injector.sh +568 -568
- package/.claude/hooks/bmad-voice-manager.sh +928 -928
- package/.claude/hooks/clawdbot-receiver-SECURE.sh +129 -129
- package/.claude/hooks/clawdbot-receiver.sh +107 -107
- package/.claude/hooks/clean-audio-cache.sh +22 -22
- package/.claude/hooks/cleanup-cache.sh +106 -106
- package/.claude/hooks/configure-rdp-mode.sh +137 -137
- package/.claude/hooks/download-extra-voices.sh +244 -244
- package/.claude/hooks/effects-manager.sh +268 -268
- package/.claude/hooks/github-star-reminder.sh +154 -154
- package/.claude/hooks/language-manager.sh +362 -362
- package/.claude/hooks/learn-manager.sh +492 -492
- package/.claude/hooks/macos-voice-manager.sh +205 -205
- package/.claude/hooks/migrate-background-music.sh +125 -125
- package/.claude/hooks/migrate-to-agentvibes.sh +161 -161
- package/.claude/hooks/optimize-background-music.sh +87 -87
- package/.claude/hooks/path-resolver.sh +60 -60
- package/.claude/hooks/personality-manager.sh +448 -448
- package/.claude/hooks/piper-download-voices.sh +225 -225
- package/.claude/hooks/piper-installer.sh +292 -292
- package/.claude/hooks/piper-multispeaker-registry.sh +171 -171
- package/.claude/hooks/piper-voice-manager.sh +24 -3
- package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +90 -90
- package/.claude/hooks/play-tts-enhanced.sh +105 -105
- package/.claude/hooks/play-tts-macos.sh +368 -368
- package/.claude/hooks/play-tts-piper.sh +679 -679
- package/.claude/hooks/play-tts-soprano.sh +356 -356
- package/.claude/hooks/play-tts-ssh-remote.sh +167 -167
- package/.claude/hooks/play-tts-termux-ssh.sh +169 -169
- package/.claude/hooks/play-tts.sh +301 -301
- package/.claude/hooks/prepare-release.sh +54 -54
- package/.claude/hooks/provider-commands.sh +617 -617
- package/.claude/hooks/provider-manager.sh +399 -399
- package/.claude/hooks/replay-target-audio.sh +95 -95
- package/.claude/hooks/requirements.txt +6 -6
- package/.claude/hooks/sentiment-manager.sh +201 -201
- package/.claude/hooks/session-start-tts.sh +81 -81
- package/.claude/hooks/soprano-gradio-synth.py +139 -139
- package/.claude/hooks/speed-manager.sh +291 -291
- package/.claude/hooks/stop-tts.sh +84 -84
- package/.claude/hooks/termux-installer.sh +261 -261
- package/.claude/hooks/translate-manager.sh +341 -341
- package/.claude/hooks/translator.py +237 -237
- package/.claude/hooks/tts-queue-worker.sh +145 -145
- package/.claude/hooks/tts-queue.sh +165 -165
- package/.claude/hooks/verbosity-manager.sh +178 -178
- package/.claude/hooks/voice-manager.sh +548 -548
- package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
- package/.claude/hooks-windows/background-music-manager.ps1 +348 -0
- package/.claude/hooks-windows/clean-audio-cache.ps1 +53 -0
- package/.claude/hooks-windows/download-extra-voices.ps1 +185 -0
- package/.claude/hooks-windows/effects-manager.ps1 +294 -0
- package/.claude/hooks-windows/language-manager.ps1 +193 -0
- package/.claude/hooks-windows/learn-manager.ps1 +241 -0
- package/.claude/hooks-windows/personality-manager.ps1 +266 -0
- package/.claude/hooks-windows/play-tts-piper.ps1 +209 -0
- package/.claude/hooks-windows/play-tts-sapi.ps1 +108 -0
- package/.claude/hooks-windows/play-tts-soprano.ps1 +159 -158
- package/.claude/hooks-windows/play-tts-windows-piper.ps1 +50 -5
- package/.claude/hooks-windows/play-tts-windows-sapi.ps1 +108 -108
- package/.claude/hooks-windows/play-tts.ps1 +344 -266
- package/.claude/hooks-windows/provider-manager.ps1 +29 -10
- package/.claude/hooks-windows/session-start-tts.ps1 +124 -124
- package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
- package/.claude/hooks-windows/speed-manager.ps1 +166 -0
- package/.claude/hooks-windows/verbosity-manager.ps1 +119 -0
- package/.claude/hooks-windows/voice-manager-windows.ps1 +92 -8
- package/.claude/output-styles/agent-vibes.md +202 -202
- package/.claude/personalities/angry.md +14 -14
- package/.claude/personalities/annoying.md +14 -14
- package/.claude/personalities/crass.md +14 -14
- package/.claude/personalities/dramatic.md +14 -14
- package/.claude/personalities/dry-humor.md +50 -50
- package/.claude/personalities/flirty.md +20 -20
- package/.claude/personalities/funny.md +14 -14
- package/.claude/personalities/grandpa.md +32 -32
- package/.claude/personalities/millennial.md +14 -14
- package/.claude/personalities/moody.md +14 -14
- package/.claude/personalities/normal.md +16 -16
- package/.claude/personalities/pirate.md +14 -14
- package/.claude/personalities/poetic.md +14 -14
- package/.claude/personalities/professional.md +14 -14
- package/.claude/personalities/rapper.md +55 -55
- package/.claude/personalities/robot.md +14 -14
- package/.claude/personalities/sarcastic.md +38 -38
- package/.claude/personalities/sassy.md +14 -14
- package/.claude/personalities/surfer-dude.md +14 -14
- package/.claude/personalities/zen.md +14 -14
- package/.claude/settings.json +15 -15
- package/.claude/verbosity.txt +1 -1
- package/.clawdbot/README.md +105 -105
- package/.clawdbot/skill/SKILL.md +241 -241
- package/.mcp.json +12 -0
- package/CLAUDE.md +170 -170
- package/README.md +2029 -2007
- package/RELEASE_NOTES.md +1310 -1203
- package/WINDOWS-SETUP.md +208 -208
- package/bin/agent-vibes +39 -39
- package/bin/agentvibes-voice-browser.js +1840 -1840
- package/bin/agentvibes.js +48 -2
- package/bin/mcp-server.js +121 -121
- package/bin/mcp-server.sh +206 -206
- package/bin/test-bmad-pr +78 -78
- package/mcp-server/QUICK_START.md +203 -203
- package/mcp-server/README.md +345 -345
- package/mcp-server/WINDOWS_SETUP.md +260 -260
- package/mcp-server/docs/troubleshooting-audio.md +313 -313
- package/mcp-server/examples/claude_desktop_config.json +11 -11
- package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
- package/mcp-server/examples/custom_instructions.md +169 -169
- package/mcp-server/install-deps.js +130 -130
- package/mcp-server/pyproject.toml +52 -52
- package/mcp-server/requirements.txt +2 -2
- package/mcp-server/server.py +1465 -1453
- package/mcp-server/test_server.py +395 -395
- package/mcp-server/test_windows_script_parity.py +336 -0
- package/package.json +110 -110
- package/setup-windows.ps1 +815 -815
- package/src/bmad-detector.js +71 -71
- package/src/cli/list-personalities.js +110 -110
- package/src/cli/list-voices.js +114 -114
- package/src/commands/bmad-voices.js +394 -394
- package/src/commands/install-mcp.js +476 -476
- package/src/console/app.js +824 -824
- package/src/console/audio-env.js +20 -1
- package/src/console/brand-colors.js +13 -13
- package/src/console/constants/personalities.js +44 -44
- package/src/console/footer-config.js +50 -50
- package/src/console/modals/modal-overlay.js +247 -247
- package/src/console/navigation.js +62 -62
- package/src/console/tabs/agents-tab.js +1684 -1516
- package/src/console/tabs/help-tab.js +261 -261
- package/src/console/tabs/install-tab.js +1007 -991
- package/src/console/tabs/music-tab.js +22 -8
- package/src/console/tabs/placeholder-tab.js +53 -53
- package/src/console/tabs/readme-tab.js +267 -267
- package/src/console/tabs/receiver-tab.js +1472 -1212
- package/src/console/tabs/settings-tab.js +152 -79
- package/src/console/tabs/voices-tab.js +100 -21
- package/src/console/widgets/destroy-list.js +25 -25
- package/src/console/widgets/format-utils.js +89 -89
- package/src/console/widgets/notice.js +55 -55
- package/src/console/widgets/personality-picker.js +185 -185
- package/src/console/widgets/reverb-picker.js +94 -94
- package/src/console/widgets/track-picker.js +285 -285
- package/src/installer/music-file-input.js +304 -304
- package/src/installer.js +5882 -5829
- package/src/services/agent-voice-store.js +423 -423
- package/src/services/config-service.js +264 -264
- package/src/services/navigation-service.js +123 -123
- package/src/services/provider-service.js +132 -132
- package/src/services/verbosity-service.js +157 -157
- package/src/utils/audio-duration-validator.js +298 -298
- package/src/utils/audio-format-validator.js +277 -277
- package/src/utils/dependency-checker.js +469 -466
- package/src/utils/file-ownership-verifier.js +358 -358
- package/src/utils/list-formatter.js +194 -194
- package/src/utils/music-file-validator.js +285 -285
- package/src/utils/preview-list-prompt.js +136 -136
- package/src/utils/provider-validator.js +96 -12
- package/src/utils/secure-music-storage.js +412 -412
- package/templates/agentvibes-receiver.sh +482 -482
- package/templates/audio/welcome-music.mp3 +0 -0
- package/voice-assignments.json +8244 -8244
- package/.claude/config/background-music-position.txt +0 -1
|
@@ -1,679 +1,679 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
#
|
|
3
|
-
# File: .claude/hooks/play-tts-piper.sh
|
|
4
|
-
#
|
|
5
|
-
# AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
|
|
6
|
-
# Website: https://agentvibes.org
|
|
7
|
-
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
-
#
|
|
9
|
-
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
-
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
-
#
|
|
12
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
-
# you may not use this file except in compliance with the License.
|
|
14
|
-
# You may obtain a copy of the License at
|
|
15
|
-
#
|
|
16
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
-
#
|
|
18
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
-
# See the License for the specific language governing permissions and
|
|
22
|
-
# limitations under the License.
|
|
23
|
-
#
|
|
24
|
-
# DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
-
# express or implied. Use at your own risk. See the Apache License for details.
|
|
26
|
-
#
|
|
27
|
-
# ---
|
|
28
|
-
#
|
|
29
|
-
# @fileoverview Piper TTS Provider Implementation - Free, offline neural TTS
|
|
30
|
-
# @context Provides local, privacy-first TTS alternative to cloud services for WSL/Linux
|
|
31
|
-
# @architecture Implements provider interface contract for Piper binary integration
|
|
32
|
-
# @dependencies piper (pipx), piper-voice-manager.sh, mpv/aplay, ffmpeg (optional padding)
|
|
33
|
-
# @entrypoints Called by play-tts.sh router when provider=piper
|
|
34
|
-
# @patterns Provider contract: text/voice → audio file path, voice auto-download, language-aware synthesis
|
|
35
|
-
# @related play-tts.sh, piper-voice-manager.sh, language-manager.sh, GitHub Issue #25
|
|
36
|
-
#
|
|
37
|
-
|
|
38
|
-
set -eo pipefail
|
|
39
|
-
# Note: -u (nounset) omitted because sourced scripts (piper-voice-manager.sh,
|
|
40
|
-
# language-manager.sh, audio-cache-utils.sh) use unset variables freely.
|
|
41
|
-
# Variables in THIS script use ${VAR:-} defaults for safety.
|
|
42
|
-
|
|
43
|
-
# Cleanup handler for temp files (preserves final output in $TEMP_FILE)
|
|
44
|
-
_CLEANUP_FILES=()
|
|
45
|
-
cleanup() {
|
|
46
|
-
local f
|
|
47
|
-
for f in "${_CLEANUP_FILES[@]+"${_CLEANUP_FILES[@]}"}"; do
|
|
48
|
-
[[ "$f" == "${TEMP_FILE:-}" ]] && continue
|
|
49
|
-
rm -f "$f"
|
|
50
|
-
done
|
|
51
|
-
}
|
|
52
|
-
trap cleanup EXIT
|
|
53
|
-
|
|
54
|
-
# Fix locale warnings
|
|
55
|
-
export LC_ALL=C
|
|
56
|
-
|
|
57
|
-
TEXT="${1:-}"
|
|
58
|
-
VOICE_OVERRIDE="${2:-}" # Optional: voice model name
|
|
59
|
-
AGENT_PROFILE_FILE="${3:-}" # Optional: path to per-agent profile JSON (from bmad-speak.sh)
|
|
60
|
-
|
|
61
|
-
# Strip emojis, asterisks, and markdown formatting that Piper would speak literally
|
|
62
|
-
TEXT=$(printf '%s' "$TEXT" | perl -CSD -pe '
|
|
63
|
-
s/[\x{1F300}-\x{1F9FF}]//g; # emoticons, symbols, pictographs
|
|
64
|
-
s/[\x{2600}-\x{27BF}]//g; # misc symbols, dingbats
|
|
65
|
-
s/[\x{FE00}-\x{FE0F}]//g; # variation selectors
|
|
66
|
-
s/[\x{200D}]//g; # zero-width joiner
|
|
67
|
-
s/[\x{2500}-\x{257F}]//g; # box drawing (─│ etc)
|
|
68
|
-
s/[\x{2580}-\x{259F}]//g; # block elements
|
|
69
|
-
s/\*+//g; # asterisks (bold/italic markdown)
|
|
70
|
-
s/#+\s*//g; # heading markers
|
|
71
|
-
s/`//g; # backticks
|
|
72
|
-
s/~+//g; # strikethrough
|
|
73
|
-
s/^\s*[-]\s*//g; # list dashes
|
|
74
|
-
')
|
|
75
|
-
|
|
76
|
-
# Source voice manager and language manager
|
|
77
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
78
|
-
source "$SCRIPT_DIR/piper-voice-manager.sh"
|
|
79
|
-
source "$SCRIPT_DIR/language-manager.sh"
|
|
80
|
-
source "$SCRIPT_DIR/audio-cache-utils.sh"
|
|
81
|
-
|
|
82
|
-
# Default voice for Piper
|
|
83
|
-
DEFAULT_VOICE="en_US-lessac-medium"
|
|
84
|
-
|
|
85
|
-
# @function determine_voice_model
|
|
86
|
-
# @intent Resolve voice name to Piper model name with language support
|
|
87
|
-
# @why Support voice override, language-specific voices, and default fallback
|
|
88
|
-
# @param Uses global: $VOICE_OVERRIDE
|
|
89
|
-
# @returns Sets $VOICE_MODEL global variable
|
|
90
|
-
# @sideeffects None
|
|
91
|
-
VOICE_MODEL=""
|
|
92
|
-
|
|
93
|
-
# Get current language setting
|
|
94
|
-
CURRENT_LANGUAGE=$(get_language_code)
|
|
95
|
-
|
|
96
|
-
if [[ -n "$VOICE_OVERRIDE" ]]; then
|
|
97
|
-
# Use override if provided
|
|
98
|
-
# Handle multi-speaker format: "Model::SpeakerName" → split into model + speaker lookup
|
|
99
|
-
if [[ "$VOICE_OVERRIDE" == *"::"* ]]; then
|
|
100
|
-
VOICE_MODEL="${VOICE_OVERRIDE%%::*}"
|
|
101
|
-
_SPEAKER_NAME="${VOICE_OVERRIDE#*::}"
|
|
102
|
-
# Look up speaker ID from the model's .onnx.json speaker_id_map
|
|
103
|
-
voice_dir=$(get_voice_storage_dir)
|
|
104
|
-
_JSON_FILE="$voice_dir/${VOICE_MODEL}.onnx.json"
|
|
105
|
-
if [[ -f "$_JSON_FILE" ]]; then
|
|
106
|
-
# SECURITY: Pass values via env vars to prevent shell injection
|
|
107
|
-
SPEAKER_ID=$(_JSON="$_JSON_FILE" _SPKR="$_SPEAKER_NAME" node -e "
|
|
108
|
-
try {
|
|
109
|
-
const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
|
|
110
|
-
const map = j.speaker_id_map || {};
|
|
111
|
-
const id = map[process.env._SPKR];
|
|
112
|
-
if (id !== undefined) process.stdout.write(String(id));
|
|
113
|
-
} catch {}
|
|
114
|
-
" 2>/dev/null || true)
|
|
115
|
-
fi
|
|
116
|
-
echo "๐ญ Using multi-speaker voice: $VOICE_OVERRIDE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
|
|
117
|
-
else
|
|
118
|
-
VOICE_MODEL="$VOICE_OVERRIDE"
|
|
119
|
-
echo "๐ค Using voice: $VOICE_OVERRIDE (session-specific)"
|
|
120
|
-
fi
|
|
121
|
-
else
|
|
122
|
-
# Try to get voice from voice file (check CLAUDE_PROJECT_DIR first for MCP context)
|
|
123
|
-
VOICE_FILE=""
|
|
124
|
-
|
|
125
|
-
# Priority order:
|
|
126
|
-
# 1. CLAUDE_PROJECT_DIR env var (set by MCP for project-specific settings)
|
|
127
|
-
# 2. Script location (for direct slash command usage)
|
|
128
|
-
# 3. Global ~/.claude (fallback)
|
|
129
|
-
|
|
130
|
-
# SECURITY: Canonicalize path to prevent traversal (#128)
|
|
131
|
-
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
|
|
132
|
-
CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
|
|
133
|
-
fi
|
|
134
|
-
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]] && [[ -f "$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt" ]]; then
|
|
135
|
-
# MCP context: Use the project directory where MCP was invoked
|
|
136
|
-
VOICE_FILE="$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt"
|
|
137
|
-
elif [[ -f "$SCRIPT_DIR/../tts-voice.txt" ]]; then
|
|
138
|
-
# Direct usage: Use script location
|
|
139
|
-
VOICE_FILE="$SCRIPT_DIR/../tts-voice.txt"
|
|
140
|
-
elif [[ -f "$HOME/.claude/tts-voice.txt" ]]; then
|
|
141
|
-
# Fallback: Use global
|
|
142
|
-
VOICE_FILE="$HOME/.claude/tts-voice.txt"
|
|
143
|
-
fi
|
|
144
|
-
|
|
145
|
-
if [[ -n "$VOICE_FILE" ]]; then
|
|
146
|
-
FILE_VOICE=$(cat "$VOICE_FILE" 2>/dev/null)
|
|
147
|
-
|
|
148
|
-
# Check for multi-speaker voice (model + speaker ID stored separately)
|
|
149
|
-
# Use same directory as VOICE_FILE for consistency
|
|
150
|
-
VOICE_DIR=$(dirname "$VOICE_FILE")
|
|
151
|
-
MODEL_FILE="$VOICE_DIR/tts-piper-model.txt"
|
|
152
|
-
SPEAKER_ID_FILE="$VOICE_DIR/tts-piper-speaker-id.txt"
|
|
153
|
-
|
|
154
|
-
if [[ -f "$MODEL_FILE" ]] && [[ -f "$SPEAKER_ID_FILE" ]]; then
|
|
155
|
-
# Multi-speaker voice config found locally
|
|
156
|
-
VOICE_MODEL=$(cat "$MODEL_FILE" 2>/dev/null)
|
|
157
|
-
SPEAKER_ID=$(cat "$SPEAKER_ID_FILE" 2>/dev/null)
|
|
158
|
-
# Validate speaker ID is numeric
|
|
159
|
-
if [[ -n "$SPEAKER_ID" ]] && ! [[ "$SPEAKER_ID" =~ ^[0-9]+$ ]]; then
|
|
160
|
-
echo "Warning: Invalid speaker ID '$SPEAKER_ID', ignoring" >&2
|
|
161
|
-
SPEAKER_ID=""
|
|
162
|
-
fi
|
|
163
|
-
echo "๐ญ Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-none})"
|
|
164
|
-
# Check if voice uses Model::SpeakerName format (from AgentVibes config)
|
|
165
|
-
elif [[ -n "$FILE_VOICE" ]] && [[ "$FILE_VOICE" == *"::"* ]]; then
|
|
166
|
-
VOICE_MODEL="${FILE_VOICE%%::*}"
|
|
167
|
-
_SPEAKER_NAME="${FILE_VOICE#*::}"
|
|
168
|
-
voice_dir=$(get_voice_storage_dir)
|
|
169
|
-
_JSON_FILE="$voice_dir/${VOICE_MODEL}.onnx.json"
|
|
170
|
-
if [[ -f "$_JSON_FILE" ]]; then
|
|
171
|
-
# SECURITY: Pass values via env vars to prevent shell injection
|
|
172
|
-
SPEAKER_ID=$(_JSON="$_JSON_FILE" _SPKR="$_SPEAKER_NAME" node -e "
|
|
173
|
-
try {
|
|
174
|
-
const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
|
|
175
|
-
const map = j.speaker_id_map || {};
|
|
176
|
-
const id = map[process.env._SPKR];
|
|
177
|
-
if (id !== undefined) process.stdout.write(String(id));
|
|
178
|
-
} catch {}
|
|
179
|
-
" 2>/dev/null || true)
|
|
180
|
-
fi
|
|
181
|
-
echo "๐ญ Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
|
|
182
|
-
# Standard Piper model name or custom voice (just use as-is)
|
|
183
|
-
elif [[ -n "$FILE_VOICE" ]]; then
|
|
184
|
-
# Strip multi-speaker suffix if present (model::SpeakerName-Label)
|
|
185
|
-
if [[ "$FILE_VOICE" == *"::"* ]]; then
|
|
186
|
-
VOICE_MODEL="${FILE_VOICE%%::*}"
|
|
187
|
-
else
|
|
188
|
-
VOICE_MODEL="$FILE_VOICE"
|
|
189
|
-
fi
|
|
190
|
-
fi
|
|
191
|
-
fi
|
|
192
|
-
|
|
193
|
-
# If no Piper voice from file, try language-specific voice
|
|
194
|
-
if [[ -z "$VOICE_MODEL" ]]; then
|
|
195
|
-
LANG_VOICE=$(get_voice_for_language "$CURRENT_LANGUAGE" "piper" 2>/dev/null)
|
|
196
|
-
|
|
197
|
-
if [[ -n "$LANG_VOICE" ]]; then
|
|
198
|
-
VOICE_MODEL="$LANG_VOICE"
|
|
199
|
-
echo "๐ Using $CURRENT_LANGUAGE voice: $LANG_VOICE (Piper)"
|
|
200
|
-
else
|
|
201
|
-
# Use default voice
|
|
202
|
-
VOICE_MODEL="$DEFAULT_VOICE"
|
|
203
|
-
fi
|
|
204
|
-
fi
|
|
205
|
-
fi
|
|
206
|
-
|
|
207
|
-
# @function validate_inputs
|
|
208
|
-
# @intent Check required parameters
|
|
209
|
-
# @why Fail fast with clear errors if inputs missing
|
|
210
|
-
# @exitcode 1=missing text, 2=missing piper binary
|
|
211
|
-
if [[ -z "$TEXT" ]]; then
|
|
212
|
-
echo "Usage: $0 \"text to speak\" [voice_model_name]"
|
|
213
|
-
exit 1
|
|
214
|
-
fi
|
|
215
|
-
|
|
216
|
-
# Check if Piper is installed
|
|
217
|
-
if ! command -v piper &> /dev/null; then
|
|
218
|
-
echo "โ Error: Piper TTS not installed"
|
|
219
|
-
echo "Install with: pipx install piper-tts"
|
|
220
|
-
echo "Or run: .claude/hooks/piper-installer.sh"
|
|
221
|
-
exit 2
|
|
222
|
-
fi
|
|
223
|
-
|
|
224
|
-
# @function ensure_voice_downloaded
|
|
225
|
-
# @intent Download voice model if not cached
|
|
226
|
-
# @why Provide seamless experience with automatic downloads
|
|
227
|
-
# @param Uses global: $VOICE_MODEL
|
|
228
|
-
# @sideeffects Downloads voice model files
|
|
229
|
-
# @edgecases Prompts user for consent before downloading, skipped in test mode
|
|
230
|
-
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && ! verify_voice "$VOICE_MODEL"; then
|
|
231
|
-
echo "๐ฅ Voice model not found: $VOICE_MODEL"
|
|
232
|
-
echo " File size: ~25MB"
|
|
233
|
-
echo " Preview: https://huggingface.co/rhasspy/piper-voices"
|
|
234
|
-
echo ""
|
|
235
|
-
read -p " Download this voice model? [y/N]: " -n 1 -r
|
|
236
|
-
echo
|
|
237
|
-
|
|
238
|
-
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
|
239
|
-
if ! download_voice "$VOICE_MODEL"; then
|
|
240
|
-
echo "โ Failed to download voice model"
|
|
241
|
-
echo "Fix: Download manually or choose different voice"
|
|
242
|
-
exit 3
|
|
243
|
-
fi
|
|
244
|
-
else
|
|
245
|
-
echo "โ Voice download cancelled"
|
|
246
|
-
exit 3
|
|
247
|
-
fi
|
|
248
|
-
fi
|
|
249
|
-
|
|
250
|
-
# Get voice model path
|
|
251
|
-
# In test mode, use a fake path since we have mock piper that doesn't need real files
|
|
252
|
-
if [[ "${AGENTVIBES_TEST_MODE:-false}" == "true" ]]; then
|
|
253
|
-
VOICE_PATH="/tmp/mock-voice-${VOICE_MODEL}.onnx"
|
|
254
|
-
else
|
|
255
|
-
VOICE_PATH=$(get_voice_path "$VOICE_MODEL")
|
|
256
|
-
if [[ $? -ne 0 ]]; then
|
|
257
|
-
echo "โ Voice model path not found: $VOICE_MODEL"
|
|
258
|
-
exit 3
|
|
259
|
-
fi
|
|
260
|
-
fi
|
|
261
|
-
|
|
262
|
-
# @function determine_audio_directory
|
|
263
|
-
# @intent Find appropriate directory for audio file storage
|
|
264
|
-
# @why Supports project-local and global storage
|
|
265
|
-
# @returns Sets $AUDIO_DIR global variable
|
|
266
|
-
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
|
|
267
|
-
AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
|
|
268
|
-
else
|
|
269
|
-
# Fallback: try to find .claude directory in current path
|
|
270
|
-
CURRENT_DIR="$PWD"
|
|
271
|
-
while [[ "$CURRENT_DIR" != "/" ]]; do
|
|
272
|
-
if [[ -d "$CURRENT_DIR/.claude" ]]; then
|
|
273
|
-
AUDIO_DIR="$CURRENT_DIR/.claude/audio"
|
|
274
|
-
break
|
|
275
|
-
fi
|
|
276
|
-
CURRENT_DIR=$(dirname "$CURRENT_DIR")
|
|
277
|
-
done
|
|
278
|
-
# Final fallback to global if no project .claude found
|
|
279
|
-
if [[ -z "${AUDIO_DIR:-}" ]]; then
|
|
280
|
-
AUDIO_DIR="$HOME/.claude/audio"
|
|
281
|
-
fi
|
|
282
|
-
fi
|
|
283
|
-
|
|
284
|
-
mkdir -p "$AUDIO_DIR"
|
|
285
|
-
TEMP_FILE=$(mktemp "$AUDIO_DIR/tts-XXXXXX.wav")
|
|
286
|
-
|
|
287
|
-
# @function get_speech_rate
|
|
288
|
-
# @intent Determine speech rate for Piper synthesis
|
|
289
|
-
# @why Convert user-facing speed (0.5=slower, 2.0=faster) to Piper length-scale (inverted)
|
|
290
|
-
# @returns Piper length-scale value (inverted from user scale)
|
|
291
|
-
# @note Piper uses length-scale where higher=slower, opposite of user expectation
|
|
292
|
-
get_speech_rate() {
|
|
293
|
-
local target_config=""
|
|
294
|
-
local main_config=""
|
|
295
|
-
|
|
296
|
-
# Check for target-specific config first (new and legacy paths)
|
|
297
|
-
if [[ -f "$SCRIPT_DIR/../config/tts-target-speech-rate.txt" ]]; then
|
|
298
|
-
target_config="$SCRIPT_DIR/../config/tts-target-speech-rate.txt"
|
|
299
|
-
elif [[ -f "$HOME/.claude/config/tts-target-speech-rate.txt" ]]; then
|
|
300
|
-
target_config="$HOME/.claude/config/tts-target-speech-rate.txt"
|
|
301
|
-
elif [[ -f "$SCRIPT_DIR/../config/piper-target-speech-rate.txt" ]]; then
|
|
302
|
-
target_config="$SCRIPT_DIR/../config/piper-target-speech-rate.txt"
|
|
303
|
-
elif [[ -f "$HOME/.claude/config/piper-target-speech-rate.txt" ]]; then
|
|
304
|
-
target_config="$HOME/.claude/config/piper-target-speech-rate.txt"
|
|
305
|
-
fi
|
|
306
|
-
|
|
307
|
-
# Check for main config (new and legacy paths)
|
|
308
|
-
if [[ -f "$SCRIPT_DIR/../config/tts-speech-rate.txt" ]]; then
|
|
309
|
-
main_config="$SCRIPT_DIR/../config/tts-speech-rate.txt"
|
|
310
|
-
elif [[ -f "$HOME/.claude/config/tts-speech-rate.txt" ]]; then
|
|
311
|
-
main_config="$HOME/.claude/config/tts-speech-rate.txt"
|
|
312
|
-
elif [[ -f "$SCRIPT_DIR/../config/piper-speech-rate.txt" ]]; then
|
|
313
|
-
main_config="$SCRIPT_DIR/../config/piper-speech-rate.txt"
|
|
314
|
-
elif [[ -f "$HOME/.claude/config/piper-speech-rate.txt" ]]; then
|
|
315
|
-
main_config="$HOME/.claude/config/piper-speech-rate.txt"
|
|
316
|
-
fi
|
|
317
|
-
|
|
318
|
-
# If this is a non-English voice and target config exists, use it
|
|
319
|
-
if [[ "$CURRENT_LANGUAGE" != "english" ]] && [[ -n "$target_config" ]]; then
|
|
320
|
-
local user_speed=$(cat "$target_config" 2>/dev/null)
|
|
321
|
-
# Validate speed is a positive number
|
|
322
|
-
if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
|
|
323
|
-
echo "1.0"
|
|
324
|
-
return
|
|
325
|
-
fi
|
|
326
|
-
# Convert user speed to Piper length-scale (invert)
|
|
327
|
-
# User: 0.5=slower, 1.0=normal, 2.0=faster
|
|
328
|
-
# Piper: 2.0=slower, 1.0=normal, 0.5=faster
|
|
329
|
-
# Formula: piper_length_scale = 1.0 / user_speed
|
|
330
|
-
echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
|
|
331
|
-
return
|
|
332
|
-
fi
|
|
333
|
-
|
|
334
|
-
# Otherwise use main config if available
|
|
335
|
-
if [[ -n "$main_config" ]]; then
|
|
336
|
-
local user_speed=$(grep -v '^#' "$main_config" 2>/dev/null | grep -v '^$' | tail -1)
|
|
337
|
-
# Validate speed is a positive number
|
|
338
|
-
if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || [[ "$user_speed" == "0" ]] || [[ "$user_speed" == "0.0" ]]; then
|
|
339
|
-
echo "1.0"
|
|
340
|
-
return
|
|
341
|
-
fi
|
|
342
|
-
echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null || echo "1.0"
|
|
343
|
-
return
|
|
344
|
-
fi
|
|
345
|
-
|
|
346
|
-
# Default: 1.0 (normal) for English, 2.0 (slower) for learning
|
|
347
|
-
if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
|
|
348
|
-
echo "2.0"
|
|
349
|
-
else
|
|
350
|
-
echo "1.0"
|
|
351
|
-
fi
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
SPEECH_RATE=$(get_speech_rate)
|
|
355
|
-
|
|
356
|
-
# @function synthesize_with_piper
|
|
357
|
-
# @intent Generate speech using Piper TTS
|
|
358
|
-
# @why Provides free, offline TTS alternative
|
|
359
|
-
# @param Uses globals: $TEXT, $VOICE_PATH, $SPEECH_RATE, $SPEAKER_ID (optional)
|
|
360
|
-
# @returns Creates WAV file at $TEMP_FILE
|
|
361
|
-
# @exitcode 0=success, 4=synthesis error
|
|
362
|
-
# @sideeffects Creates audio file
|
|
363
|
-
# @edgecases Handles piper errors, invalid models, multi-speaker voices
|
|
364
|
-
if [[ -n "${SPEAKER_ID:-}" ]]; then
|
|
365
|
-
# Multi-speaker voice: Pass speaker ID
|
|
366
|
-
# SECURITY: Use printf instead of echo for pipe safety (#134)
|
|
367
|
-
printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --speaker "$SPEAKER_ID" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
|
|
368
|
-
else
|
|
369
|
-
# Single-speaker voice
|
|
370
|
-
printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
|
|
371
|
-
fi
|
|
372
|
-
|
|
373
|
-
if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
|
|
374
|
-
echo "โ Failed to synthesize speech with Piper"
|
|
375
|
-
echo "Voice model: $VOICE_MODEL"
|
|
376
|
-
echo "Check that voice model is valid"
|
|
377
|
-
exit 4
|
|
378
|
-
fi
|
|
379
|
-
|
|
380
|
-
# @function detect_remote_session
|
|
381
|
-
# @intent Auto-detect SSH/RDP sessions and enable audio compression
|
|
382
|
-
# @why Remote desktop audio is choppy without compression
|
|
383
|
-
# @returns Sets AGENTVIBES_RDP_MODE environment variable
|
|
384
|
-
# @detection Checks SSH_CLIENT, SSH_TTY, and DISPLAY variables
|
|
385
|
-
if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]]; then
|
|
386
|
-
# Auto-detect remote session
|
|
387
|
-
if [[ -n "${SSH_CLIENT:-}" ]] || [[ -n "${SSH_TTY:-}" ]] || [[ "${DISPLAY:-}" =~ ^localhost:.* ]]; then
|
|
388
|
-
export AGENTVIBES_RDP_MODE=true
|
|
389
|
-
echo "๐ Remote session detected - enabling audio compression"
|
|
390
|
-
fi
|
|
391
|
-
fi
|
|
392
|
-
|
|
393
|
-
# @function compress_for_remote
|
|
394
|
-
# @intent Compress TTS audio for remote sessions (SSH/RDP)
|
|
395
|
-
# @why Reduces bandwidth and prevents choppy playback
|
|
396
|
-
# @param Uses global: $TEMP_FILE, $AGENTVIBES_RDP_MODE
|
|
397
|
-
# @returns Updates $TEMP_FILE to compressed version
|
|
398
|
-
# @sideeffects Converts to mono 22kHz for lower bandwidth
|
|
399
|
-
if [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] && command -v ffmpeg &> /dev/null; then
|
|
400
|
-
COMPRESSED_FILE=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX.wav")
|
|
401
|
-
_CLEANUP_FILES+=("$COMPRESSED_FILE")
|
|
402
|
-
# Convert to mono, 22kHz, 64kbps for remote sessions
|
|
403
|
-
ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null
|
|
404
|
-
|
|
405
|
-
if [[ -f "$COMPRESSED_FILE" ]]; then
|
|
406
|
-
rm -f "$TEMP_FILE"
|
|
407
|
-
TEMP_FILE="$COMPRESSED_FILE"
|
|
408
|
-
fi
|
|
409
|
-
fi
|
|
410
|
-
|
|
411
|
-
# @function add_silence_padding
|
|
412
|
-
# @intent Add silence to prevent WSL audio static
|
|
413
|
-
# @why WSL audio subsystem cuts off first ~200ms
|
|
414
|
-
# @param Uses global: $TEMP_FILE
|
|
415
|
-
# @returns Updates $TEMP_FILE to padded version
|
|
416
|
-
# @sideeffects Modifies audio file
|
|
417
|
-
# AI NOTE: Use ffmpeg if available, otherwise skip padding (degraded experience)
|
|
418
|
-
if command -v ffmpeg &> /dev/null; then
|
|
419
|
-
PADDED_FILE=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX.wav")
|
|
420
|
-
_CLEANUP_FILES+=("$PADDED_FILE")
|
|
421
|
-
# Add 200ms of silence at the beginning
|
|
422
|
-
ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
|
|
423
|
-
-filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
|
|
424
|
-
-map "[out]" -y "$PADDED_FILE" 2>/dev/null
|
|
425
|
-
|
|
426
|
-
if [[ -f "$PADDED_FILE" ]]; then
|
|
427
|
-
rm -f "$TEMP_FILE"
|
|
428
|
-
TEMP_FILE="$PADDED_FILE"
|
|
429
|
-
fi
|
|
430
|
-
fi
|
|
431
|
-
|
|
432
|
-
# @function apply_audio_effects
|
|
433
|
-
# @intent Apply sox effects and background music via audio-processor.sh
|
|
434
|
-
# @param Uses global: $TEMP_FILE
|
|
435
|
-
# @returns Updates $TEMP_FILE to processed version, sets $BACKGROUND_MUSIC if used
|
|
436
|
-
# @sideeffects Applies audio effects and background music
|
|
437
|
-
BACKGROUND_MUSIC=""
|
|
438
|
-
if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
|
|
439
|
-
PROCESSED_FILE=$(mktemp "$AUDIO_DIR/tts-processed-XXXXXX.wav")
|
|
440
|
-
_CLEANUP_FILES+=("$PROCESSED_FILE")
|
|
441
|
-
# audio-processor.sh returns: FILE_PATH|BACKGROUND_FILE
|
|
442
|
-
PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" "$AGENT_PROFILE_FILE" 2>/dev/null) || {
|
|
443
|
-
echo "Warning: Audio processing failed, using unprocessed audio" >&2
|
|
444
|
-
PROCESSED_FILE="$TEMP_FILE"
|
|
445
|
-
PROCESSOR_OUTPUT="$TEMP_FILE|"
|
|
446
|
-
}
|
|
447
|
-
|
|
448
|
-
# Parse output: FILE|BACKGROUND
|
|
449
|
-
PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
|
|
450
|
-
BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
|
|
451
|
-
|
|
452
|
-
if [[ -f "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
|
|
453
|
-
rm -f "$TEMP_FILE"
|
|
454
|
-
TEMP_FILE="$PROCESSED_FILE"
|
|
455
|
-
fi
|
|
456
|
-
fi
|
|
457
|
-
|
|
458
|
-
# @function play_audio
|
|
459
|
-
# @intent Play generated audio using available player with sequential playback
|
|
460
|
-
# @why Support multiple audio players and prevent overlapping audio in learning mode
|
|
461
|
-
# @param Uses global: $TEMP_FILE, $CURRENT_LANGUAGE
|
|
462
|
-
# @sideeffects Plays audio with lock mechanism for sequential playback
|
|
463
|
-
_LOCK_DIR="${XDG_RUNTIME_DIR:-/tmp/agentvibes-$(id -u)}"
|
|
464
|
-
mkdir -p "$_LOCK_DIR"
|
|
465
|
-
chmod 700 "$_LOCK_DIR"
|
|
466
|
-
LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"
|
|
467
|
-
|
|
468
|
-
# Auto-remove stale lock files (older than 30 seconds) to prevent permanent blocking
|
|
469
|
-
# This handles cases where the background cleanup process was killed mid-playback
|
|
470
|
-
if [ -f "$LOCK_FILE" ]; then
|
|
471
|
-
_lock_age=0
|
|
472
|
-
if [[ "$(uname)" == "Darwin" ]]; then
|
|
473
|
-
_lock_mtime=$(stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0)
|
|
474
|
-
else
|
|
475
|
-
_lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || echo 0)
|
|
476
|
-
fi
|
|
477
|
-
_now=$(date +%s)
|
|
478
|
-
_lock_age=$((_now - _lock_mtime))
|
|
479
|
-
if [[ $_lock_age -gt 30 ]]; then
|
|
480
|
-
rm -f "$LOCK_FILE"
|
|
481
|
-
fi
|
|
482
|
-
fi
|
|
483
|
-
|
|
484
|
-
# Wait for previous audio to finish (max 2 seconds to prevent blocking)
|
|
485
|
-
for i in {1..4}; do
|
|
486
|
-
if [ ! -f "$LOCK_FILE" ]; then
|
|
487
|
-
break
|
|
488
|
-
fi
|
|
489
|
-
sleep 0.5
|
|
490
|
-
done
|
|
491
|
-
|
|
492
|
-
# If still locked after 2 seconds, skip this TTS to prevent blocking Claude
|
|
493
|
-
if [ -f "$LOCK_FILE" ]; then
|
|
494
|
-
echo "โญ๏ธ Skipping TTS (previous audio still playing)" >&2
|
|
495
|
-
exit 0
|
|
496
|
-
fi
|
|
497
|
-
|
|
498
|
-
# Track last target language audio for replay command
|
|
499
|
-
if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
|
|
500
|
-
TARGET_AUDIO_FILE="${CLAUDE_PROJECT_DIR:-${HOME}}/.claude/last-target-audio.txt"
|
|
501
|
-
echo "$TEMP_FILE" > "$TARGET_AUDIO_FILE"
|
|
502
|
-
fi
|
|
503
|
-
|
|
504
|
-
# Create lock and play audio
|
|
505
|
-
touch "$LOCK_FILE"
|
|
506
|
-
|
|
507
|
-
# Create write lock file in audio directory to signal file is in-use (prevents race condition in cleanup)
|
|
508
|
-
_TEMP_DIR="${TEMP_FILE%/*}"
|
|
509
|
-
WRITE_LOCK_FILE="$_TEMP_DIR/$(basename "$TEMP_FILE" .wav).lock"
|
|
510
|
-
touch "$WRITE_LOCK_FILE"
|
|
511
|
-
_CLEANUP_FILES+=("$LOCK_FILE" "$WRITE_LOCK_FILE")
|
|
512
|
-
|
|
513
|
-
# Get audio duration for proper lock timing
|
|
514
|
-
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null || true)
|
|
515
|
-
DURATION=${DURATION%.*} # Round to integer
|
|
516
|
-
# SECURITY: Validate duration is numeric (#134)
|
|
517
|
-
if ! [[ "${DURATION:-}" =~ ^[0-9]+$ ]]; then
|
|
518
|
-
DURATION=1
|
|
519
|
-
fi
|
|
520
|
-
|
|
521
|
-
# Play audio (skip if in test mode or no-playback mode)
|
|
522
|
-
# AGENTVIBES_NO_PLAYBACK: Set to "true" to generate audio without playing (for post-processing)
|
|
523
|
-
PLAYER_PID=""
|
|
524
|
-
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && [[ "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
|
|
525
|
-
# Detect platform and use appropriate audio player
|
|
526
|
-
if [[ "$(uname -s)" == "Darwin" ]]; then
|
|
527
|
-
# macOS: Use afplay (native macOS audio player)
|
|
528
|
-
afplay "$TEMP_FILE" >/dev/null 2>&1 &
|
|
529
|
-
PLAYER_PID=$!
|
|
530
|
-
elif [[ -n "${TERMUX_VERSION:-}" ]] || [[ -d "/data/data/com.termux" ]]; then
|
|
531
|
-
# Android/Termux: Use termux-media-player
|
|
532
|
-
termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
|
|
533
|
-
PLAYER_PID=$!
|
|
534
|
-
else
|
|
535
|
-
# Linux/WSL: Prefer paplay (PulseAudio) for best WSL audio quality
|
|
536
|
-
(paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
|
|
537
|
-
PLAYER_PID=$!
|
|
538
|
-
fi
|
|
539
|
-
fi
|
|
540
|
-
|
|
541
|
-
# Wait for audio to finish, then release locks (both global and write lock)
|
|
542
|
-
(sleep $DURATION; rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE") &
|
|
543
|
-
disown
|
|
544
|
-
|
|
545
|
-
# Get audio cache path
|
|
546
|
-
AUDIO_DIR_PATH=$(get_audio_dir)
|
|
547
|
-
|
|
548
|
-
# Color codes (safe to use โ WAV path is passed via AGENTVIBES_WAV_OUTPATH, not parsed from stdout)
|
|
549
|
-
BLUE='\033[0;34m'
|
|
550
|
-
YELLOW='\033[1;33m'
|
|
551
|
-
PURPLE='\033[0;35m'
|
|
552
|
-
RED='\033[0;31m'
|
|
553
|
-
GREEN='\033[0;32m'
|
|
554
|
-
ORANGE='\033[0;33m'
|
|
555
|
-
WHITE='\033[1;37m'
|
|
556
|
-
CYAN='\033[0;36m'
|
|
557
|
-
GOLD='\033[38;5;226m'
|
|
558
|
-
NC='\033[0m'
|
|
559
|
-
|
|
560
|
-
# Check if banner is enabled (default: on)
|
|
561
|
-
_BANNER_ENABLED=true
|
|
562
|
-
if [[ -f "$HOME/.agentvibes/banner-disabled" ]]; then
|
|
563
|
-
_BANNER_ENABLED=false
|
|
564
|
-
elif [[ -f "${PROJECT_ROOT:-/nonexistent}/.agentvibes/banner-disabled" ]]; then
|
|
565
|
-
_BANNER_ENABLED=false
|
|
566
|
-
fi
|
|
567
|
-
|
|
568
|
-
# Run auto-cleanup off the critical path: only every 10th call, in background after playback starts.
|
|
569
|
-
# Counter file lives in the secure lock dir (user-specific, already created above).
|
|
570
|
-
AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
|
|
571
|
-
_CALL_COUNTER_FILE="$_LOCK_DIR/agentvibes-tts-call-count"
|
|
572
|
-
_CALL_COUNT=$(cat "$_CALL_COUNTER_FILE" 2>/dev/null || echo "0")
|
|
573
|
-
# SECURITY: Validate counter is numeric before arithmetic
|
|
574
|
-
if ! [[ "$_CALL_COUNT" =~ ^[0-9]+$ ]]; then _CALL_COUNT=0; fi
|
|
575
|
-
_CALL_COUNT=$((_CALL_COUNT + 1))
|
|
576
|
-
echo "$_CALL_COUNT" > "$_CALL_COUNTER_FILE"
|
|
577
|
-
|
|
578
|
-
if (( _CALL_COUNT % 10 == 0 )); then
|
|
579
|
-
# Capture values needed inside the subshell before forking
|
|
580
|
-
_CLEANUP_AUDIO_DIR="$AUDIO_DIR_PATH"
|
|
581
|
-
_CLEANUP_THRESHOLD="$AUTO_CLEAN_THRESHOLD"
|
|
582
|
-
_CLEANUP_BANNER="$_BANNER_ENABLED"
|
|
583
|
-
# Source the utils inside the subshell (functions are not exported)
|
|
584
|
-
_CLEANUP_UTILS="$SCRIPT_DIR/audio-cache-utils.sh"
|
|
585
|
-
(
|
|
586
|
-
source "$_CLEANUP_UTILS" 2>/dev/null || exit 0
|
|
587
|
-
_INITIAL_SIZE=$(calculate_tts_size_bytes "$_CLEANUP_AUDIO_DIR")
|
|
588
|
-
if [[ $_INITIAL_SIZE -gt $((_CLEANUP_THRESHOLD * 1048576)) ]]; then
|
|
589
|
-
_DELETED=$(auto_clean_old_files "$_CLEANUP_AUDIO_DIR" "$_CLEANUP_THRESHOLD")
|
|
590
|
-
if [[ ${_DELETED:-0} -gt 0 ]] && [[ "$_CLEANUP_BANNER" == "true" ]]; then
|
|
591
|
-
echo -e "\033[0;33m๐งน Auto-cleaned $_DELETED old files\033[0m"
|
|
592
|
-
fi
|
|
593
|
-
fi
|
|
594
|
-
) &
|
|
595
|
-
disown
|
|
596
|
-
fi
|
|
597
|
-
|
|
598
|
-
# Write output path for play-tts-enhanced.sh (avoids stdout parsing โ colors are safe)
|
|
599
|
-
if [[ -n "${AGENTVIBES_WAV_OUTPATH:-}" ]]; then
|
|
600
|
-
echo "$TEMP_FILE" > "$AGENTVIBES_WAV_OUTPATH"
|
|
601
|
-
fi
|
|
602
|
-
|
|
603
|
-
if [[ "$_BANNER_ENABLED" == "true" ]]; then
|
|
604
|
-
FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
|
|
605
|
-
SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
|
|
606
|
-
SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")
|
|
607
|
-
|
|
608
|
-
# Dynamic color coding based on cache size
|
|
609
|
-
CACHE_COLOR=$GREEN
|
|
610
|
-
if [[ $SIZE_BYTES -gt 3221225472 ]]; then
|
|
611
|
-
CACHE_COLOR=$RED
|
|
612
|
-
elif [[ $SIZE_BYTES -gt 524288000 ]]; then
|
|
613
|
-
CACHE_COLOR=$YELLOW
|
|
614
|
-
fi
|
|
615
|
-
|
|
616
|
-
echo -e "${WHITE}๐พ Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}๐๏ธ${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}๐งน${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"
|
|
617
|
-
|
|
618
|
-
if [[ -n "$BACKGROUND_MUSIC" ]]; then
|
|
619
|
-
echo -e "${WHITE}๐ต Background music:${NC} ${PURPLE}$(basename "$BACKGROUND_MUSIC")${NC}"
|
|
620
|
-
fi
|
|
621
|
-
if [[ -n "${SPEAKER_ID:-}" ]] && [[ -n "${FILE_VOICE:-}" ]]; then
|
|
622
|
-
echo -e "${WHITE}๐ค Voice used:${NC} ${BLUE}$FILE_VOICE${NC} ${WHITE}(Piper TTS)${NC}"
|
|
623
|
-
else
|
|
624
|
-
echo -e "${WHITE}๐ค Voice used:${NC} ${BLUE}$VOICE_MODEL${NC} ${WHITE}(Piper TTS)${NC}"
|
|
625
|
-
fi
|
|
626
|
-
|
|
627
|
-
PERSONALITY=$(cat "${PROJECT_ROOT:-/nonexistent}/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
|
|
628
|
-
if [[ -n "$PERSONALITY" ]] && [[ "$PERSONALITY" != "none" ]] && [[ "$PERSONALITY" != "normal" ]]; then
|
|
629
|
-
echo -e "${WHITE}๐ซ Personality:${NC} ${YELLOW}$PERSONALITY${NC}"
|
|
630
|
-
fi
|
|
631
|
-
|
|
632
|
-
echo -e "\033[38;5;240mSay: \"Turn off banner\" to hide this output\033[0m"
|
|
633
|
-
fi
|
|
634
|
-
|
|
635
|
-
# Check audio folder size and warn if getting large
|
|
636
|
-
if [[ "$_BANNER_ENABLED" == "true" ]] && [[ -d "$AUDIO_DIR_PATH" ]]; then
|
|
637
|
-
AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
|
|
638
|
-
if [[ -n "$AUDIO_SIZE" ]] && [[ "$AUDIO_SIZE" -gt 100 ]]; then
|
|
639
|
-
echo -e "\033[0;31mโ ๏ธ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
|
|
640
|
-
fi
|
|
641
|
-
fi
|
|
642
|
-
|
|
643
|
-
# Show status indicators
|
|
644
|
-
GLOBAL_MUTE_FILE="$HOME/.agentvibes-muted"
|
|
645
|
-
PROJECT_MUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-muted"
|
|
646
|
-
PROJECT_UNMUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-unmuted"
|
|
647
|
-
BACKGROUND_ENABLED_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/config/background-music-enabled.txt"
|
|
648
|
-
GLOBAL_BACKGROUND_ENABLED_FILE="$HOME/.claude/config/background-music-enabled.txt"
|
|
649
|
-
|
|
650
|
-
# Mute status indicator
|
|
651
|
-
if [[ -f "$PROJECT_UNMUTE_FILE" ]] && [[ -f "$GLOBAL_MUTE_FILE" ]]; then
|
|
652
|
-
echo "๐ Status: Unmuted (project overrides global mute)"
|
|
653
|
-
elif [[ -f "$PROJECT_MUTE_FILE" ]]; then
|
|
654
|
-
echo "๐ Status: Muted (project)"
|
|
655
|
-
elif [[ -f "$GLOBAL_MUTE_FILE" ]]; then
|
|
656
|
-
echo "๐ Status: Would be muted (global) - but this project is speaking"
|
|
657
|
-
fi
|
|
658
|
-
|
|
659
|
-
# Background music status indicator
|
|
660
|
-
if [[ -z "$BACKGROUND_MUSIC" ]]; then
|
|
661
|
-
_bg_enabled=false
|
|
662
|
-
if [[ -f "$BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$BACKGROUND_ENABLED_FILE" 2>/dev/null; then
|
|
663
|
-
_bg_enabled=true
|
|
664
|
-
elif [[ -f "$GLOBAL_BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$GLOBAL_BACKGROUND_ENABLED_FILE" 2>/dev/null; then
|
|
665
|
-
_bg_enabled=true
|
|
666
|
-
fi
|
|
667
|
-
if [[ "$_bg_enabled" == "true" ]]; then
|
|
668
|
-
echo "๐ต Background music: Enabled but not playing (check config)"
|
|
669
|
-
else
|
|
670
|
-
echo "๐ต Background music: Disabled"
|
|
671
|
-
fi
|
|
672
|
-
fi
|
|
673
|
-
|
|
674
|
-
# Wait for audio player to finish before returning.
|
|
675
|
-
# This keeps the bmad-speak.sh speech lock held until playback is actually done,
|
|
676
|
-
# preventing party-mode agents from talking over each other.
|
|
677
|
-
if [[ -n "$PLAYER_PID" ]]; then
|
|
678
|
-
wait "$PLAYER_PID" 2>/dev/null || true
|
|
679
|
-
fi
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# File: .claude/hooks/play-tts-piper.sh
|
|
4
|
+
#
|
|
5
|
+
# AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
|
|
6
|
+
# Website: https://agentvibes.org
|
|
7
|
+
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
+
#
|
|
9
|
+
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
+
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
+
#
|
|
12
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
+
# you may not use this file except in compliance with the License.
|
|
14
|
+
# You may obtain a copy of the License at
|
|
15
|
+
#
|
|
16
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
+
#
|
|
18
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
# See the License for the specific language governing permissions and
|
|
22
|
+
# limitations under the License.
|
|
23
|
+
#
|
|
24
|
+
# DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
+
# express or implied. Use at your own risk. See the Apache License for details.
|
|
26
|
+
#
|
|
27
|
+
# ---
|
|
28
|
+
#
|
|
29
|
+
# @fileoverview Piper TTS Provider Implementation - Free, offline neural TTS
|
|
30
|
+
# @context Provides local, privacy-first TTS alternative to cloud services for WSL/Linux
|
|
31
|
+
# @architecture Implements provider interface contract for Piper binary integration
|
|
32
|
+
# @dependencies piper (pipx), piper-voice-manager.sh, mpv/aplay, ffmpeg (optional padding)
|
|
33
|
+
# @entrypoints Called by play-tts.sh router when provider=piper
|
|
34
|
+
# @patterns Provider contract: text/voice → audio file path, voice auto-download, language-aware synthesis
|
|
35
|
+
# @related play-tts.sh, piper-voice-manager.sh, language-manager.sh, GitHub Issue #25
|
|
36
|
+
#
|
|
37
|
+
|
|
38
|
+
set -eo pipefail
# nounset (-u) is deliberately left off: the sourced helpers
# (piper-voice-manager.sh, language-manager.sh, audio-cache-utils.sh)
# read unset variables freely. Variables owned by THIS script are
# expanded with ${VAR:-} defaults instead.

# Intermediate temp files registered for removal when the script exits.
_CLEANUP_FILES=()

# @function cleanup
# @intent Remove every registered temp file except the final audio output
# @param Uses globals: $_CLEANUP_FILES (paths to delete), $TEMP_FILE (path to keep)
# @sideeffects Deletes files from disk
cleanup() {
  local path
  # The ${arr[@]+...} form expands to nothing when the array is empty.
  for path in "${_CLEANUP_FILES[@]+"${_CLEANUP_FILES[@]}"}"; do
    if [[ "$path" != "${TEMP_FILE:-}" ]]; then
      rm -f "$path"
    fi
  done
}
trap cleanup EXIT
|
|
53
|
+
|
|
54
|
+
# Fix locale warnings
export LC_ALL=C

# Positional arguments
TEXT="${1:-}"               # Text to speak
VOICE_OVERRIDE="${2:-}"     # Optional: voice model name
AGENT_PROFILE_FILE="${3:-}" # Optional: path to per-agent profile JSON (from bmad-speak.sh)

# @function _sanitize_tts_text
# @intent Strip emojis, asterisks, and markdown formatting that Piper would speak literally
# @why perl is not a declared dependency; without it the pipeline below would
#      fail and `set -eo pipefail` would end the script with no message
# @param $1 Raw text
# @returns Cleaned text on stdout (raw text unchanged when perl is unavailable)
# @sideeffects Writes a warning to stderr when perl is unavailable
_sanitize_tts_text() {
  if ! command -v perl > /dev/null 2>&1; then
    echo "Warning: perl not found, speaking text without markdown cleanup" >&2
    printf '%s' "$1"
    return 0
  fi

  printf '%s' "$1" | perl -CSD -pe '
    s/[\x{1F300}-\x{1F9FF}]//g;  # emoticons, symbols, pictographs
    s/[\x{2600}-\x{27BF}]//g;    # misc symbols, dingbats
    s/[\x{FE00}-\x{FE0F}]//g;    # variation selectors
    s/[\x{200D}]//g;             # zero-width joiner
    s/[\x{2500}-\x{257F}]//g;    # box drawing
    s/[\x{2580}-\x{259F}]//g;    # block elements
    s/\*+//g;                    # asterisks (bold/italic markdown)
    s/#+\s*//g;                  # heading markers
    s/`//g;                      # backticks
    s/~+//g;                     # strikethrough
    s/^\s*[-]\s*//g;             # list dashes
  '
}

TEXT=$(_sanitize_tts_text "$TEXT")
|
|
75
|
+
|
|
76
|
+
# Load the sibling helper libraries that live next to this script:
# voice manager, language manager, and audio cache utilities (in that order).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
for _helper in piper-voice-manager.sh language-manager.sh audio-cache-utils.sh; do
  source "$SCRIPT_DIR/$_helper"
done
unset _helper

# Voice used when neither an override, a voice file, nor a language voice applies
DEFAULT_VOICE="en_US-lessac-medium"
|
|
84
|
+
|
|
85
|
+
# @function _piper_resolve_speaker_id
# @intent Translate a speaker name into the numeric speaker ID stored in the model metadata
# @why Both the voice override and the voice file accept the "Model::SpeakerName" format
# @param $1 Piper model name (file is <storage dir>/<model>.onnx.json)
# @param $2 Speaker name to look up in the file's speaker_id_map
# @returns Sets $voice_dir and $_JSON_FILE; when the metadata file exists, sets
#          $SPEAKER_ID to the numeric ID, or to "" if the name is unknown or not numeric.
#          Leaves $SPEAKER_ID untouched when the metadata file is missing.
# @sideeffects Runs node to parse the JSON file
_piper_resolve_speaker_id() {
  local model="$1" speaker_name="$2"
  local found_id=""

  voice_dir=$(get_voice_storage_dir)
  _JSON_FILE="$voice_dir/${model}.onnx.json"
  if [[ ! -f "$_JSON_FILE" ]]; then
    return 0
  fi

  # SECURITY: Pass values via env vars to prevent shell injection
  found_id=$(_JSON="$_JSON_FILE" _SPKR="$speaker_name" node -e "
    try {
      const j = JSON.parse(require('fs').readFileSync(process.env._JSON,'utf8'));
      const map = j.speaker_id_map || {};
      const id = map[process.env._SPKR];
      if (id !== undefined) process.stdout.write(String(id));
    } catch {}
  " 2>/dev/null || true)

  # The value is later passed to piper --speaker; accept digits only
  if [[ -n "$found_id" ]] && ! [[ "$found_id" =~ ^[0-9]+$ ]]; then
    echo "Warning: Invalid speaker ID '$found_id', ignoring" >&2
    found_id=""
  fi

  SPEAKER_ID="$found_id"
  return 0
}

# @function determine_voice_model
# @intent Resolve voice name to Piper model name with language support
# @why Support voice override, language-specific voices, and default fallback
# @param Uses global: $VOICE_OVERRIDE
# @returns Sets $VOICE_MODEL global variable (and $SPEAKER_ID for multi-speaker voices)
# @sideeffects Prints the chosen voice to stdout
VOICE_MODEL=""

# Get current language setting
CURRENT_LANGUAGE=$(get_language_code)

if [[ -n "$VOICE_OVERRIDE" ]]; then
  # Use override if provided
  if [[ "$VOICE_OVERRIDE" == *"::"* ]]; then
    # Multi-speaker format "Model::SpeakerName": split into model + speaker lookup
    VOICE_MODEL="${VOICE_OVERRIDE%%::*}"
    _SPEAKER_NAME="${VOICE_OVERRIDE#*::}"
    _piper_resolve_speaker_id "$VOICE_MODEL" "$_SPEAKER_NAME"
    echo "๐ญ Using multi-speaker voice: $VOICE_OVERRIDE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
  else
    VOICE_MODEL="$VOICE_OVERRIDE"
    echo "๐ค Using voice: $VOICE_OVERRIDE (session-specific)"
  fi
else
  # Try to get voice from voice file (check CLAUDE_PROJECT_DIR first for MCP context)
  VOICE_FILE=""

  # Priority order:
  # 1. CLAUDE_PROJECT_DIR env var (set by MCP for project-specific settings)
  # 2. Script location (for direct slash command usage)
  # 3. Global ~/.claude (fallback)

  # SECURITY: Canonicalize path to prevent traversal (#128)
  if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
    CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
  fi
  if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]] && [[ -f "$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt" ]]; then
    # MCP context: Use the project directory where MCP was invoked
    VOICE_FILE="$CLAUDE_PROJECT_DIR/.claude/tts-voice.txt"
  elif [[ -f "$SCRIPT_DIR/../tts-voice.txt" ]]; then
    # Direct usage: Use script location
    VOICE_FILE="$SCRIPT_DIR/../tts-voice.txt"
  elif [[ -f "$HOME/.claude/tts-voice.txt" ]]; then
    # Fallback: Use global
    VOICE_FILE="$HOME/.claude/tts-voice.txt"
  fi

  if [[ -n "$VOICE_FILE" ]]; then
    # An unreadable file is treated as "no voice configured" rather than
    # letting `set -e` abort before the language/default fallback below.
    FILE_VOICE=$(cat "$VOICE_FILE" 2>/dev/null) || FILE_VOICE=""

    # Check for multi-speaker voice (model + speaker ID stored separately)
    # Use same directory as VOICE_FILE for consistency
    VOICE_DIR=$(dirname "$VOICE_FILE")
    MODEL_FILE="$VOICE_DIR/tts-piper-model.txt"
    SPEAKER_ID_FILE="$VOICE_DIR/tts-piper-speaker-id.txt"

    if [[ -f "$MODEL_FILE" ]] && [[ -f "$SPEAKER_ID_FILE" ]]; then
      # Multi-speaker voice config found locally
      VOICE_MODEL=$(cat "$MODEL_FILE" 2>/dev/null) || VOICE_MODEL=""
      SPEAKER_ID=$(cat "$SPEAKER_ID_FILE" 2>/dev/null) || SPEAKER_ID=""
      # Validate speaker ID is numeric
      if [[ -n "$SPEAKER_ID" ]] && ! [[ "$SPEAKER_ID" =~ ^[0-9]+$ ]]; then
        echo "Warning: Invalid speaker ID '$SPEAKER_ID', ignoring" >&2
        SPEAKER_ID=""
      fi
      echo "๐ญ Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-none})"
    elif [[ -n "$FILE_VOICE" ]] && [[ "$FILE_VOICE" == *"::"* ]]; then
      # Voice uses Model::SpeakerName format (from AgentVibes config)
      VOICE_MODEL="${FILE_VOICE%%::*}"
      _SPEAKER_NAME="${FILE_VOICE#*::}"
      _piper_resolve_speaker_id "$VOICE_MODEL" "$_SPEAKER_NAME"
      echo "๐ญ Using multi-speaker voice: $FILE_VOICE (Model: $VOICE_MODEL, Speaker ID: ${SPEAKER_ID:-?})"
    elif [[ -n "$FILE_VOICE" ]]; then
      # Standard Piper model name or custom voice (just use as-is).
      # Names containing "::" were already handled by the branch above.
      VOICE_MODEL="$FILE_VOICE"
    fi
  fi

  # If no Piper voice from file, try language-specific voice
  if [[ -z "$VOICE_MODEL" ]]; then
    # A failed lookup must fall through to the default voice, not abort under `set -e`
    LANG_VOICE=$(get_voice_for_language "$CURRENT_LANGUAGE" "piper" 2>/dev/null) || LANG_VOICE=""

    if [[ -n "$LANG_VOICE" ]]; then
      VOICE_MODEL="$LANG_VOICE"
      echo "๐ Using $CURRENT_LANGUAGE voice: $LANG_VOICE (Piper)"
    else
      # Use default voice
      VOICE_MODEL="$DEFAULT_VOICE"
    fi
  fi
fi
|
|
206
|
+
|
|
207
|
+
# @function validate_inputs
# @intent Refuse to continue without text to speak or without the piper binary
# @why Stopping here gives a clear message instead of a confusing failure later
# @param Uses global: $TEXT
# @exitcode 1=missing text, 2=missing piper binary
if [[ "$TEXT" == "" ]]; then
  echo "Usage: $0 \"text to speak\" [voice_model_name]"
  exit 1
fi

# Piper must be on PATH before any synthesis is attempted
command -v piper > /dev/null 2>&1 || {
  echo "โ Error: Piper TTS not installed"
  echo "Install with: pipx install piper-tts"
  echo "Or run: .claude/hooks/piper-installer.sh"
  exit 2
}
|
|
223
|
+
|
|
224
|
+
# @function ensure_voice_downloaded
# @intent Download voice model if not cached
# @why Provide seamless experience with automatic downloads
# @param Uses global: $VOICE_MODEL
# @exitcode 3=download failed or was not confirmed
# @sideeffects Downloads voice model files
# @edgecases Prompts user for consent before downloading, skipped in test mode.
#            When no answer can be read (EOF / non-interactive stdin) the
#            download counts as declined.
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && ! verify_voice "$VOICE_MODEL"; then
  echo "๐ฅ Voice model not found: $VOICE_MODEL"
  echo " File size: ~25MB"
  echo " Preview: https://huggingface.co/rhasspy/piper-voices"
  echo ""

  # `read` returns non-zero at end of input; without the `||` guard,
  # `set -e` would end the script here with no message and exit code 1
  # instead of the documented "cancelled" path (exit 3).
  REPLY=""
  read -p " Download this voice model? [y/N]: " -n 1 -r || REPLY=""
  echo

  if [[ "$REPLY" =~ ^[Yy]$ ]]; then
    if ! download_voice "$VOICE_MODEL"; then
      echo "โ Failed to download voice model"
      echo "Fix: Download manually or choose different voice"
      exit 3
    fi
  else
    echo "โ Voice download cancelled"
    exit 3
  fi
fi
|
|
249
|
+
|
|
250
|
+
# Get voice model path
# In test mode, use a fake path since we have mock piper that doesn't need real files.
# Otherwise ask get_voice_path for the model location. The call sits inside the
# `if` condition because a failing `VAR=$(cmd)` on its own line makes `set -e`
# exit immediately, which left the former `$?` check (and its error message
# and exit code 3) unreachable.
if [[ "${AGENTVIBES_TEST_MODE:-false}" == "true" ]]; then
  VOICE_PATH="/tmp/mock-voice-${VOICE_MODEL}.onnx"
elif ! VOICE_PATH=$(get_voice_path "$VOICE_MODEL"); then
  echo "โ Voice model path not found: $VOICE_MODEL"
  exit 3
fi
|
|
261
|
+
|
|
262
|
+
# @function determine_audio_directory
# @intent Find appropriate directory for audio file storage
# @why Supports project-local and global storage
# @param Uses globals: $CLAUDE_PROJECT_DIR (optional), $PWD, $HOME
# @returns Sets $AUDIO_DIR global variable and creates the output file $TEMP_FILE
# @sideeffects Creates $AUDIO_DIR and an empty tts-XXXXXX.wav file inside it
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
  AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
else
  # Fallback: walk up from the current directory looking for a .claude directory.
  # The "." guard stops the walk if the path is ever not absolute
  # (dirname would then settle on "." and never reach "/").
  CURRENT_DIR="$PWD"
  while [[ "$CURRENT_DIR" != "/" ]] && [[ "$CURRENT_DIR" != "." ]]; do
    if [[ -d "$CURRENT_DIR/.claude" ]]; then
      AUDIO_DIR="$CURRENT_DIR/.claude/audio"
      break
    fi
    CURRENT_DIR=$(dirname "$CURRENT_DIR")
  done
  # Final fallback to global if no project .claude found
  if [[ -z "${AUDIO_DIR:-}" ]]; then
    AUDIO_DIR="$HOME/.claude/audio"
  fi
fi

mkdir -p "$AUDIO_DIR"

# Portability: BSD/macOS mktemp only substitutes X's at the END of the
# template, so "tts-XXXXXX.wav" would yield the same literal name on every run.
# Create the unique name without a suffix, then add ".wav".
_tts_base=$(mktemp "$AUDIO_DIR/tts-XXXXXX")
TEMP_FILE="${_tts_base}.wav"
mv -- "$_tts_base" "$TEMP_FILE"
|
|
286
|
+
|
|
287
|
+
# @function get_speech_rate
# @intent Determine speech rate for Piper synthesis
# @why Convert user-facing speed (0.5=slower, 2.0=faster) to Piper length-scale (inverted)
# @param Uses globals: $SCRIPT_DIR, $HOME, $CURRENT_LANGUAGE
# @returns Prints the Piper length-scale value (1 / user speed) on stdout
# @note Piper uses length-scale where higher=slower, opposite of user expectation
# @edgecases Missing, non-numeric or zero speeds (including "0.00") and bc
#            failures all yield "1.0"; a trailing carriage return is ignored
get_speech_rate() {
  local candidate
  local target_config="" main_config=""
  local user_speed="" length_scale=""

  # Target-language config: new (tts-*) names before legacy (piper-*) names,
  # script-relative copy before the global one.
  for candidate in \
    "$SCRIPT_DIR/../config/tts-target-speech-rate.txt" \
    "$HOME/.claude/config/tts-target-speech-rate.txt" \
    "$SCRIPT_DIR/../config/piper-target-speech-rate.txt" \
    "$HOME/.claude/config/piper-target-speech-rate.txt"; do
    if [[ -f "$candidate" ]]; then
      target_config="$candidate"
      break
    fi
  done

  # Main config, same precedence rules
  for candidate in \
    "$SCRIPT_DIR/../config/tts-speech-rate.txt" \
    "$HOME/.claude/config/tts-speech-rate.txt" \
    "$SCRIPT_DIR/../config/piper-speech-rate.txt" \
    "$HOME/.claude/config/piper-speech-rate.txt"; do
    if [[ -f "$candidate" ]]; then
      main_config="$candidate"
      break
    fi
  done

  if [[ "$CURRENT_LANGUAGE" != "english" ]] && [[ -n "$target_config" ]]; then
    # Non-English voice with a target config: the whole file is the speed
    user_speed=$(cat "$target_config" 2>/dev/null) || true
  elif [[ -n "$main_config" ]]; then
    # Main config: last line that is neither a comment nor empty
    user_speed=$(grep -v '^#' "$main_config" 2>/dev/null | grep -v '^$' | tail -1) || true
  else
    # Default: 1.0 (normal) for English, 2.0 (slower) for learning
    if [[ "$CURRENT_LANGUAGE" != "english" ]]; then
      echo "2.0"
    else
      echo "1.0"
    fi
    return
  fi

  # Tolerate config files saved with CRLF line endings
  user_speed="${user_speed//$'\r'/}"

  # Validate speed is a positive number. The second test rejects every
  # all-zero spelling ("0", "0.0", "0.00", ".0"), which would divide by zero.
  if ! [[ "$user_speed" =~ ^[0-9]*\.?[0-9]+$ ]] || ! [[ "$user_speed" =~ [1-9] ]]; then
    echo "1.0"
    return
  fi

  # Convert user speed to Piper length-scale (invert)
  # User:  0.5=slower, 1.0=normal, 2.0=faster
  # Piper: 2.0=slower, 1.0=normal, 0.5=faster
  # Formula: piper_length_scale = 1.0 / user_speed
  length_scale=$(echo "scale=2; 1.0 / $user_speed" | bc -l 2>/dev/null) || length_scale=""

  # bc missing, bc error, or a result that truncated to zero: use normal speed
  if ! [[ "$length_scale" =~ [1-9] ]]; then
    length_scale="1.0"
  fi
  echo "$length_scale"
}
|
|
353
|
+
|
|
354
|
+
SPEECH_RATE=$(get_speech_rate)

# @function synthesize_with_piper
# @intent Generate speech using Piper TTS
# @why Provides free, offline TTS alternative
# @param Uses globals: $TEXT, $VOICE_PATH, $SPEECH_RATE, $SPEAKER_ID (optional)
# @returns Creates WAV file at $TEMP_FILE
# @exitcode 0=success, 4=synthesis error
# @sideeffects Creates audio file; removes the empty output file on failure
# @edgecases Handles piper errors, invalid models, multi-speaker voices

# Build the argument list once; --speaker is only added for multi-speaker voices
_PIPER_ARGS=(--model "$VOICE_PATH")
if [[ -n "${SPEAKER_ID:-}" ]]; then
  _PIPER_ARGS+=(--speaker "$SPEAKER_ID")
fi
_PIPER_ARGS+=(--length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE")

# SECURITY: Use printf instead of echo for pipe safety (#134)
# The `||` keeps a non-zero piper exit from tripping `set -eo pipefail`,
# so the failure is reported below with the documented exit code 4.
_PIPER_OK=true
printf '%s\n' "$TEXT" | piper "${_PIPER_ARGS[@]}" 2>/dev/null || _PIPER_OK=false

if [[ "$_PIPER_OK" != "true" ]] || [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
  echo "โ Failed to synthesize speech with Piper"
  echo "Voice model: $VOICE_MODEL"
  echo "Check that voice model is valid"
  # The EXIT trap deliberately keeps $TEMP_FILE, so drop the unusable file here
  rm -f "$TEMP_FILE"
  exit 4
fi
|
|
379
|
+
|
|
380
|
+
# @function detect_remote_session
# @intent Turn on audio compression automatically when running over SSH/RDP
# @why Remote desktop audio is choppy without compression
# @returns Exports AGENTVIBES_RDP_MODE=true when a remote session is detected
# @detection Non-empty SSH_CLIENT or SSH_TTY, or a DISPLAY starting with "localhost:"
# @note An AGENTVIBES_RDP_MODE value that is already set is left alone
if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]] \
  && { [[ -n "${SSH_CLIENT:-}${SSH_TTY:-}" ]] || [[ "${DISPLAY:-}" == localhost:* ]]; }; then
  export AGENTVIBES_RDP_MODE=true
  echo "๐ Remote session detected - enabling audio compression"
fi
|
|
392
|
+
|
|
393
|
+
# @function compress_for_remote
# @intent Compress TTS audio for remote sessions (SSH/RDP)
# @why Reduces bandwidth and prevents choppy playback
# @param Uses global: $TEMP_FILE, $AGENTVIBES_RDP_MODE, $AUDIO_DIR, $_CLEANUP_FILES
# @returns Updates $TEMP_FILE to the compressed version; leaves it untouched when
#          compression is not applicable or fails
# @sideeffects Converts to mono 22kHz for lower bandwidth; deletes the uncompressed
#              file once it has been replaced
# @edgecases No-op unless AGENTVIBES_RDP_MODE is "true" and ffmpeg is available
compress_for_remote() {
  [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] || return 0
  command -v ffmpeg &> /dev/null || return 0

  COMPRESSED_FILE=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX.wav") || return 0
  _CLEANUP_FILES+=("$COMPRESSED_FILE")

  # mktemp has already created the output file, so its existence proves nothing.
  # Only swap files when ffmpeg succeeded AND actually wrote audio; otherwise the
  # good WAV would be deleted and replaced by an empty placeholder.
  # Convert to mono, 22kHz, 64kbps for remote sessions
  if ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null \
    && [[ -s "$COMPRESSED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$COMPRESSED_FILE"
  else
    # Drop the unusable placeholder and keep playing the uncompressed audio
    rm -f "$COMPRESSED_FILE"
  fi
}
compress_for_remote
|
|
410
|
+
|
|
411
|
+
# @function add_silence_padding
# @intent Add silence to prevent WSL audio static
# @why WSL audio subsystem cuts off first ~200ms
# @param Uses global: $TEMP_FILE, $AUDIO_DIR, $_CLEANUP_FILES
# @returns Updates $TEMP_FILE to the padded version; leaves it untouched when
#          padding is skipped or fails
# @sideeffects Modifies audio file; deletes the unpadded file once replaced
# AI NOTE: Use ffmpeg if available, otherwise skip padding (degraded experience)
add_silence_padding() {
  command -v ffmpeg &> /dev/null || return 0

  PADDED_FILE=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX.wav") || return 0
  _CLEANUP_FILES+=("$PADDED_FILE")

  # mktemp has already created the output file, so only ffmpeg's exit status plus
  # a non-empty result show that padding worked. Without this guard a failed run
  # would delete the real audio and leave an empty file in its place.
  # Add 200ms of silence at the beginning
  if ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
      -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
      -map "[out]" -y "$PADDED_FILE" 2>/dev/null \
    && [[ -s "$PADDED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$PADDED_FILE"
  else
    # Drop the unusable placeholder and continue with the unpadded audio
    rm -f "$PADDED_FILE"
  fi
}
add_silence_padding
|
|
431
|
+
|
|
432
|
+
# @function apply_audio_effects
# @intent Run the synthesized audio through audio-processor.sh (effects and background music)
# @param Uses global: $TEMP_FILE, $SCRIPT_DIR, $AUDIO_DIR, $AGENT_PROFILE_FILE, $_CLEANUP_FILES
# @returns Updates $TEMP_FILE to processed version, sets $BACKGROUND_MUSIC if used
# @sideeffects Deletes the unprocessed file once a processed one replaces it
# @edgecases No-op (BACKGROUND_MUSIC="") when audio-processor.sh is absent; on
#            processor failure a warning goes to stderr and $TEMP_FILE is kept
apply_audio_effects() {
  BACKGROUND_MUSIC=""
  [[ -f "$SCRIPT_DIR/audio-processor.sh" ]] || return 0

  PROCESSED_FILE=$(mktemp "$AUDIO_DIR/tts-processed-XXXXXX.wav")
  _CLEANUP_FILES+=("$PROCESSED_FILE")

  # audio-processor.sh returns: FILE_PATH|BACKGROUND_FILE
  PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" "$AGENT_PROFILE_FILE" 2>/dev/null) || {
    echo "Warning: Audio processing failed, using unprocessed audio" >&2
    PROCESSOR_OUTPUT="$TEMP_FILE|"
  }

  # Parse output: FILE|BACKGROUND
  PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
  # Without a "|" the ##*| expansion would return the whole string, i.e. the file
  # path would be reported as background music. Only read the second field when
  # the delimiter is really there.
  if [[ "$PROCESSOR_OUTPUT" == *"|"* ]]; then
    BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
  fi

  # -s rather than -f: the mktemp placeholder exists even if nothing was written to it
  if [[ -s "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$PROCESSED_FILE"
  fi
}
apply_audio_effects
|
|
457
|
+
|
|
458
|
+
# @function play_audio
# @intent Play the generated audio with whichever player is available, one clip at a time
# @why Several players must be supported, and clips must not overlap in learning mode
# @param Uses global: $TEMP_FILE, $CURRENT_LANGUAGE
# @sideeffects Plays audio; serialises playback through a lock file
_LOCK_DIR="${XDG_RUNTIME_DIR:-/tmp/agentvibes-$(id -u)}"
mkdir -p "$_LOCK_DIR"
chmod 700 "$_LOCK_DIR"
LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"

# A lock older than 30 seconds is treated as stale and removed, so a release job
# that was killed mid-playback cannot block TTS forever.
if [[ -f "$LOCK_FILE" ]]; then
  _lock_age=0
  # stat takes different flags for "mtime as epoch seconds" on macOS and elsewhere
  case "$(uname)" in
    Darwin) _lock_mtime=$(stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0) ;;
    *) _lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || echo 0) ;;
  esac
  _now=$(date +%s)
  _lock_age=$((_now - _lock_mtime))
  if (( _lock_age > 30 )); then
    rm -f "$LOCK_FILE"
  fi
fi

# Give the previous clip up to 4 x 0.5s = 2 seconds to release the lock
for i in 1 2 3 4; do
  [[ -f "$LOCK_FILE" ]] || break
  sleep 0.5
done

# Still locked: drop this TTS request rather than keep Claude waiting
if [[ -f "$LOCK_FILE" ]]; then
  echo "โญ๏ธ Skipping TTS (previous audio still playing)" >&2
  exit 0
fi
|
|
497
|
+
|
|
498
|
+
# Record the path of the latest non-English clip so the replay command can find it
case "$CURRENT_LANGUAGE" in
  english) ;;
  *)
    TARGET_AUDIO_FILE="${CLAUDE_PROJECT_DIR:-${HOME}}/.claude/last-target-audio.txt"
    echo "$TEMP_FILE" > "$TARGET_AUDIO_FILE"
    ;;
esac
|
|
503
|
+
|
|
504
|
+
# Convert a raw ffprobe duration (e.g. "2.345000") into whole seconds for `sleep`.
# Rounds UP: truncating would release the playback lock before the clip has ended
# (a 0.9s clip would yield 0). Anything that is not a plain non-negative number
# (empty, "N/A", injected text) yields 1. Output is always digits only.
_tts_lock_seconds() {
  local raw="${1:-}" whole frac=""
  whole="${raw%%.*}"
  if [[ "$raw" == *.* ]]; then
    frac="${raw#*.}"
  fi
  # SECURITY: Validate duration is numeric (#134)
  if ! [[ "$whole" =~ ^[0-9]+$ ]]; then
    printf '1\n'
    return 0
  fi
  whole=$((10#$whole)) # force base 10 so a leading zero is not read as octal
  if [[ "$frac" =~ [1-9] ]]; then
    whole=$((whole + 1))
  fi
  printf '%s\n' "$whole"
}

# Create lock and play audio
touch "$LOCK_FILE"

# Create write lock file in audio directory to signal file is in-use (prevents race condition in cleanup)
_TEMP_DIR="${TEMP_FILE%/*}"
WRITE_LOCK_FILE="$_TEMP_DIR/$(basename "$TEMP_FILE" .wav).lock"
touch "$WRITE_LOCK_FILE"
_CLEANUP_FILES+=("$LOCK_FILE" "$WRITE_LOCK_FILE")

# Get audio duration for proper lock timing (ffprobe missing or failing -> 1 second)
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null || true)
DURATION=$(_tts_lock_seconds "$DURATION")
|
|
520
|
+
|
|
521
|
+
# Start playback in the background unless test mode or no-playback mode is on.
# AGENTVIBES_NO_PLAYBACK: Set to "true" to generate audio without playing (for post-processing)
PLAYER_PID=""
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" && "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
  # Every branch backgrounds exactly one job, so $! below always refers to the player
  case "$(uname -s)" in
    Darwin)
      # macOS: Use afplay (native macOS audio player)
      afplay "$TEMP_FILE" >/dev/null 2>&1 &
      ;;
    *)
      if [[ -n "${TERMUX_VERSION:-}" || -d "/data/data/com.termux" ]]; then
        # Android/Termux: Use termux-media-player
        termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
      else
        # Linux/WSL: try paplay (PulseAudio) first, then mpv, then aplay
        (paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
      fi
      ;;
  esac
  PLAYER_PID=$!
fi

# Detached timer: after $DURATION seconds drop both the global lock and the write lock
(
  sleep "$DURATION"
  rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE"
) &
disown
|
|
544
|
+
|
|
545
|
+
# Directory holding cached TTS audio
AUDIO_DIR_PATH=$(get_audio_dir)

# ANSI colour codes for the banner. Colouring stdout is safe because the WAV path
# is handed over through AGENTVIBES_WAV_OUTPATH, not parsed from stdout.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
ORANGE='\033[0;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
YELLOW='\033[1;33m'
WHITE='\033[1;37m'
GOLD='\033[38;5;226m'

# The banner is shown by default; a banner-disabled marker in either the user's
# or the project's .agentvibes directory switches it off.
_BANNER_ENABLED=true
if [[ -f "$HOME/.agentvibes/banner-disabled" || -f "${PROJECT_ROOT:-/nonexistent}/.agentvibes/banner-disabled" ]]; then
  _BANNER_ENABLED=false
fi
|
|
567
|
+
|
|
568
|
+
# Auto-cleanup is kept off the critical path: it runs on every 10th call only, in
# a detached background subshell. The call counter is stored in $_LOCK_DIR.
AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
_CALL_COUNTER_FILE="$_LOCK_DIR/agentvibes-tts-call-count"
_CALL_COUNT=$(cat "$_CALL_COUNTER_FILE" 2>/dev/null || echo "0")
# SECURITY: Validate counter is numeric before arithmetic
[[ "$_CALL_COUNT" =~ ^[0-9]+$ ]] || _CALL_COUNT=0
_CALL_COUNT=$((_CALL_COUNT + 1))
echo "$_CALL_COUNT" > "$_CALL_COUNTER_FILE"

if (( _CALL_COUNT % 10 == 0 )); then
  # Snapshot everything the subshell needs before forking
  _CLEANUP_AUDIO_DIR="$AUDIO_DIR_PATH"
  _CLEANUP_THRESHOLD="$AUTO_CLEAN_THRESHOLD"
  _CLEANUP_BANNER="$_BANNER_ENABLED"
  # The helper functions are not exported, so the subshell sources them itself
  _CLEANUP_UTILS="$SCRIPT_DIR/audio-cache-utils.sh"
  (
    source "$_CLEANUP_UTILS" 2>/dev/null || exit 0
    _INITIAL_SIZE=$(calculate_tts_size_bytes "$_CLEANUP_AUDIO_DIR")
    # Threshold is multiplied by 1048576 to compare against the size in bytes
    [[ $_INITIAL_SIZE -gt $((_CLEANUP_THRESHOLD * 1048576)) ]] || exit 0
    _DELETED=$(auto_clean_old_files "$_CLEANUP_AUDIO_DIR" "$_CLEANUP_THRESHOLD")
    if [[ ${_DELETED:-0} -gt 0 && "$_CLEANUP_BANNER" == "true" ]]; then
      echo -e "\033[0;33m๐งน Auto-cleaned $_DELETED old files\033[0m"
    fi
  ) &
  disown
fi
|
|
597
|
+
|
|
598
|
+
# When AGENTVIBES_WAV_OUTPATH is set, write the final WAV path into that file for
# play-tts-enhanced.sh, so stdout never has to be parsed and may carry colours.
if [[ "${AGENTVIBES_WAV_OUTPATH:-}" != "" ]]; then
  echo "$TEMP_FILE" > "$AGENTVIBES_WAV_OUTPATH"
fi
|
|
602
|
+
|
|
603
|
+
# Banner: saved file, cache statistics, background music, voice and personality
if [[ "$_BANNER_ENABLED" == "true" ]]; then
  FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
  SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
  SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")

  # Cache size colour: red above 3221225472 bytes, yellow above 524288000 bytes, else green
  if [[ $SIZE_BYTES -gt 3221225472 ]]; then
    CACHE_COLOR=$RED
  elif [[ $SIZE_BYTES -gt 524288000 ]]; then
    CACHE_COLOR=$YELLOW
  else
    CACHE_COLOR=$GREEN
  fi

  echo -e "${WHITE}๐พ Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}๐๏ธ${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}๐งน${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"

  if [[ -n "$BACKGROUND_MUSIC" ]]; then
    echo -e "${WHITE}๐ต Background music:${NC} ${PURPLE}$(basename "$BACKGROUND_MUSIC")${NC}"
  fi

  # Show $FILE_VOICE when a speaker ID is in use and it is set; otherwise the model name
  if [[ -n "${SPEAKER_ID:-}" && -n "${FILE_VOICE:-}" ]]; then
    echo -e "${WHITE}๐ค Voice used:${NC} ${BLUE}$FILE_VOICE${NC} ${WHITE}(Piper TTS)${NC}"
  else
    echo -e "${WHITE}๐ค Voice used:${NC} ${BLUE}$VOICE_MODEL${NC} ${WHITE}(Piper TTS)${NC}"
  fi

  # Project-level personality file wins over the one in $HOME; "none"/"normal" are not shown
  PERSONALITY=$(cat "${PROJECT_ROOT:-/nonexistent}/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
  case "$PERSONALITY" in
    "" | none | normal) ;;
    *) echo -e "${WHITE}๐ซ Personality:${NC} ${YELLOW}$PERSONALITY${NC}" ;;
  esac

  echo -e "\033[38;5;240mSay: \"Turn off banner\" to hide this output\033[0m"
fi
|
|
634
|
+
|
|
635
|
+
# Warn when the audio cache directory has grown beyond 100 MB (banner mode only)
if [[ "$_BANNER_ENABLED" == "true" && -d "$AUDIO_DIR_PATH" ]]; then
  AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
  if [[ -n "$AUDIO_SIZE" && "$AUDIO_SIZE" -gt 100 ]]; then
    echo -e "\033[0;31mโ ๏ธ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
  fi
fi
|
|
642
|
+
|
|
643
|
+
# Marker/config files consulted for the status lines below
GLOBAL_MUTE_FILE="$HOME/.agentvibes-muted"
PROJECT_MUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-muted"
PROJECT_UNMUTE_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/agentvibes-unmuted"
BACKGROUND_ENABLED_FILE="${PROJECT_ROOT:-/nonexistent}/.claude/config/background-music-enabled.txt"
GLOBAL_BACKGROUND_ENABLED_FILE="$HOME/.claude/config/background-music-enabled.txt"

# Mute status: a project unmute marker combined with a global mute is reported
# first, then a project mute, then a global mute on its own.
if [[ -f "$PROJECT_UNMUTE_FILE" && -f "$GLOBAL_MUTE_FILE" ]]; then
  echo "๐ Status: Unmuted (project overrides global mute)"
elif [[ -f "$PROJECT_MUTE_FILE" ]]; then
  echo "๐ Status: Muted (project)"
elif [[ -f "$GLOBAL_MUTE_FILE" ]]; then
  echo "๐ Status: Would be muted (global) - but this project is speaking"
fi

# Background music status: only printed when no background track was used
if [[ -z "$BACKGROUND_MUSIC" ]]; then
  _bg_enabled=false
  # Enabled when either the project or the global config file contains "true"
  if { [[ -f "$BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$BACKGROUND_ENABLED_FILE" 2>/dev/null; } \
    || { [[ -f "$GLOBAL_BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$GLOBAL_BACKGROUND_ENABLED_FILE" 2>/dev/null; }; then
    _bg_enabled=true
  fi
  case "$_bg_enabled" in
    true) echo "๐ต Background music: Enabled but not playing (check config)" ;;
    *) echo "๐ต Background music: Disabled" ;;
  esac
fi
|
|
673
|
+
|
|
674
|
+
# Block until the background player (if one was started) has exited. Returning
# only after playback is done keeps the bmad-speak.sh speech lock held for the
# whole clip, so party-mode agents do not talk over each other.
[[ -z "$PLAYER_PID" ]] || wait "$PLAYER_PID" 2>/dev/null || true
|