agentvibes 4.2.0 → 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agentvibes/bmad/bmad-voices.md +69 -69
- package/.agentvibes/config.json +12 -0
- package/.claude/activation-instructions +54 -54
- package/.claude/audio/tracks/README.md +52 -52
- package/.claude/commands/agent-vibes/add.md +21 -21
- package/.claude/commands/agent-vibes/agent-vibes.md +101 -101
- package/.claude/commands/agent-vibes/agent.md +79 -79
- package/.claude/commands/agent-vibes/background-music.md +111 -111
- package/.claude/commands/agent-vibes/bmad.md +198 -198
- package/.claude/commands/agent-vibes/clean.md +18 -18
- package/.claude/commands/agent-vibes/cleanup.md +18 -18
- package/.claude/commands/agent-vibes/commands.json +145 -145
- package/.claude/commands/agent-vibes/effects.md +97 -97
- package/.claude/commands/agent-vibes/get.md +9 -9
- package/.claude/commands/agent-vibes/hide.md +91 -91
- package/.claude/commands/agent-vibes/language.md +23 -23
- package/.claude/commands/agent-vibes/learn.md +67 -67
- package/.claude/commands/agent-vibes/list.md +13 -13
- package/.claude/commands/agent-vibes/mute.md +37 -37
- package/.claude/commands/agent-vibes/preview.md +17 -17
- package/.claude/commands/agent-vibes/provider.md +68 -68
- package/.claude/commands/agent-vibes/replay-target.md +14 -14
- package/.claude/commands/agent-vibes/sample.md +12 -12
- package/.claude/commands/agent-vibes/set-favorite-voice.md +84 -84
- package/.claude/commands/agent-vibes/set-pretext.md +65 -65
- package/.claude/commands/agent-vibes/set-speed.md +41 -41
- package/.claude/commands/agent-vibes/show.md +84 -84
- package/.claude/commands/agent-vibes/switch.md +87 -87
- package/.claude/commands/agent-vibes/target-voice.md +26 -26
- package/.claude/commands/agent-vibes/target.md +30 -30
- package/.claude/commands/agent-vibes/translate.md +68 -68
- package/.claude/commands/agent-vibes/unmute.md +45 -45
- package/.claude/commands/agent-vibes/verbosity.md +89 -89
- package/.claude/commands/agent-vibes/whoami.md +7 -7
- package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
- package/.claude/commands/agent-vibes-rdp.md +24 -24
- package/.claude/config/agentvibes.json +1 -0
- package/.claude/config/audio-effects.cfg +2 -2
- package/.claude/config/audio-effects.cfg.sample +52 -52
- package/.claude/config/background-music-volume.txt +1 -0
- package/.claude/config/intro-text.txt +1 -0
- package/.claude/config/piper-speech-rate.txt +4 -0
- package/.claude/config/piper-target-speech-rate.txt +1 -0
- package/.claude/config/reverb-level.txt +1 -0
- package/.claude/config/tts-speech-rate.txt +4 -0
- package/.claude/config/tts-target-speech-rate.txt +1 -0
- package/.claude/docs/TERMUX_SETUP.md +408 -408
- package/.claude/github-star-reminder.txt +1 -1
- package/.claude/hooks/README-TTS-QUEUE.md +135 -135
- package/.claude/hooks/audio-cache-utils.sh +246 -246
- package/.claude/hooks/audio-processor.sh +433 -433
- package/.claude/hooks/background-music-manager.sh +404 -404
- package/.claude/hooks/bmad-speak-enhanced.sh +165 -165
- package/.claude/hooks/bmad-speak.sh +269 -269
- package/.claude/hooks/bmad-tts-injector.sh +568 -568
- package/.claude/hooks/bmad-voice-manager.sh +928 -928
- package/.claude/hooks/clawdbot-receiver-SECURE.sh +129 -129
- package/.claude/hooks/clawdbot-receiver.sh +107 -107
- package/.claude/hooks/clean-audio-cache.sh +22 -22
- package/.claude/hooks/cleanup-cache.sh +106 -106
- package/.claude/hooks/configure-rdp-mode.sh +137 -137
- package/.claude/hooks/download-extra-voices.sh +244 -244
- package/.claude/hooks/effects-manager.sh +268 -268
- package/.claude/hooks/github-star-reminder.sh +154 -154
- package/.claude/hooks/language-manager.sh +362 -362
- package/.claude/hooks/learn-manager.sh +492 -492
- package/.claude/hooks/macos-voice-manager.sh +205 -205
- package/.claude/hooks/migrate-background-music.sh +125 -125
- package/.claude/hooks/migrate-to-agentvibes.sh +161 -161
- package/.claude/hooks/optimize-background-music.sh +87 -87
- package/.claude/hooks/path-resolver.sh +60 -60
- package/.claude/hooks/personality-manager.sh +448 -448
- package/.claude/hooks/piper-download-voices.sh +225 -225
- package/.claude/hooks/piper-installer.sh +292 -292
- package/.claude/hooks/piper-multispeaker-registry.sh +171 -171
- package/.claude/hooks/piper-voice-manager.sh +24 -3
- package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +90 -90
- package/.claude/hooks/play-tts-enhanced.sh +105 -105
- package/.claude/hooks/play-tts-macos.sh +368 -368
- package/.claude/hooks/play-tts-piper.sh +679 -679
- package/.claude/hooks/play-tts-soprano.sh +356 -356
- package/.claude/hooks/play-tts-ssh-remote.sh +167 -167
- package/.claude/hooks/play-tts-termux-ssh.sh +169 -169
- package/.claude/hooks/play-tts.sh +301 -301
- package/.claude/hooks/prepare-release.sh +54 -54
- package/.claude/hooks/provider-commands.sh +617 -617
- package/.claude/hooks/provider-manager.sh +399 -399
- package/.claude/hooks/replay-target-audio.sh +95 -95
- package/.claude/hooks/requirements.txt +6 -6
- package/.claude/hooks/sentiment-manager.sh +201 -201
- package/.claude/hooks/session-start-tts.sh +81 -81
- package/.claude/hooks/soprano-gradio-synth.py +139 -139
- package/.claude/hooks/speed-manager.sh +291 -291
- package/.claude/hooks/stop-tts.sh +84 -84
- package/.claude/hooks/termux-installer.sh +261 -261
- package/.claude/hooks/translate-manager.sh +341 -341
- package/.claude/hooks/translator.py +237 -237
- package/.claude/hooks/tts-queue-worker.sh +145 -145
- package/.claude/hooks/tts-queue.sh +165 -165
- package/.claude/hooks/verbosity-manager.sh +178 -178
- package/.claude/hooks/voice-manager.sh +548 -548
- package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
- package/.claude/hooks-windows/background-music-manager.ps1 +348 -0
- package/.claude/hooks-windows/clean-audio-cache.ps1 +53 -0
- package/.claude/hooks-windows/download-extra-voices.ps1 +185 -0
- package/.claude/hooks-windows/effects-manager.ps1 +294 -0
- package/.claude/hooks-windows/language-manager.ps1 +193 -0
- package/.claude/hooks-windows/learn-manager.ps1 +241 -0
- package/.claude/hooks-windows/personality-manager.ps1 +266 -0
- package/.claude/hooks-windows/play-tts-piper.ps1 +209 -0
- package/.claude/hooks-windows/play-tts-sapi.ps1 +108 -0
- package/.claude/hooks-windows/play-tts-soprano.ps1 +159 -158
- package/.claude/hooks-windows/play-tts-windows-piper.ps1 +50 -5
- package/.claude/hooks-windows/play-tts-windows-sapi.ps1 +108 -108
- package/.claude/hooks-windows/play-tts.ps1 +344 -266
- package/.claude/hooks-windows/provider-manager.ps1 +29 -10
- package/.claude/hooks-windows/session-start-tts.ps1 +124 -124
- package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
- package/.claude/hooks-windows/speed-manager.ps1 +166 -0
- package/.claude/hooks-windows/verbosity-manager.ps1 +119 -0
- package/.claude/hooks-windows/voice-manager-windows.ps1 +92 -8
- package/.claude/output-styles/agent-vibes.md +202 -202
- package/.claude/personalities/angry.md +14 -14
- package/.claude/personalities/annoying.md +14 -14
- package/.claude/personalities/crass.md +14 -14
- package/.claude/personalities/dramatic.md +14 -14
- package/.claude/personalities/dry-humor.md +50 -50
- package/.claude/personalities/flirty.md +20 -20
- package/.claude/personalities/funny.md +14 -14
- package/.claude/personalities/grandpa.md +32 -32
- package/.claude/personalities/millennial.md +14 -14
- package/.claude/personalities/moody.md +14 -14
- package/.claude/personalities/normal.md +16 -16
- package/.claude/personalities/pirate.md +14 -14
- package/.claude/personalities/poetic.md +14 -14
- package/.claude/personalities/professional.md +14 -14
- package/.claude/personalities/rapper.md +55 -55
- package/.claude/personalities/robot.md +14 -14
- package/.claude/personalities/sarcastic.md +38 -38
- package/.claude/personalities/sassy.md +14 -14
- package/.claude/personalities/surfer-dude.md +14 -14
- package/.claude/personalities/zen.md +14 -14
- package/.claude/settings.json +15 -15
- package/.claude/verbosity.txt +1 -1
- package/.clawdbot/README.md +105 -105
- package/.clawdbot/skill/SKILL.md +241 -241
- package/.mcp.json +12 -0
- package/CLAUDE.md +170 -170
- package/README.md +2029 -2007
- package/RELEASE_NOTES.md +1310 -1203
- package/WINDOWS-SETUP.md +208 -208
- package/bin/agent-vibes +39 -39
- package/bin/agentvibes-voice-browser.js +1840 -1840
- package/bin/agentvibes.js +48 -2
- package/bin/mcp-server.js +121 -121
- package/bin/mcp-server.sh +206 -206
- package/bin/test-bmad-pr +78 -78
- package/mcp-server/QUICK_START.md +203 -203
- package/mcp-server/README.md +345 -345
- package/mcp-server/WINDOWS_SETUP.md +260 -260
- package/mcp-server/docs/troubleshooting-audio.md +313 -313
- package/mcp-server/examples/claude_desktop_config.json +11 -11
- package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
- package/mcp-server/examples/custom_instructions.md +169 -169
- package/mcp-server/install-deps.js +130 -130
- package/mcp-server/pyproject.toml +52 -52
- package/mcp-server/requirements.txt +2 -2
- package/mcp-server/server.py +1465 -1453
- package/mcp-server/test_server.py +395 -395
- package/mcp-server/test_windows_script_parity.py +336 -0
- package/package.json +110 -110
- package/setup-windows.ps1 +815 -815
- package/src/bmad-detector.js +71 -71
- package/src/cli/list-personalities.js +110 -110
- package/src/cli/list-voices.js +114 -114
- package/src/commands/bmad-voices.js +394 -394
- package/src/commands/install-mcp.js +476 -476
- package/src/console/app.js +824 -824
- package/src/console/audio-env.js +20 -1
- package/src/console/brand-colors.js +13 -13
- package/src/console/constants/personalities.js +44 -44
- package/src/console/footer-config.js +50 -50
- package/src/console/modals/modal-overlay.js +247 -247
- package/src/console/navigation.js +62 -62
- package/src/console/tabs/agents-tab.js +1684 -1516
- package/src/console/tabs/help-tab.js +261 -261
- package/src/console/tabs/install-tab.js +1007 -991
- package/src/console/tabs/music-tab.js +22 -8
- package/src/console/tabs/placeholder-tab.js +53 -53
- package/src/console/tabs/readme-tab.js +267 -267
- package/src/console/tabs/receiver-tab.js +1472 -1212
- package/src/console/tabs/settings-tab.js +208 -84
- package/src/console/tabs/voices-tab.js +100 -21
- package/src/console/widgets/destroy-list.js +25 -25
- package/src/console/widgets/format-utils.js +89 -89
- package/src/console/widgets/notice.js +55 -55
- package/src/console/widgets/personality-picker.js +185 -185
- package/src/console/widgets/reverb-picker.js +94 -94
- package/src/console/widgets/track-picker.js +285 -285
- package/src/installer/music-file-input.js +304 -304
- package/src/installer.js +5895 -5829
- package/src/services/agent-voice-store.js +423 -423
- package/src/services/config-service.js +264 -264
- package/src/services/navigation-service.js +123 -123
- package/src/services/provider-service.js +143 -132
- package/src/services/verbosity-service.js +157 -157
- package/src/utils/audio-duration-validator.js +298 -298
- package/src/utils/audio-format-validator.js +277 -277
- package/src/utils/dependency-checker.js +469 -466
- package/src/utils/file-ownership-verifier.js +358 -358
- package/src/utils/list-formatter.js +194 -194
- package/src/utils/music-file-validator.js +285 -285
- package/src/utils/preview-list-prompt.js +136 -136
- package/src/utils/provider-validator.js +96 -12
- package/src/utils/secure-music-storage.js +412 -412
- package/templates/agentvibes-receiver.sh +482 -482
- package/templates/audio/welcome-music.mp3 +0 -0
- package/voice-assignments.json +8244 -8244
- package/.claude/config/background-music-position.txt +0 -1
|
@@ -1,356 +1,356 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
#
|
|
3
|
-
# File: .claude/hooks/play-tts-soprano.sh
|
|
4
|
-
#
|
|
5
|
-
# AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
|
|
6
|
-
# Website: https://agentvibes.org
|
|
7
|
-
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
-
#
|
|
9
|
-
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
-
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
-
#
|
|
12
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
-
# you may not use this file except in compliance with the License.
|
|
14
|
-
# You may obtain a copy of the License at
|
|
15
|
-
#
|
|
16
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
-
#
|
|
18
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
-
# See the License for the specific language governing permissions and
|
|
22
|
-
# limitations under the License.
|
|
23
|
-
#
|
|
24
|
-
# DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
-
# express or implied. Use at your own risk. See the Apache License for details.
|
|
26
|
-
#
|
|
27
|
-
# ---
|
|
28
|
-
#
|
|
29
|
-
# @fileoverview Soprano TTS Provider Implementation - Free, local, neural-quality TTS
|
|
30
|
-
# @context Provides ultra-lightweight on-device neural TTS via Soprano (80M params)
|
|
31
|
-
# @architecture Implements provider interface contract with 3 synthesis modes (WebUI/API/CLI)
|
|
32
|
-
# @dependencies soprano-tts (pip), soprano-gradio-synth.py, ffmpeg (optional padding), audio players
|
|
33
|
-
# @entrypoints Called by play-tts.sh router when provider=soprano
|
|
34
|
-
# @patterns Provider contract: text/voice → audio file path, auto-mode detection, Gradio SSE protocol
|
|
35
|
-
# @related play-tts.sh, soprano-gradio-synth.py, provider-manager.sh, GitHub Issue #94
|
|
36
|
-
#
|
|
37
|
-
# Supports three modes (auto-detected in priority order):
|
|
38
|
-
# 1. WebUI mode: Gradio WebUI running (soprano-webui), uses Python helper
|
|
39
|
-
# 2. API mode: OpenAI-compatible server (uvicorn soprano.server:app), uses curl
|
|
40
|
-
# 3. CLI mode: Direct `soprano` command — reloads model each call (slowest)
|
|
41
|
-
#
|
|
42
|
-
# Environment variables:
|
|
43
|
-
# SOPRANO_PORT — WebUI/API port (default: 7860)
|
|
44
|
-
# SOPRANO_DEVICE — Device for CLI mode: auto|cuda|cpu|mps (default: auto)
|
|
45
|
-
#
|
|
46
|
-
|
|
47
|
-
# Fix locale warnings
|
|
48
|
-
export LC_ALL=C
|
|
49
|
-
|
|
50
|
-
TEXT="$1"
|
|
51
|
-
VOICE_OVERRIDE="$2" # Ignored — Soprano has a single voice, kept for provider contract
|
|
52
|
-
|
|
53
|
-
# Strip emojis, asterisks, and markdown formatting
|
|
54
|
-
TEXT=$(printf '%s' "$TEXT" | perl -CSD -pe '
|
|
55
|
-
s/[\x{1F300}-\x{1F9FF}]//g;
|
|
56
|
-
s/[\x{2600}-\x{27BF}]//g;
|
|
57
|
-
s/[\x{FE00}-\x{FE0F}]//g;
|
|
58
|
-
s/[\x{200D}]//g;
|
|
59
|
-
s/[\x{2500}-\x{257F}]//g;
|
|
60
|
-
s/[\x{2580}-\x{259F}]//g;
|
|
61
|
-
s/\*+//g; s/#+\s*//g; s/`//g; s/~+//g; s/^\s*[-]\s*//g;
|
|
62
|
-
')
|
|
63
|
-
|
|
64
|
-
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
65
|
-
source "$SCRIPT_DIR/audio-cache-utils.sh"
|
|
66
|
-
|
|
67
|
-
SOPRANO_PORT="${SOPRANO_PORT:-7860}"
|
|
68
|
-
SOPRANO_DEVICE="${SOPRANO_DEVICE:-auto}"
|
|
69
|
-
|
|
70
|
-
# @function validate_inputs
|
|
71
|
-
# @intent Check required parameters
|
|
72
|
-
# @why Fail fast with clear errors if inputs missing
|
|
73
|
-
# @exitcode 1=missing text
|
|
74
|
-
if [[ -z "$TEXT" ]]; then
|
|
75
|
-
echo "Usage: $0 \"text to speak\" [voice_override]"
|
|
76
|
-
exit 1
|
|
77
|
-
fi
|
|
78
|
-
|
|
79
|
-
# @function check_webui_server
|
|
80
|
-
# @intent Detect if Soprano Gradio WebUI is reachable
|
|
81
|
-
# @why WebUI mode keeps model in memory for fastest repeated synthesis
|
|
82
|
-
# @returns exitcode 0=reachable, 1=not reachable
|
|
83
|
-
check_webui_server() {
|
|
84
|
-
curl -sf --max-time 2 "http://127.0.0.1:${SOPRANO_PORT}/gradio_api/info" -o /dev/null 2>/dev/null ||
|
|
85
|
-
curl -sf --max-time 2 "http://127.0.0.1:${SOPRANO_PORT}/info" -o /dev/null 2>/dev/null
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
# @function check_api_server
|
|
89
|
-
# @intent Detect if Soprano OpenAI-compatible API server is reachable
|
|
90
|
-
# @why API mode is simpler than WebUI (direct WAV response, no SSE polling)
|
|
91
|
-
# @returns exitcode 0=reachable, 1=not reachable
|
|
92
|
-
check_api_server() {
|
|
93
|
-
curl -sf --max-time 2 "http://127.0.0.1:${SOPRANO_PORT}/v1/audio/speech" \
|
|
94
|
-
-H "Content-Type: application/json" \
|
|
95
|
-
-d '{"input":"test"}' -o /dev/null 2>/dev/null
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
# @function check_soprano_available
|
|
99
|
-
# @intent Verify at least one synthesis mode is available
|
|
100
|
-
# @why Provide helpful installation instructions if nothing works
|
|
101
|
-
# @exitcode 2=soprano not installed and no server running
|
|
102
|
-
if ! command -v soprano &>/dev/null && ! check_webui_server && ! check_api_server; then
|
|
103
|
-
echo "❌ Error: Soprano TTS not installed and no server running on port $SOPRANO_PORT"
|
|
104
|
-
echo ""
|
|
105
|
-
echo "Install: pip install soprano-tts"
|
|
106
|
-
echo " (GPU): pip install soprano-tts[lmdeploy]"
|
|
107
|
-
echo ""
|
|
108
|
-
echo "Start WebUI: soprano-webui"
|
|
109
|
-
echo "Start API: uvicorn soprano.server:app --host 127.0.0.1 --port $SOPRANO_PORT"
|
|
110
|
-
exit 2
|
|
111
|
-
fi
|
|
112
|
-
|
|
113
|
-
# @function determine_audio_directory
|
|
114
|
-
# @intent Find appropriate directory for audio file storage
|
|
115
|
-
# @why Supports project-local and global storage
|
|
116
|
-
# @returns Sets $AUDIO_DIR global variable
|
|
117
|
-
# SECURITY: Canonicalize path to prevent traversal (#128)
|
|
118
|
-
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
|
|
119
|
-
CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
|
|
120
|
-
fi
|
|
121
|
-
if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
|
|
122
|
-
AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
|
|
123
|
-
else
|
|
124
|
-
CURRENT_DIR="$PWD"
|
|
125
|
-
while [[ "$CURRENT_DIR" != "/" ]]; do
|
|
126
|
-
if [[ -d "$CURRENT_DIR/.claude" ]]; then
|
|
127
|
-
AUDIO_DIR="$CURRENT_DIR/.claude/audio"
|
|
128
|
-
break
|
|
129
|
-
fi
|
|
130
|
-
CURRENT_DIR=$(dirname "$CURRENT_DIR")
|
|
131
|
-
done
|
|
132
|
-
if [[ -z "$AUDIO_DIR" ]]; then
|
|
133
|
-
AUDIO_DIR="$HOME/.claude/audio"
|
|
134
|
-
fi
|
|
135
|
-
fi
|
|
136
|
-
|
|
137
|
-
mkdir -p "$AUDIO_DIR"
|
|
138
|
-
# SECURITY: Use mktemp for unpredictable filenames (#130)
|
|
139
|
-
TEMP_FILE=$(mktemp "$AUDIO_DIR/tts-XXXXXX.wav")
|
|
140
|
-
|
|
141
|
-
# @function synthesize_speech
|
|
142
|
-
# @intent Generate speech using best available Soprano mode
|
|
143
|
-
# @why Auto-detect WebUI → API → CLI for optimal performance
|
|
144
|
-
# @param Uses globals: $TEXT, $SOPRANO_PORT, $SOPRANO_DEVICE
|
|
145
|
-
# @returns Creates WAV file at $TEMP_FILE, sets $SYNTH_MODE
|
|
146
|
-
# @exitcode 4=synthesis error
|
|
147
|
-
SYNTH_MODE=""
|
|
148
|
-
|
|
149
|
-
if check_webui_server; then
|
|
150
|
-
# Gradio WebUI mode — use Python helper for SSE protocol
|
|
151
|
-
SYNTH_MODE="webui"
|
|
152
|
-
python3 "$SCRIPT_DIR/soprano-gradio-synth.py" "$TEXT" "$TEMP_FILE" "$SOPRANO_PORT" 2>/dev/null
|
|
153
|
-
elif check_api_server; then
|
|
154
|
-
# OpenAI-compatible API mode — direct curl
|
|
155
|
-
SYNTH_MODE="api"
|
|
156
|
-
# SECURITY: Use proper JSON encoding to prevent injection (#133)
|
|
157
|
-
_JSON_PAYLOAD=$(printf '%s' "$TEXT" | python3 -c 'import sys,json; print(json.dumps({"input":sys.stdin.read()}))' 2>/dev/null) || \
|
|
158
|
-
_JSON_PAYLOAD=$(printf '{"input":"%s"}' "$(printf '%s' "$TEXT" | sed 's/\\/\\\\/g; s/"/\\"/g; s/\t/\\t/g')")
|
|
159
|
-
curl -sf "http://127.0.0.1:${SOPRANO_PORT}/v1/audio/speech" \
|
|
160
|
-
-H "Content-Type: application/json" \
|
|
161
|
-
-d "$_JSON_PAYLOAD" \
|
|
162
|
-
--output "$TEMP_FILE" 2>/dev/null
|
|
163
|
-
else
|
|
164
|
-
# CLI fallback — reloads model each call (slowest)
|
|
165
|
-
SYNTH_MODE="cli"
|
|
166
|
-
soprano "$TEXT" -o "$TEMP_FILE" -d "$SOPRANO_DEVICE" 2>/dev/null
|
|
167
|
-
fi
|
|
168
|
-
|
|
169
|
-
if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
|
|
170
|
-
echo "❌ Failed to synthesize speech with Soprano ($SYNTH_MODE mode)"
|
|
171
|
-
[[ "$SYNTH_MODE" == "webui" ]] && echo " Try: python3 $SCRIPT_DIR/soprano-gradio-synth.py \"test\" /tmp/test.wav $SOPRANO_PORT"
|
|
172
|
-
exit 4
|
|
173
|
-
fi
|
|
174
|
-
|
|
175
|
-
# @function detect_remote_session
|
|
176
|
-
# @intent Auto-detect SSH/RDP sessions and enable audio compression
|
|
177
|
-
# @why Remote desktop audio is choppy without compression
|
|
178
|
-
# @returns Sets AGENTVIBES_RDP_MODE environment variable
|
|
179
|
-
if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]]; then
|
|
180
|
-
if [[ -n "${SSH_CLIENT:-}" ]] || [[ -n "${SSH_TTY:-}" ]] || [[ "${DISPLAY:-}" =~ ^localhost:.* ]]; then
|
|
181
|
-
export AGENTVIBES_RDP_MODE=true
|
|
182
|
-
echo "🌐 Remote session detected - enabling audio compression"
|
|
183
|
-
fi
|
|
184
|
-
fi
|
|
185
|
-
|
|
186
|
-
# @function compress_for_remote
|
|
187
|
-
# @intent Compress TTS audio for remote sessions (SSH/RDP)
|
|
188
|
-
# @why Reduces bandwidth and prevents choppy playback
|
|
189
|
-
if [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] && command -v ffmpeg &>/dev/null; then
|
|
190
|
-
COMPRESSED_FILE=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX.wav")
|
|
191
|
-
ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null
|
|
192
|
-
if [[ -f "$COMPRESSED_FILE" ]]; then
|
|
193
|
-
rm -f "$TEMP_FILE"
|
|
194
|
-
TEMP_FILE="$COMPRESSED_FILE"
|
|
195
|
-
fi
|
|
196
|
-
fi
|
|
197
|
-
|
|
198
|
-
# @function add_silence_padding
|
|
199
|
-
# @intent Add silence to prevent WSL audio static
|
|
200
|
-
# @why WSL audio subsystem cuts off first ~200ms
|
|
201
|
-
if command -v ffmpeg &>/dev/null; then
|
|
202
|
-
PADDED_FILE=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX.wav")
|
|
203
|
-
ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
|
|
204
|
-
-filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
|
|
205
|
-
-map "[out]" -y "$PADDED_FILE" 2>/dev/null
|
|
206
|
-
if [[ -f "$PADDED_FILE" ]]; then
|
|
207
|
-
rm -f "$TEMP_FILE"
|
|
208
|
-
TEMP_FILE="$PADDED_FILE"
|
|
209
|
-
fi
|
|
210
|
-
fi
|
|
211
|
-
|
|
212
|
-
# @function apply_audio_effects
|
|
213
|
-
# @intent Apply sox effects and background music via audio-processor.sh
|
|
214
|
-
# @param Uses global: $TEMP_FILE
|
|
215
|
-
# @returns Updates $TEMP_FILE to processed version
|
|
216
|
-
BACKGROUND_MUSIC=""
|
|
217
|
-
if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
|
|
218
|
-
PROCESSED_FILE="$AUDIO_DIR/tts-processed-$(date +%s).wav"
|
|
219
|
-
PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" 2>/dev/null) || {
|
|
220
|
-
PROCESSED_FILE="$TEMP_FILE"
|
|
221
|
-
PROCESSOR_OUTPUT="$TEMP_FILE|"
|
|
222
|
-
}
|
|
223
|
-
PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
|
|
224
|
-
BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
|
|
225
|
-
if [[ -f "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
|
|
226
|
-
rm -f "$TEMP_FILE"
|
|
227
|
-
TEMP_FILE="$PROCESSED_FILE"
|
|
228
|
-
fi
|
|
229
|
-
fi
|
|
230
|
-
|
|
231
|
-
# @function play_audio
|
|
232
|
-
# @intent Play generated audio using available player with sequential playback
|
|
233
|
-
# @why Support multiple audio players and prevent overlapping audio
|
|
234
|
-
# SECURITY: Use user-isolated lock directory (#129)
|
|
235
|
-
_LOCK_DIR="${XDG_RUNTIME_DIR:-/tmp/agentvibes-$(id -u)}"
|
|
236
|
-
mkdir -p "$_LOCK_DIR"
|
|
237
|
-
chmod 700 "$_LOCK_DIR"
|
|
238
|
-
LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"
|
|
239
|
-
|
|
240
|
-
# Auto-remove stale lock files (older than 30 seconds)
|
|
241
|
-
if [ -f "$LOCK_FILE" ]; then
|
|
242
|
-
if [[ "$(uname)" == "Darwin" ]]; then
|
|
243
|
-
_lock_mtime=$(stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0)
|
|
244
|
-
else
|
|
245
|
-
_lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || echo 0)
|
|
246
|
-
fi
|
|
247
|
-
_lock_age=$(( $(date +%s) - _lock_mtime ))
|
|
248
|
-
if [[ $_lock_age -gt 30 ]]; then
|
|
249
|
-
rm -f "$LOCK_FILE"
|
|
250
|
-
fi
|
|
251
|
-
fi
|
|
252
|
-
|
|
253
|
-
for i in {1..4}; do
|
|
254
|
-
if [ ! -f "$LOCK_FILE" ]; then
|
|
255
|
-
break
|
|
256
|
-
fi
|
|
257
|
-
sleep 0.5
|
|
258
|
-
done
|
|
259
|
-
|
|
260
|
-
if [ -f "$LOCK_FILE" ]; then
|
|
261
|
-
echo "⏭️ Skipping TTS (previous audio still playing)" >&2
|
|
262
|
-
exit 0
|
|
263
|
-
fi
|
|
264
|
-
|
|
265
|
-
touch "$LOCK_FILE"
|
|
266
|
-
|
|
267
|
-
AUDIO_DIR_PLAY="${TEMP_FILE%/*}"
|
|
268
|
-
WRITE_LOCK_FILE="$AUDIO_DIR_PLAY/$(basename "$TEMP_FILE" .wav).lock"
|
|
269
|
-
touch "$WRITE_LOCK_FILE"
|
|
270
|
-
|
|
271
|
-
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null)
|
|
272
|
-
DURATION=${DURATION%.*}
|
|
273
|
-
DURATION=${DURATION:-1}
|
|
274
|
-
|
|
275
|
-
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && [[ "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
|
|
276
|
-
if [[ "$(uname -s)" == "Darwin" ]]; then
|
|
277
|
-
afplay "$TEMP_FILE" >/dev/null 2>&1 &
|
|
278
|
-
PLAYER_PID=$!
|
|
279
|
-
elif [[ -n "${TERMUX_VERSION:-}" ]] || [[ -d "/data/data/com.termux" ]]; then
|
|
280
|
-
termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
|
|
281
|
-
PLAYER_PID=$!
|
|
282
|
-
else
|
|
283
|
-
(paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
|
|
284
|
-
PLAYER_PID=$!
|
|
285
|
-
fi
|
|
286
|
-
fi
|
|
287
|
-
|
|
288
|
-
(sleep $DURATION; rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE") &
|
|
289
|
-
disown
|
|
290
|
-
|
|
291
|
-
# @function display_cache_stats
|
|
292
|
-
# @intent Show audio cache statistics with color-coded output
|
|
293
|
-
AUDIO_DIR_PATH=$(get_audio_dir)
|
|
294
|
-
|
|
295
|
-
BLUE='\033[0;34m'
|
|
296
|
-
YELLOW='\033[1;33m'
|
|
297
|
-
PURPLE='\033[0;35m'
|
|
298
|
-
RED='\033[0;31m'
|
|
299
|
-
GREEN='\033[0;32m'
|
|
300
|
-
ORANGE='\033[0;33m'
|
|
301
|
-
WHITE='\033[1;37m'
|
|
302
|
-
CYAN='\033[0;36m'
|
|
303
|
-
GOLD='\033[38;5;226m'
|
|
304
|
-
NC='\033[0m'
|
|
305
|
-
|
|
306
|
-
AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
|
|
307
|
-
INITIAL_SIZE=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
|
|
308
|
-
if [[ $INITIAL_SIZE -gt $((AUTO_CLEAN_THRESHOLD * 1048576)) ]]; then
|
|
309
|
-
DELETED=$(auto_clean_old_files "$AUDIO_DIR_PATH" "$AUTO_CLEAN_THRESHOLD")
|
|
310
|
-
if [[ $DELETED -gt 0 ]]; then
|
|
311
|
-
echo -e "${ORANGE}🧹 Auto-cleaned $DELETED old files${NC}"
|
|
312
|
-
fi
|
|
313
|
-
fi
|
|
314
|
-
|
|
315
|
-
FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
|
|
316
|
-
SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
|
|
317
|
-
SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")
|
|
318
|
-
|
|
319
|
-
CACHE_COLOR=$GREEN
|
|
320
|
-
if [[ $SIZE_BYTES -gt 3221225472 ]]; then
|
|
321
|
-
CACHE_COLOR=$RED
|
|
322
|
-
elif [[ $SIZE_BYTES -gt 524288000 ]]; then
|
|
323
|
-
CACHE_COLOR=$YELLOW
|
|
324
|
-
fi
|
|
325
|
-
|
|
326
|
-
echo -e "${WHITE}💾 Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}🗄️${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}🧹${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"
|
|
327
|
-
|
|
328
|
-
if [[ -n "$BACKGROUND_MUSIC" ]]; then
|
|
329
|
-
MUSIC_FILENAME=$(basename "$BACKGROUND_MUSIC")
|
|
330
|
-
echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}$MUSIC_FILENAME${NC}"
|
|
331
|
-
fi
|
|
332
|
-
echo -e "${WHITE}🎤 Voice:${NC} ${BLUE}Soprano-1.1-80M${NC} ${WHITE}(Soprano TTS, ${SYNTH_MODE} mode)${NC}"
|
|
333
|
-
|
|
334
|
-
# Show personality if configured
|
|
335
|
-
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/../.." && pwd)}"
|
|
336
|
-
PERSONALITY=$(cat "$PROJECT_ROOT/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
|
|
337
|
-
if [[ -n "$PERSONALITY" ]] && [[ "$PERSONALITY" != "none" ]] && [[ "$PERSONALITY" != "normal" ]]; then
|
|
338
|
-
echo -e "${WHITE}💫 Personality:${NC} ${YELLOW}$PERSONALITY${NC}"
|
|
339
|
-
fi
|
|
340
|
-
|
|
341
|
-
if [[ -d "$AUDIO_DIR_PATH" ]]; then
|
|
342
|
-
AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
|
|
343
|
-
if [[ -n "$AUDIO_SIZE" ]] && [[ "$AUDIO_SIZE" -gt 100 ]]; then
|
|
344
|
-
echo -e "\033[0;31m⚠️ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
|
|
345
|
-
fi
|
|
346
|
-
fi
|
|
347
|
-
|
|
348
|
-
# Background music status
|
|
349
|
-
if [[ -z "$BACKGROUND_MUSIC" ]]; then
|
|
350
|
-
BACKGROUND_ENABLED_FILE="$PROJECT_ROOT/.claude/config/background-music-enabled.txt"
|
|
351
|
-
if [[ -f "$BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$BACKGROUND_ENABLED_FILE" 2>/dev/null; then
|
|
352
|
-
echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}Enabled but not playing (check config)${NC}"
|
|
353
|
-
else
|
|
354
|
-
echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}Disabled${NC}"
|
|
355
|
-
fi
|
|
356
|
-
fi
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# File: .claude/hooks/play-tts-soprano.sh
|
|
4
|
+
#
|
|
5
|
+
# AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
|
|
6
|
+
# Website: https://agentvibes.org
|
|
7
|
+
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
+
#
|
|
9
|
+
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
+
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
+
#
|
|
12
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
+
# you may not use this file except in compliance with the License.
|
|
14
|
+
# You may obtain a copy of the License at
|
|
15
|
+
#
|
|
16
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
+
#
|
|
18
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
# See the License for the specific language governing permissions and
|
|
22
|
+
# limitations under the License.
|
|
23
|
+
#
|
|
24
|
+
# DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
+
# express or implied. Use at your own risk. See the Apache License for details.
|
|
26
|
+
#
|
|
27
|
+
# ---
|
|
28
|
+
#
|
|
29
|
+
# @fileoverview Soprano TTS Provider Implementation - Free, local, neural-quality TTS
|
|
30
|
+
# @context Provides ultra-lightweight on-device neural TTS via Soprano (80M params)
|
|
31
|
+
# @architecture Implements provider interface contract with 3 synthesis modes (WebUI/API/CLI)
|
|
32
|
+
# @dependencies soprano-tts (pip), soprano-gradio-synth.py, ffmpeg (optional padding), audio players
|
|
33
|
+
# @entrypoints Called by play-tts.sh router when provider=soprano
|
|
34
|
+
# @patterns Provider contract: text/voice → audio file path, auto-mode detection, Gradio SSE protocol
|
|
35
|
+
# @related play-tts.sh, soprano-gradio-synth.py, provider-manager.sh, GitHub Issue #94
|
|
36
|
+
#
|
|
37
|
+
# Supports three modes (auto-detected in priority order):
|
|
38
|
+
# 1. WebUI mode: Gradio WebUI running (soprano-webui), uses Python helper
|
|
39
|
+
# 2. API mode: OpenAI-compatible server (uvicorn soprano.server:app), uses curl
|
|
40
|
+
# 3. CLI mode: Direct `soprano` command — reloads model each call (slowest)
|
|
41
|
+
#
|
|
42
|
+
# Environment variables:
|
|
43
|
+
# SOPRANO_PORT — WebUI/API port (default: 7860)
|
|
44
|
+
# SOPRANO_DEVICE — Device for CLI mode: auto|cuda|cpu|mps (default: auto)
|
|
45
|
+
#
|
|
46
|
+
|
|
47
|
+
# Run every child process in the C locale so tools do not print locale warnings.
export LC_ALL=C

# Provider contract: $1 = text to speak, $2 = voice override.
TEXT="$1"
VOICE_OVERRIDE="$2"  # Ignored — Soprano has a single voice, kept for provider contract

# Perl rules applied to each input line before synthesis. They delete:
#   - the listed emoji / dingbat / variation-selector / zero-width-joiner /
#     box-drawing / block-element code point ranges
#   - markdown noise: runs of "*", runs of "#" plus following whitespace,
#     backticks, runs of "~", and a leading "-" bullet with its whitespace
_SOPRANO_STRIP_RULES='
  s/[\x{1F300}-\x{1F9FF}]//g;
  s/[\x{2600}-\x{27BF}]//g;
  s/[\x{FE00}-\x{FE0F}]//g;
  s/[\x{200D}]//g;
  s/[\x{2500}-\x{257F}]//g;
  s/[\x{2580}-\x{259F}]//g;
  s/\*+//g; s/#+\s*//g; s/`//g; s/~+//g; s/^\s*[-]\s*//g;
'

# -CSD: treat stdin/stdout as UTF-8 so the \x{...} ranges match code points.
# The here-string adds one trailing newline, which $(...) strips again.
TEXT=$(perl -CSD -pe "$_SOPRANO_STRIP_RULES" <<<"$TEXT")
|
|
63
|
+
|
|
64
|
+
# Absolute directory containing this script; sibling helper scripts are loaded from it.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
. "$SCRIPT_DIR/audio-cache-utils.sh"

# Environment overrides with their documented defaults.
: "${SOPRANO_PORT:=7860}"    # WebUI/API port
: "${SOPRANO_DEVICE:=auto}"  # device passed to the soprano CLI
|
|
69
|
+
|
|
70
|
+
# @function validate_inputs
# @intent Reject an empty text argument
# @why Text that is empty (or became empty after sanitising) cannot be synthesised,
#      so stop immediately with a usage line
# @exitcode 1=missing text
[[ -n "$TEXT" ]] || {
  echo "Usage: $0 \"text to speak\" [voice_override]"
  exit 1
}
|
|
78
|
+
|
|
79
|
+
# @function check_webui_server
# @intent Probe the local Soprano Gradio WebUI on $SOPRANO_PORT
# @why WebUI mode keeps model in memory for fastest repeated synthesis
# @returns exitcode 0=reachable, non-zero=not reachable (curl's exit status)
check_webui_server() {
  local base_url="http://127.0.0.1:${SOPRANO_PORT}"

  # Two info paths are tried in order; the first one that answers wins.
  if curl -sf --max-time 2 "${base_url}/gradio_api/info" -o /dev/null 2>/dev/null; then
    return 0
  fi
  curl -sf --max-time 2 "${base_url}/info" -o /dev/null 2>/dev/null
}
|
|
87
|
+
|
|
88
|
+
# @function check_api_server
# @intent Probe the local Soprano OpenAI-compatible API on $SOPRANO_PORT
# @why API mode is simpler than WebUI (direct WAV response, no SSE polling)
# @returns exitcode 0=reachable, non-zero=not reachable (curl's exit status)
# NOTE(review): the probe POSTs a real request body to the speech endpoint with a
#               2 second limit; presumably the server synthesises "test" each time — confirm.
check_api_server() {
  local endpoint="http://127.0.0.1:${SOPRANO_PORT}/v1/audio/speech"
  local probe_body='{"input":"test"}'

  curl -sf --max-time 2 "$endpoint" \
    -H "Content-Type: application/json" \
    -d "$probe_body" -o /dev/null 2>/dev/null
}
|
|
97
|
+
|
|
98
|
+
# @function check_soprano_available
# @intent Require at least one usable synthesis mode (CLI binary, WebUI, or API server)
# @why Print installation and start-up hints when nothing is available
# @exitcode 2=soprano not installed and no server running
if command -v soprano &>/dev/null || check_webui_server || check_api_server; then
  : # at least one mode is usable — continue
else
  printf '%s\n' \
    "❌ Error: Soprano TTS not installed and no server running on port $SOPRANO_PORT" \
    "" \
    "Install: pip install soprano-tts" \
    " (GPU): pip install soprano-tts[lmdeploy]" \
    "" \
    "Start WebUI: soprano-webui" \
    "Start API: uvicorn soprano.server:app --host 127.0.0.1 --port $SOPRANO_PORT"
  exit 2
fi
|
|
112
|
+
|
|
113
|
+
# @function determine_audio_directory
# @intent Find appropriate directory for audio file storage
# @why Supports project-local and global storage
# @returns Sets $AUDIO_DIR global variable (always non-empty on return)
#
# Resolution order:
#   1. $CLAUDE_PROJECT_DIR/.claude/audio   (when the variable names an existing directory)
#   2. <nearest ancestor of $PWD containing .claude>/.claude/audio
#   3. $HOME/.claude/audio
determine_audio_directory() {
  local parent_dir

  # Start from a clean slate: a value inherited from the environment (or left
  # over from an earlier call) must never be mistaken for a search result.
  AUDIO_DIR=""

  # SECURITY: Canonicalize path to prevent traversal (#128)
  if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
    CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
  fi

  if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
    AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
    return 0
  fi

  # Walk upwards from the working directory looking for a .claude directory.
  CURRENT_DIR="$PWD"
  while [[ "$CURRENT_DIR" != "/" ]]; do
    if [[ -d "$CURRENT_DIR/.claude" ]]; then
      AUDIO_DIR="$CURRENT_DIR/.claude/audio"
      break
    fi
    parent_dir=$(dirname "$CURRENT_DIR")
    # dirname of "." (or of an empty path) is "." again; stop instead of spinning forever.
    if [[ "$parent_dir" == "$CURRENT_DIR" ]]; then
      break
    fi
    CURRENT_DIR="$parent_dir"
  done

  if [[ -z "$AUDIO_DIR" ]]; then
    AUDIO_DIR="$HOME/.claude/audio"
  fi
}

determine_audio_directory
|
|
136
|
+
|
|
137
|
+
# @function create_temp_wav
# @intent Create the audio directory and reserve an empty, uniquely named .wav file in it
# @why The mktemp template ends in X's and ".wav" is appended by a rename, because
#      BSD/macOS mktemp only substitutes X's at the very end of the template
# @param $1 target directory
# @returns Sets $TEMP_FILE (empty string on failure); exitcode 0=created, 1=failed
# SECURITY: Use mktemp for unpredictable filenames (#130)
create_temp_wav() {
  local target_dir="$1" reserved
  TEMP_FILE=""

  if [[ -z "$target_dir" ]]; then
    echo "❌ Audio directory is not set" >&2
    return 1
  fi
  if ! mkdir -p "$target_dir"; then
    echo "❌ Cannot create audio directory: $target_dir" >&2
    return 1
  fi
  if ! reserved=$(mktemp "$target_dir/tts-XXXXXX"); then
    echo "❌ Cannot create temporary audio file in: $target_dir" >&2
    return 1
  fi
  # Refuse to clobber an existing file when adding the extension.
  if [[ -e "$reserved.wav" ]] || ! mv -- "$reserved" "$reserved.wav"; then
    rm -f -- "$reserved"
    echo "❌ Cannot name temporary audio file: $reserved.wav" >&2
    return 1
  fi
  TEMP_FILE="$reserved.wav"
}

# A failure leaves $TEMP_FILE empty; the output check after synthesis then aborts the run.
create_temp_wav "$AUDIO_DIR"
|
|
140
|
+
|
|
141
|
+
# @function soprano_json_escape
# @intent Escape text for use inside a JSON string literal using only bash
# @why Used when python3 is unavailable; pure parameter expansion handles embedded
#      newlines and avoids sed escapes that differ between GNU and BSD
# @param $1 raw text
# @returns Escaped text on stdout (no trailing newline)
soprano_json_escape() {
  local escaped="$1"
  local backslash='\' dquote='"'

  # Backslashes first, so the escapes added below are not escaped again.
  escaped=${escaped//"$backslash"/$backslash$backslash}
  escaped=${escaped//"$dquote"/$backslash$dquote}
  escaped=${escaped//$'\n'/${backslash}n}
  escaped=${escaped//$'\r'/${backslash}r}
  escaped=${escaped//$'\t'/${backslash}t}
  # Any other control character is illegal inside a JSON string; blank it out.
  escaped=${escaped//[[:cntrl:]]/ }

  printf '%s' "$escaped"
}

# @function soprano_build_json_payload
# @intent Build the {"input": ...} request body for the speech endpoint
# @why SECURITY: Use proper JSON encoding to prevent injection (#133)
# @param $1 text to speak
# @returns JSON document on stdout
soprano_build_json_payload() {
  local text="$1" payload

  # Preferred: let python3's json module do the encoding.
  if payload=$(printf '%s' "$text" \
      | python3 -c 'import sys,json; print(json.dumps({"input":sys.stdin.read()}))' 2>/dev/null) \
    && [[ -n "$payload" ]]; then
    printf '%s' "$payload"
    return 0
  fi

  # Fallback: hand-escaped string.
  printf '{"input":"%s"}' "$(soprano_json_escape "$text")"
}

# @function synthesize_speech
# @intent Generate speech using best available Soprano mode
# @why Auto-detect WebUI → API → CLI for optimal performance
# @param Uses globals: $TEXT, $SOPRANO_PORT, $SOPRANO_DEVICE
# @returns Creates WAV file at $TEMP_FILE, sets $SYNTH_MODE
# @exitcode 4=synthesis error
SYNTH_MODE=""

if check_webui_server; then
  # Gradio WebUI mode — use Python helper for SSE protocol
  SYNTH_MODE="webui"
  python3 "$SCRIPT_DIR/soprano-gradio-synth.py" "$TEXT" "$TEMP_FILE" "$SOPRANO_PORT" 2>/dev/null
elif check_api_server; then
  # OpenAI-compatible API mode — direct curl
  SYNTH_MODE="api"
  _JSON_PAYLOAD=$(soprano_build_json_payload "$TEXT")
  curl -sf "http://127.0.0.1:${SOPRANO_PORT}/v1/audio/speech" \
    -H "Content-Type: application/json" \
    -d "$_JSON_PAYLOAD" \
    --output "$TEMP_FILE" 2>/dev/null
else
  # CLI fallback — reloads model each call (slowest)
  SYNTH_MODE="cli"
  soprano "$TEXT" -o "$TEMP_FILE" -d "$SOPRANO_DEVICE" 2>/dev/null
fi
|
|
168
|
+
|
|
169
|
+
# @function verify_synthesis_output
# @intent Abort when the selected Soprano mode produced no audio
# @why $TEMP_FILE is pre-created empty by mktemp, so it must be non-empty to count as output
# @exitcode 4=synthesis error
if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
  # Do not leave the empty placeholder behind in the audio cache.
  if [[ -n "$TEMP_FILE" ]]; then
    rm -f -- "$TEMP_FILE"
  fi
  echo "❌ Failed to synthesize speech with Soprano ($SYNTH_MODE mode)"
  [[ "$SYNTH_MODE" == "webui" ]] && echo " Try: python3 $SCRIPT_DIR/soprano-gradio-synth.py \"test\" /tmp/test.wav $SOPRANO_PORT"
  exit 4
fi
|
|
174
|
+
|
|
175
|
+
# @function detect_remote_session
# @intent Turn on audio compression automatically for SSH / X11-forwarded sessions
# @why Remote desktop audio is choppy without compression
# @returns Exports AGENTVIBES_RDP_MODE=true when a remote session is detected;
#          an already non-empty AGENTVIBES_RDP_MODE is left untouched
if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]] \
  && [[ -n "${SSH_CLIENT:-}${SSH_TTY:-}" || "${DISPLAY:-}" == localhost:* ]]; then
  export AGENTVIBES_RDP_MODE=true
  echo "🌐 Remote session detected - enabling audio compression"
fi
|
|
185
|
+
|
|
186
|
+
# @function compress_for_remote
# @intent Compress TTS audio for remote sessions (SSH/RDP)
# @why Reduces bandwidth and prevents choppy playback
# @param Uses globals: $AGENTVIBES_RDP_MODE, $AUDIO_DIR, $TEMP_FILE
# @returns Points $TEMP_FILE at the mono 22050 Hz copy when ffmpeg succeeds;
#          leaves $TEMP_FILE untouched when compression is skipped or fails
compress_for_remote() {
  local placeholder

  [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] || return 0
  command -v ffmpeg &>/dev/null || return 0
  [[ -s "$TEMP_FILE" ]] || return 0

  # Reserve a unique name with a template that ends in X's (required by
  # BSD/macOS mktemp); ffmpeg writes to "<reserved>.wav".
  placeholder=$(mktemp "$AUDIO_DIR/tts-compressed-XXXXXX") || return 0
  COMPRESSED_FILE="${placeholder}.wav"

  # Only swap files when ffmpeg reported success AND produced data — the mere
  # existence of the output path proves nothing.
  if ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null \
    && [[ -s "$COMPRESSED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$COMPRESSED_FILE"
  else
    rm -f "$COMPRESSED_FILE"
  fi
  rm -f "$placeholder"
}

compress_for_remote
|
|
197
|
+
|
|
198
|
+
# @function add_silence_padding
# @intent Add silence to prevent WSL audio static
# @why WSL audio subsystem cuts off first ~200ms
# @param Uses globals: $AUDIO_DIR, $TEMP_FILE
# @returns Points $TEMP_FILE at the padded copy (0.2s of silence prepended) when
#          ffmpeg succeeds; leaves $TEMP_FILE untouched when padding is skipped or fails
add_silence_padding() {
  local placeholder

  command -v ffmpeg &>/dev/null || return 0
  [[ -s "$TEMP_FILE" ]] || return 0

  # Reserve a unique name with a template that ends in X's (required by
  # BSD/macOS mktemp); ffmpeg writes to "<reserved>.wav".
  placeholder=$(mktemp "$AUDIO_DIR/tts-padded-XXXXXX") || return 0
  PADDED_FILE="${placeholder}.wav"

  # Only swap files when ffmpeg reported success AND produced data — the mere
  # existence of the output path proves nothing.
  if ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
      -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
      -map "[out]" -y "$PADDED_FILE" 2>/dev/null \
    && [[ -s "$PADDED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$PADDED_FILE"
  else
    rm -f "$PADDED_FILE"
  fi
  rm -f "$placeholder"
}

add_silence_padding
|
|
211
|
+
|
|
212
|
+
# @function apply_audio_effects
# @intent Run the audio through audio-processor.sh (sox effects, background music) when that script exists
# @param Uses globals: $SCRIPT_DIR, $AUDIO_DIR, $TEMP_FILE
# @returns Updates $TEMP_FILE to the processed file; sets $BACKGROUND_MUSIC
#
# The processor's stdout is parsed as "<file>|<music>": everything before the
# first "|" becomes the processed file, everything after the last "|" the music.
BACKGROUND_MUSIC=""
if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
  PROCESSED_FILE="$AUDIO_DIR/tts-processed-$(date +%s).wav"

  if ! PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" 2>/dev/null); then
    # Processor failed: behave as if it had returned the unprocessed file and no music.
    PROCESSOR_OUTPUT="$TEMP_FILE|"
  fi

  PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
  BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"

  # Replace the input only when a different, existing file came back.
  if [[ "$PROCESSED_FILE" != "$TEMP_FILE" && -f "$PROCESSED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$PROCESSED_FILE"
  fi
fi
|
|
230
|
+
|
|
231
|
+
# @function play_audio
# @intent Play generated audio using available player with sequential playback
# @why Support multiple audio players and prevent overlapping audio
# SECURITY: Use user-isolated lock directory (#129)
#
# The lock lives in $XDG_RUNTIME_DIR when that is set, otherwise in a
# per-UID directory under /tmp.
if [[ -n "${XDG_RUNTIME_DIR:-}" ]]; then
  _LOCK_DIR="$XDG_RUNTIME_DIR"
else
  _LOCK_DIR="/tmp/agentvibes-$(id -u)"
fi
mkdir -p "$_LOCK_DIR"
chmod 700 "$_LOCK_DIR"
LOCK_FILE="$_LOCK_DIR/agentvibes-audio.lock"
|
|
239
|
+
|
|
240
|
+
# Drop a lock file whose modification time is more than 30 seconds old.
# stat takes different flags on macOS (-f %m) and elsewhere (-c %Y); an
# unreadable mtime counts as 0, which makes the lock look stale.
if [[ -f "$LOCK_FILE" ]]; then
  case "$(uname)" in
    Darwin) _lock_mtime=$(stat -f %m "$LOCK_FILE" 2>/dev/null || echo 0) ;;
    *)      _lock_mtime=$(stat -c %Y "$LOCK_FILE" 2>/dev/null || echo 0) ;;
  esac
  _lock_age=$(( $(date +%s) - _lock_mtime ))
  if (( _lock_age > 30 )); then
    rm -f "$LOCK_FILE"
  fi
fi

# Give a running playback up to 4 x 0.5s to release the lock.
for i in {1..4}; do
  [[ -f "$LOCK_FILE" ]] || break
  sleep 0.5
done

# Still locked: skip this utterance instead of overlapping audio.
if [[ -f "$LOCK_FILE" ]]; then
  echo "⏭️ Skipping TTS (previous audio still playing)" >&2
  exit 0
fi
|
|
264
|
+
|
|
265
|
+
# Per-file lock path: "<directory of $TEMP_FILE>/<wav name without .wav>.lock".
# NOTE(review): presumably consulted by other scripts to avoid touching a file
#               that is still in use — confirm against the cache utilities.
AUDIO_DIR_PLAY="${TEMP_FILE%/*}"
WRITE_LOCK_FILE="${AUDIO_DIR_PLAY}/$(basename "$TEMP_FILE" .wav).lock"

# Take the global playback lock, then the per-file lock.
touch "$LOCK_FILE"
touch "$WRITE_LOCK_FILE"
|
|
270
|
+
|
|
271
|
+
# @function lock_hold_seconds
# @intent Convert an ffprobe duration string into whole seconds to hold the playback lock
# @why Rounds UP so the lock is never released before the audio ends, and maps
#      empty / non-numeric input to 1 so the later `sleep` always gets a valid number
# @param $1 raw duration, e.g. "2.345000" (may be empty or non-numeric)
# @returns Whole seconds (>= 1) on stdout
lock_hold_seconds() {
  local raw="$1" seconds
  local numeric_re='^([0-9]+)(\.([0-9]+))?$'

  if [[ "$raw" =~ $numeric_re ]]; then
    # 10# forces base 10 so a leading zero is not read as octal.
    seconds=$((10#${BASH_REMATCH[1]}))
    # Any non-zero fractional digit means the clip runs into the next second.
    if [[ "${BASH_REMATCH[3]}" == *[1-9]* ]]; then
      seconds=$((seconds + 1))
    fi
  else
    seconds=1
  fi

  (( seconds >= 1 )) || seconds=1
  printf '%s\n' "$seconds"
}

DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null)
DURATION=$(lock_hold_seconds "$DURATION")
|
|
274
|
+
|
|
275
|
+
# Start playback in the background unless test mode or no-playback mode is on.
# Player choice: afplay on macOS, termux-media-player on Termux, otherwise the
# first of paplay / mpv / aplay that succeeds. $PLAYER_PID holds the job's PID.
if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" && "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
  case "$(uname -s)" in
    Darwin)
      afplay "$TEMP_FILE" >/dev/null 2>&1 &
      PLAYER_PID=$!
      ;;
    *)
      if [[ -n "${TERMUX_VERSION:-}" || -d "/data/data/com.termux" ]]; then
        termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
      else
        (paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
      fi
      PLAYER_PID=$!
      ;;
  esac
fi
|
|
287
|
+
|
|
288
|
+
# Release both locks after $DURATION seconds. The job runs in the background
# and is disowned so it is no longer tracked as a job of this shell.
{
  sleep "$DURATION"
  rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE"
} &
disown
|
|
290
|
+
|
|
291
|
+
# @function display_cache_stats
# @intent Report audio cache statistics in colour, auto-cleaning first when the cache is over its limit
# Relies on get_audio_dir, get_auto_clean_threshold, calculate_tts_size_bytes and
# auto_clean_old_files, which this file does not define (it sources audio-cache-utils.sh).
AUDIO_DIR_PATH=$(get_audio_dir)

# ANSI colour escapes; they are expanded by `echo -e` when printed.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
ORANGE='\033[0;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
YELLOW='\033[1;33m'
WHITE='\033[1;37m'
GOLD='\033[38;5;226m'

# The threshold is multiplied by 1048576 (bytes per MiB) before comparing with the cache size.
AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
INITIAL_SIZE=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
if [[ $INITIAL_SIZE -gt $((AUTO_CLEAN_THRESHOLD * 1048576)) ]]; then
  DELETED=$(auto_clean_old_files "$AUDIO_DIR_PATH" "$AUTO_CLEAN_THRESHOLD")
  [[ $DELETED -gt 0 ]] && echo -e "${ORANGE}🧹 Auto-cleaned $DELETED old files${NC}"
fi
|
|
314
|
+
|
|
315
|
+
# Gather the current cache figures (helpers are not defined in this file).
FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")

# Colour the size: red above 3221225472 bytes, yellow above 524288000 bytes, green otherwise.
if [[ $SIZE_BYTES -gt 3221225472 ]]; then
  CACHE_COLOR=$RED
elif [[ $SIZE_BYTES -gt 524288000 ]]; then
  CACHE_COLOR=$YELLOW
else
  CACHE_COLOR=$GREEN
fi

# One summary line: saved path, file count, cache size, auto-clean threshold.
echo -e "${WHITE}💾 Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}🗄️${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}🧹${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"

# Name the background track when one was reported for this utterance.
if [[ -n "$BACKGROUND_MUSIC" ]]; then
  MUSIC_FILENAME=$(basename "$BACKGROUND_MUSIC")
  echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}$MUSIC_FILENAME${NC}"
fi

echo -e "${WHITE}🎤 Voice:${NC} ${BLUE}Soprano-1.1-80M${NC} ${WHITE}(Soprano TTS, ${SYNTH_MODE} mode)${NC}"
|
|
333
|
+
|
|
334
|
+
# Personality: read from the project's .claude directory first, then from $HOME;
# nothing is printed for an empty value or for "none" / "normal".
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/../.." && pwd)}"
PERSONALITY=""
for _personality_file in \
  "$PROJECT_ROOT/.claude/tts-personality.txt" \
  "$HOME/.claude/tts-personality.txt"; do
  if PERSONALITY=$(cat "$_personality_file" 2>/dev/null); then
    break
  fi
done
case "$PERSONALITY" in
  "" | none | normal) ;;
  *) echo -e "${WHITE}💫 Personality:${NC} ${YELLOW}$PERSONALITY${NC}" ;;
esac

# Warn when the audio directory uses more than 100 MB on disk (du -sm).
if [[ -d "$AUDIO_DIR_PATH" ]]; then
  AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
  if [[ -n "$AUDIO_SIZE" && "$AUDIO_SIZE" -gt 100 ]]; then
    echo -e "\033[0;31m⚠️ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
  fi
fi

# Background music status: only reported when no track was mixed in.
if [[ -z "$BACKGROUND_MUSIC" ]]; then
  BACKGROUND_ENABLED_FILE="$PROJECT_ROOT/.claude/config/background-music-enabled.txt"
  _music_status="Disabled"
  if [[ -f "$BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$BACKGROUND_ENABLED_FILE" 2>/dev/null; then
    _music_status="Enabled but not playing (check config)"
  fi
  echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}${_music_status}${NC}"
fi
|