agentvibes 3.3.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/config/audio-effects.cfg +1 -1
- package/.claude/config/background-music-position.txt +1 -26
- package/.claude/github-star-reminder.txt +1 -1
- package/.claude/hooks/audio-cache-utils.sh +0 -0
- package/.claude/hooks/clawdbot-receiver-SECURE.sh +0 -0
- package/.claude/hooks/play-tts-soprano.sh +320 -0
- package/.claude/hooks/play-tts.sh +6 -0
- package/.claude/hooks/provider-manager.sh +17 -0
- package/.claude/hooks/soprano-gradio-synth.py +139 -0
- package/.claude/piper-voices-dir.txt +1 -1
- package/.mcp.json +6 -28
- package/README.md +19 -15
- package/RELEASE_NOTES.md +199 -1
- package/package.json +1 -1
- package/src/installer.js +339 -47
- package/.claude/config/background-music-volume.txt +0 -1
- package/.claude/config/background-music.cfg +0 -1
- package/.claude/config/background-music.txt +0 -1
- package/.claude/config/tts-speech-rate.txt +0 -1
- package/.claude/config/tts-verbosity.txt +0 -1
- package/.claude/hooks/bmad-party-manager.sh +0 -225
- package/.claude/hooks/stop.sh +0 -38
|
@@ -49,4 +49,4 @@ BMad Master|reverb 50 60 100 pitch -100|agentvibes_soft_flamenco_loop.mp3|0.30
|
|
|
49
49
|
_party_mode|compand 0.3,1 6:-70,-60,-20|agent_vibes_dark_chill_step_loop.mp3|0.40
|
|
50
50
|
|||
|
|
51
51
|
# Default (no agent specified) - light reverb with Soft Flamenco background|||
|
|
52
|
-
default|reverb
|
|
52
|
+
default|reverb 20 50 50|agentvibes_soft_flamenco_loop.mp3|0.30
|
|
@@ -1,26 +1 @@
|
|
|
1
|
-
|
|
2
|
-
Agent Vibes Japanese City Pop v1.mp3:29.392744
|
|
3
|
-
Agent Vibes ChillWave v2.mp3:22.154467
|
|
4
|
-
Agent Vibes Bossa Nova v2.mp3:23.733424
|
|
5
|
-
Agent Vibes Tabla Dream Pop v1.mp3:19.101043
|
|
6
|
-
Agent Vibes Hawaiian slack key guitar v2.mp3:36.381950
|
|
7
|
-
AgentVibes Soft Flamenco.mp3:23.160000
|
|
8
|
-
Agent Vibes Arabic v2.mp3:21.922268
|
|
9
|
-
Agent Vibes Goa Trance v2.mp3:55.953741
|
|
10
|
-
Agent Vibes Ganawa Ambient v2.mp3:39.680205
|
|
11
|
-
Agent Vibes Celtic Harp v1.mp3:42.190476
|
|
12
|
-
Agent Vibes Harpsichord v2.mp3:21.739410
|
|
13
|
-
Agent Vibes Japanese City Pop v1-loop.mp3:13.917551
|
|
14
|
-
Agent Vibes Hawaiian slack key guitar v2-loop.mp3:12.977143
|
|
15
|
-
Agent Vibes Ganawa Ambient v2-loop.mp3:.00000000000000000002815996
|
|
16
|
-
Agent Vibes Tabla Dream Pop v1-loop.mp3:.00000000000000000009067943
|
|
17
|
-
Agent Vibes ChillWave v2-loop.mp3:.00000000000000000007080511
|
|
18
|
-
Agent Vibes Harpsichord v2-loop.mp3:.00000000000000000013140818
|
|
19
|
-
agent_vibes_japanese_city_pop_v1_loop.mp3:6.054512
|
|
20
|
-
agent_vibes_bossa_nova_v2_loop.mp3:5.369524
|
|
21
|
-
agent_vibes_salsa_v2_loop.mp3:9.972790
|
|
22
|
-
agent_vibes_cumbia_v1_loop.mp3:5.717823
|
|
23
|
-
agent_vibes_arabic_v2_loop.mp3:.00000000000000000006132724
|
|
24
|
-
agent_vibes_chillwave_v2_loop.mp3:14.628390
|
|
25
|
-
agent_vibes_bachata_v1_loop.mp3:.00000000000000000005344000
|
|
26
|
-
agentvibes_soft_flamenco_loop.mp3:.00000000000000000006934441
|
|
1
|
+
agentvibes_soft_flamenco_loop.mp3:9.624036
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
20260210
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# File: .claude/hooks/play-tts-soprano.sh
|
|
4
|
+
#
|
|
5
|
+
# AgentVibes - Finally, your AI Agents can Talk Back! Text-to-Speech WITH personality for AI Assistants!
|
|
6
|
+
# Website: https://agentvibes.org
|
|
7
|
+
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
+
#
|
|
9
|
+
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
+
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
+
#
|
|
12
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
+
# you may not use this file except in compliance with the License.
|
|
14
|
+
# You may obtain a copy of the License at
|
|
15
|
+
#
|
|
16
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
+
#
|
|
18
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
# See the License for the specific language governing permissions and
|
|
22
|
+
# limitations under the License.
|
|
23
|
+
#
|
|
24
|
+
# DISCLAIMER: This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
25
|
+
# express or implied. Use at your own risk. See the Apache License for details.
|
|
26
|
+
#
|
|
27
|
+
# ---
|
|
28
|
+
#
|
|
29
|
+
# @fileoverview Soprano TTS Provider Implementation - Free, local, neural-quality TTS
|
|
30
|
+
# @context Provides ultra-lightweight on-device neural TTS via Soprano (80M params)
|
|
31
|
+
# @architecture Implements provider interface contract with 3 synthesis modes (WebUI/API/CLI)
|
|
32
|
+
# @dependencies soprano-tts (pip), soprano-gradio-synth.py, ffmpeg (optional padding), audio players
|
|
33
|
+
# @entrypoints Called by play-tts.sh router when provider=soprano
|
|
34
|
+
# @patterns Provider contract: text/voice → audio file path, auto-mode detection, Gradio SSE protocol
|
|
35
|
+
# @related play-tts.sh, soprano-gradio-synth.py, provider-manager.sh, GitHub Issue #94
|
|
36
|
+
#
|
|
37
|
+
# Supports three modes (auto-detected in priority order):
|
|
38
|
+
# 1. WebUI mode: Gradio WebUI running (soprano-webui), uses Python helper
|
|
39
|
+
# 2. API mode: OpenAI-compatible server (uvicorn soprano.server:app), uses curl
|
|
40
|
+
# 3. CLI mode: Direct `soprano` command — reloads model each call (slowest)
|
|
41
|
+
#
|
|
42
|
+
# Environment variables:
|
|
43
|
+
# SOPRANO_PORT — WebUI/API port (default: 7860)
|
|
44
|
+
# SOPRANO_DEVICE — Device for CLI mode: auto|cuda|cpu|mps (default: auto)
|
|
45
|
+
#
|
|
46
|
+
|
|
47
|
+
# Fix locale warnings
|
|
48
|
+
export LC_ALL=C
|
|
49
|
+
|
|
50
|
+
TEXT="$1"
|
|
51
|
+
VOICE_OVERRIDE="$2" # Ignored — Soprano has a single voice, kept for provider contract
|
|
52
|
+
|
|
53
|
+
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
54
|
+
source "$SCRIPT_DIR/audio-cache-utils.sh"
|
|
55
|
+
|
|
56
|
+
SOPRANO_PORT="${SOPRANO_PORT:-7860}"
|
|
57
|
+
SOPRANO_DEVICE="${SOPRANO_DEVICE:-auto}"
|
|
58
|
+
|
|
59
|
+
# @function validate_inputs
|
|
60
|
+
# @intent Check required parameters
|
|
61
|
+
# @why Fail fast with clear errors if inputs missing
|
|
62
|
+
# @exitcode 1=missing text
|
|
63
|
+
if [[ -z "$TEXT" ]]; then
|
|
64
|
+
echo "Usage: $0 \"text to speak\" [voice_override]"
|
|
65
|
+
exit 1
|
|
66
|
+
fi
|
|
67
|
+
|
|
68
|
+
# @function check_webui_server
|
|
69
|
+
# @intent Detect if Soprano Gradio WebUI is reachable
|
|
70
|
+
# @why WebUI mode keeps model in memory for fastest repeated synthesis
|
|
71
|
+
# @returns exitcode 0=reachable, 1=not reachable
|
|
72
|
+
check_webui_server() {
  # Probe the Gradio info endpoint on the local Soprano port.
  # The /gradio_api-prefixed route is tried first; the unprefixed /info
  # route is the fallback. Returns 0 as soon as either answers with a
  # success status, otherwise the non-zero exit status of the last curl.
  local webui_root="http://127.0.0.1:${SOPRANO_PORT}"

  if curl -sf --max-time 2 "${webui_root}/gradio_api/info" -o /dev/null 2>/dev/null; then
    return 0
  fi

  curl -sf --max-time 2 "${webui_root}/info" -o /dev/null 2>/dev/null
}
|
|
76
|
+
|
|
77
|
+
# @function check_api_server
|
|
78
|
+
# @intent Detect if Soprano OpenAI-compatible API server is reachable
|
|
79
|
+
# @why API mode is simpler than WebUI (direct WAV response, no SSE polling)
|
|
80
|
+
# @returns exitcode 0=reachable, 1=not reachable
|
|
81
|
+
check_api_server() {
  # Probe the OpenAI-compatible speech endpoint by POSTing a tiny request.
  # The response body is discarded; only curl's exit status matters
  # (0 = endpoint answered with a success status within 2 seconds).
  local speech_endpoint="http://127.0.0.1:${SOPRANO_PORT}/v1/audio/speech"
  local probe_body='{"input":"test"}'

  curl -sf --max-time 2 \
    -H "Content-Type: application/json" \
    -d "$probe_body" \
    -o /dev/null \
    "$speech_endpoint" 2>/dev/null
}
|
|
86
|
+
|
|
87
|
+
# @function check_soprano_available
|
|
88
|
+
# @intent Verify at least one synthesis mode is available
|
|
89
|
+
# @why Provide helpful installation instructions if nothing works
|
|
90
|
+
# @exitcode 2=soprano not installed and no server running
|
|
91
|
+
if ! command -v soprano &>/dev/null && ! check_webui_server && ! check_api_server; then
|
|
92
|
+
echo "❌ Error: Soprano TTS not installed and no server running on port $SOPRANO_PORT"
|
|
93
|
+
echo ""
|
|
94
|
+
echo "Install: pip install soprano-tts"
|
|
95
|
+
echo " (GPU): pip install soprano-tts[lmdeploy]"
|
|
96
|
+
echo ""
|
|
97
|
+
echo "Start WebUI: soprano-webui"
|
|
98
|
+
echo "Start API: uvicorn soprano.server:app --host 127.0.0.1 --port $SOPRANO_PORT"
|
|
99
|
+
exit 2
|
|
100
|
+
fi
|
|
101
|
+
|
|
102
|
+
# @function determine_audio_directory
|
|
103
|
+
# @intent Find appropriate directory for audio file storage
|
|
104
|
+
# @why Supports project-local and global storage
|
|
105
|
+
# @returns Sets $AUDIO_DIR global variable
|
|
106
|
+
if [[ -n "$CLAUDE_PROJECT_DIR" ]]; then
|
|
107
|
+
AUDIO_DIR="$CLAUDE_PROJECT_DIR/.claude/audio"
|
|
108
|
+
else
|
|
109
|
+
CURRENT_DIR="$PWD"
|
|
110
|
+
while [[ "$CURRENT_DIR" != "/" ]]; do
|
|
111
|
+
if [[ -d "$CURRENT_DIR/.claude" ]]; then
|
|
112
|
+
AUDIO_DIR="$CURRENT_DIR/.claude/audio"
|
|
113
|
+
break
|
|
114
|
+
fi
|
|
115
|
+
CURRENT_DIR=$(dirname "$CURRENT_DIR")
|
|
116
|
+
done
|
|
117
|
+
if [[ -z "$AUDIO_DIR" ]]; then
|
|
118
|
+
AUDIO_DIR="$HOME/.claude/audio"
|
|
119
|
+
fi
|
|
120
|
+
fi
|
|
121
|
+
|
|
122
|
+
mkdir -p "$AUDIO_DIR"
|
|
123
|
+
TEMP_FILE="$AUDIO_DIR/tts-$(date +%s).wav"
|
|
124
|
+
|
|
125
|
+
# @function synthesize_speech
|
|
126
|
+
# @intent Generate speech using best available Soprano mode
|
|
127
|
+
# @why Auto-detect WebUI → API → CLI for optimal performance
|
|
128
|
+
# @param Uses globals: $TEXT, $SOPRANO_PORT, $SOPRANO_DEVICE
|
|
129
|
+
# @returns Creates WAV file at $TEMP_FILE, sets $SYNTH_MODE
|
|
130
|
+
# @exitcode 4=synthesis error
|
|
131
|
+
# @function soprano_json_escape
# @intent Escape arbitrary text so it can be embedded in a JSON string literal
# @why Escaping only double quotes produces invalid JSON for text containing
#      backslashes, newlines or tabs, and the API request then fails
# @param $1 raw text
# @returns Escaped text on stdout (without surrounding quotes)
soprano_json_escape() {
  local s="$1"
  # Backslash must be escaped first so later escapes are not double-escaped.
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\r'/\\r}"
  s="${s//$'\t'/\\t}"
  # JSON forbids any other raw control character inside a string; they carry
  # no speakable content, so drop them. Newlines are already escaped above,
  # so command substitution cannot strip anything meaningful here.
  s="$(printf '%s' "$s" | tr -d '\000-\010\013\014\016-\037')"
  printf '%s' "$s"
}

SYNTH_MODE=""

if check_webui_server; then
  # Gradio WebUI mode — use Python helper for SSE protocol
  SYNTH_MODE="webui"
  python3 "$SCRIPT_DIR/soprano-gradio-synth.py" "$TEXT" "$TEMP_FILE" "$SOPRANO_PORT" 2>/dev/null
elif check_api_server; then
  # OpenAI-compatible API mode — direct curl.
  # The request body is built with printf (not echo) so text such as "-n"
  # or "-e" is never interpreted as an option, and fully JSON-escaped.
  SYNTH_MODE="api"
  API_REQUEST_BODY="$(printf '{"input":"%s"}' "$(soprano_json_escape "$TEXT")")"
  curl -sf "http://127.0.0.1:${SOPRANO_PORT}/v1/audio/speech" \
    -H "Content-Type: application/json" \
    -d "$API_REQUEST_BODY" \
    --output "$TEMP_FILE" 2>/dev/null
else
  # CLI fallback — reloads model each call (slowest)
  SYNTH_MODE="cli"
  soprano "$TEXT" -o "$TEMP_FILE" -d "$SOPRANO_DEVICE" 2>/dev/null
fi

# A missing or zero-byte output file means synthesis failed in whichever mode ran.
if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
  echo "❌ Failed to synthesize speech with Soprano ($SYNTH_MODE mode)"
  [[ "$SYNTH_MODE" == "webui" ]] && echo " Try: python3 $SCRIPT_DIR/soprano-gradio-synth.py \"test\" /tmp/test.wav $SOPRANO_PORT"
  exit 4
fi
|
|
155
|
+
|
|
156
|
+
# @function detect_remote_session
|
|
157
|
+
# @intent Auto-detect SSH/RDP sessions and enable audio compression
|
|
158
|
+
# @why Remote desktop audio is choppy without compression
|
|
159
|
+
# @returns Sets AGENTVIBES_RDP_MODE environment variable
|
|
160
|
+
if [[ -z "${AGENTVIBES_RDP_MODE:-}" ]]; then
|
|
161
|
+
if [[ -n "${SSH_CLIENT:-}" ]] || [[ -n "${SSH_TTY:-}" ]] || [[ "${DISPLAY:-}" =~ ^localhost:.* ]]; then
|
|
162
|
+
export AGENTVIBES_RDP_MODE=true
|
|
163
|
+
echo "🌐 Remote session detected - enabling audio compression"
|
|
164
|
+
fi
|
|
165
|
+
fi
|
|
166
|
+
|
|
167
|
+
# @function compress_for_remote
|
|
168
|
+
# @intent Compress TTS audio for remote sessions (SSH/RDP)
|
|
169
|
+
# @why Reduces bandwidth and prevents choppy playback
|
|
170
|
+
if [[ "${AGENTVIBES_RDP_MODE:-false}" == "true" ]] && command -v ffmpeg &>/dev/null; then
  COMPRESSED_FILE="$AUDIO_DIR/tts-compressed-$(date +%s).wav"
  # Swap in the compressed file only when ffmpeg succeeded AND wrote data.
  # A failed run can leave an empty or partial file behind; checking mere
  # existence would delete the good WAV and play the broken one.
  if ffmpeg -i "$TEMP_FILE" -ac 1 -ar 22050 -b:a 64k -y "$COMPRESSED_FILE" 2>/dev/null \
      && [[ -s "$COMPRESSED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$COMPRESSED_FILE"
  else
    rm -f "$COMPRESSED_FILE"
  fi
fi

# @function add_silence_padding
# @intent Add silence to prevent WSL audio static
# @why WSL audio subsystem cuts off first ~200ms
if command -v ffmpeg &>/dev/null; then
  PADDED_FILE="$AUDIO_DIR/tts-padded-$(date +%s).wav"
  # Prepend 0.2s of generated silence (anullsrc) to the speech via the concat
  # filter. As above, keep the original file unless ffmpeg really produced a
  # non-empty result, and clean up any failed output.
  if ffmpeg -f lavfi -i anullsrc=r=44100:cl=stereo:d=0.2 -i "$TEMP_FILE" \
      -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[out]" \
      -map "[out]" -y "$PADDED_FILE" 2>/dev/null \
      && [[ -s "$PADDED_FILE" ]]; then
    rm -f "$TEMP_FILE"
    TEMP_FILE="$PADDED_FILE"
  else
    rm -f "$PADDED_FILE"
  fi
fi
|
|
192
|
+
|
|
193
|
+
# @function apply_audio_effects
|
|
194
|
+
# @intent Apply sox effects and background music via audio-processor.sh
|
|
195
|
+
# @param Uses global: $TEMP_FILE
|
|
196
|
+
# @returns Updates $TEMP_FILE to processed version
|
|
197
|
+
BACKGROUND_MUSIC=""
|
|
198
|
+
if [[ -f "$SCRIPT_DIR/audio-processor.sh" ]]; then
|
|
199
|
+
PROCESSED_FILE="$AUDIO_DIR/tts-processed-$(date +%s).wav"
|
|
200
|
+
PROCESSOR_OUTPUT=$("$SCRIPT_DIR/audio-processor.sh" "$TEMP_FILE" "default" "$PROCESSED_FILE" 2>/dev/null) || {
|
|
201
|
+
PROCESSED_FILE="$TEMP_FILE"
|
|
202
|
+
PROCESSOR_OUTPUT="$TEMP_FILE|"
|
|
203
|
+
}
|
|
204
|
+
PROCESSED_FILE="${PROCESSOR_OUTPUT%%|*}"
|
|
205
|
+
BACKGROUND_MUSIC="${PROCESSOR_OUTPUT##*|}"
|
|
206
|
+
if [[ -f "$PROCESSED_FILE" ]] && [[ "$PROCESSED_FILE" != "$TEMP_FILE" ]]; then
|
|
207
|
+
rm -f "$TEMP_FILE"
|
|
208
|
+
TEMP_FILE="$PROCESSED_FILE"
|
|
209
|
+
fi
|
|
210
|
+
fi
|
|
211
|
+
|
|
212
|
+
# @function play_audio
|
|
213
|
+
# @intent Play generated audio using available player with sequential playback
|
|
214
|
+
# @why Support multiple audio players and prevent overlapping audio
|
|
215
|
+
LOCK_FILE="/tmp/agentvibes-audio.lock"

# Wait up to ~2 seconds (4 x 0.5s) for a previous playback to release the lock.
for i in {1..4}; do
  if [ ! -f "$LOCK_FILE" ]; then
    break
  fi
  sleep 0.5
done

# Still locked: give up quietly rather than talk over the previous audio.
if [ -f "$LOCK_FILE" ]; then
  echo "⏭️ Skipping TTS (previous audio still playing)" >&2
  exit 0
fi

touch "$LOCK_FILE"

# Per-file marker placed next to the WAV; removed together with the global lock.
AUDIO_DIR_PLAY="${TEMP_FILE%/*}"
WRITE_LOCK_FILE="$AUDIO_DIR_PLAY/$(basename "$TEMP_FILE" .wav).lock"
touch "$WRITE_LOCK_FILE"

# Work out how long to hold the lock. ffprobe reports fractional seconds;
# round UP so the lock is never released while audio is still playing
# (plain truncation turned a 0.8s clip into a 0s hold). If ffprobe is
# missing or prints something non-numeric, fall back to 1 second.
DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null)
DURATION=${DURATION%.*}
if [[ "$DURATION" =~ ^[0-9]+$ ]]; then
  DURATION=$((10#$DURATION + 1))
else
  DURATION=1
fi

if [[ "${AGENTVIBES_TEST_MODE:-false}" != "true" ]] && [[ "${AGENTVIBES_NO_PLAYBACK:-false}" != "true" ]]; then
  if [[ "$(uname -s)" == "Darwin" ]]; then
    afplay "$TEMP_FILE" >/dev/null 2>&1 &
    PLAYER_PID=$!
  elif [[ -n "${TERMUX_VERSION:-}" ]] || [[ -d "/data/data/com.termux" ]]; then
    termux-media-player play "$TEMP_FILE" >/dev/null 2>&1 &
    PLAYER_PID=$!
  else
    # Try each player in turn; the first one that succeeds wins.
    (paplay "$TEMP_FILE" || mpv "$TEMP_FILE" || aplay "$TEMP_FILE") >/dev/null 2>&1 &
    PLAYER_PID=$!
  fi
fi

# Release both lock files after the clip's duration, in the background.
(sleep "$DURATION"; rm -f "$LOCK_FILE" "$WRITE_LOCK_FILE") &
disown
|
|
254
|
+
|
|
255
|
+
# @function display_cache_stats
|
|
256
|
+
# @intent Show audio cache statistics with color-coded output
|
|
257
|
+
AUDIO_DIR_PATH=$(get_audio_dir)
|
|
258
|
+
|
|
259
|
+
BLUE='\033[0;34m'
|
|
260
|
+
YELLOW='\033[1;33m'
|
|
261
|
+
PURPLE='\033[0;35m'
|
|
262
|
+
RED='\033[0;31m'
|
|
263
|
+
GREEN='\033[0;32m'
|
|
264
|
+
ORANGE='\033[0;33m'
|
|
265
|
+
WHITE='\033[1;37m'
|
|
266
|
+
CYAN='\033[0;36m'
|
|
267
|
+
GOLD='\033[38;5;226m'
|
|
268
|
+
NC='\033[0m'
|
|
269
|
+
|
|
270
|
+
AUTO_CLEAN_THRESHOLD=$(get_auto_clean_threshold)
|
|
271
|
+
INITIAL_SIZE=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
|
|
272
|
+
if [[ $INITIAL_SIZE -gt $((AUTO_CLEAN_THRESHOLD * 1048576)) ]]; then
|
|
273
|
+
DELETED=$(auto_clean_old_files "$AUDIO_DIR_PATH" "$AUTO_CLEAN_THRESHOLD")
|
|
274
|
+
if [[ $DELETED -gt 0 ]]; then
|
|
275
|
+
echo -e "${ORANGE}🧹 Auto-cleaned $DELETED old files${NC}"
|
|
276
|
+
fi
|
|
277
|
+
fi
|
|
278
|
+
|
|
279
|
+
FILE_COUNT=$(count_tts_files "$AUDIO_DIR_PATH")
|
|
280
|
+
SIZE_BYTES=$(calculate_tts_size_bytes "$AUDIO_DIR_PATH")
|
|
281
|
+
SIZE_HUMAN=$(bytes_to_human "$SIZE_BYTES")
|
|
282
|
+
|
|
283
|
+
CACHE_COLOR=$GREEN
|
|
284
|
+
if [[ $SIZE_BYTES -gt 3221225472 ]]; then
|
|
285
|
+
CACHE_COLOR=$RED
|
|
286
|
+
elif [[ $SIZE_BYTES -gt 524288000 ]]; then
|
|
287
|
+
CACHE_COLOR=$YELLOW
|
|
288
|
+
fi
|
|
289
|
+
|
|
290
|
+
echo -e "${WHITE}💾 Saved to:${NC} ${CYAN}$TEMP_FILE${NC} ${YELLOW}$FILE_COUNT${NC} ${WHITE}🗄️${NC} ${CACHE_COLOR}$SIZE_HUMAN${NC} ${WHITE}🧹${NC}${GOLD}[${AUTO_CLEAN_THRESHOLD}mb]${NC}"
|
|
291
|
+
|
|
292
|
+
if [[ -n "$BACKGROUND_MUSIC" ]]; then
|
|
293
|
+
MUSIC_FILENAME=$(basename "$BACKGROUND_MUSIC")
|
|
294
|
+
echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}$MUSIC_FILENAME${NC}"
|
|
295
|
+
fi
|
|
296
|
+
echo -e "${WHITE}🎤 Voice:${NC} ${BLUE}Soprano-1.1-80M${NC} ${WHITE}(Soprano TTS, ${SYNTH_MODE} mode)${NC}"
|
|
297
|
+
|
|
298
|
+
# Show personality if configured
|
|
299
|
+
PROJECT_ROOT="${PROJECT_ROOT:-$(cd "$SCRIPT_DIR/../.." && pwd)}"
|
|
300
|
+
PERSONALITY=$(cat "$PROJECT_ROOT/.claude/tts-personality.txt" 2>/dev/null || cat "$HOME/.claude/tts-personality.txt" 2>/dev/null || echo "")
|
|
301
|
+
if [[ -n "$PERSONALITY" ]] && [[ "$PERSONALITY" != "none" ]] && [[ "$PERSONALITY" != "normal" ]]; then
|
|
302
|
+
echo -e "${WHITE}💫 Personality:${NC} ${YELLOW}$PERSONALITY${NC}"
|
|
303
|
+
fi
|
|
304
|
+
|
|
305
|
+
if [[ -d "$AUDIO_DIR_PATH" ]]; then
|
|
306
|
+
AUDIO_SIZE=$(du -sm "$AUDIO_DIR_PATH" 2>/dev/null | cut -f1)
|
|
307
|
+
if [[ -n "$AUDIO_SIZE" ]] && [[ "$AUDIO_SIZE" -gt 100 ]]; then
|
|
308
|
+
echo -e "\033[0;31m⚠️ Audio cache is ${AUDIO_SIZE}MB - Run: /agent-vibes:cleanup\033[0m"
|
|
309
|
+
fi
|
|
310
|
+
fi
|
|
311
|
+
|
|
312
|
+
# Background music status
|
|
313
|
+
if [[ -z "$BACKGROUND_MUSIC" ]]; then
|
|
314
|
+
BACKGROUND_ENABLED_FILE="$PROJECT_ROOT/.claude/config/background-music-enabled.txt"
|
|
315
|
+
if [[ -f "$BACKGROUND_ENABLED_FILE" ]] && grep -q "true" "$BACKGROUND_ENABLED_FILE" 2>/dev/null; then
|
|
316
|
+
echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}Enabled but not playing (check config)${NC}"
|
|
317
|
+
else
|
|
318
|
+
echo -e "${WHITE}🎵 Background music:${NC} ${PURPLE}Disabled${NC}"
|
|
319
|
+
fi
|
|
320
|
+
fi
|
|
@@ -146,6 +146,9 @@ speak_text() {
|
|
|
146
146
|
piper)
|
|
147
147
|
"$SCRIPT_DIR/play-tts-piper.sh" "$text" "$voice"
|
|
148
148
|
;;
|
|
149
|
+
soprano)
|
|
150
|
+
"$SCRIPT_DIR/play-tts-soprano.sh" "$text" "$voice"
|
|
151
|
+
;;
|
|
149
152
|
macos)
|
|
150
153
|
"$SCRIPT_DIR/play-tts-macos.sh" "$text" "$voice"
|
|
151
154
|
;;
|
|
@@ -265,6 +268,9 @@ case "$ACTIVE_PROVIDER" in
|
|
|
265
268
|
piper)
|
|
266
269
|
exec "$SCRIPT_DIR/play-tts-piper.sh" "$TEXT" "$VOICE_OVERRIDE"
|
|
267
270
|
;;
|
|
271
|
+
soprano)
|
|
272
|
+
exec "$SCRIPT_DIR/play-tts-soprano.sh" "$TEXT" "$VOICE_OVERRIDE"
|
|
273
|
+
;;
|
|
268
274
|
macos)
|
|
269
275
|
exec "$SCRIPT_DIR/play-tts-macos.sh" "$TEXT" "$VOICE_OVERRIDE"
|
|
270
276
|
;;
|
|
@@ -190,6 +190,13 @@ migrate_voice_to_provider() {
|
|
|
190
190
|
# Default voices by provider
|
|
191
191
|
local piper_default="en_US-lessac-medium"
|
|
192
192
|
local macos_default="Samantha"
|
|
193
|
+
local soprano_default="soprano-default" # Single voice — no selection needed
|
|
194
|
+
|
|
195
|
+
# Soprano has a single voice, so migration is straightforward
|
|
196
|
+
if [[ "$target_provider" == "soprano" ]]; then
|
|
197
|
+
echo "$soprano_default"
|
|
198
|
+
return 0
|
|
199
|
+
fi
|
|
193
200
|
|
|
194
201
|
# If no current voice, return default for target provider
|
|
195
202
|
if [[ -z "$current_voice" ]]; then
|
|
@@ -201,6 +208,16 @@ migrate_voice_to_provider() {
|
|
|
201
208
|
return 0
|
|
202
209
|
fi
|
|
203
210
|
|
|
211
|
+
# If migrating FROM Soprano, return default for target provider
|
|
212
|
+
if [[ "$current_voice" == "soprano-default" ]]; then
|
|
213
|
+
case "$target_provider" in
|
|
214
|
+
piper) echo "$piper_default" ;;
|
|
215
|
+
macos) echo "$macos_default" ;;
|
|
216
|
+
*) echo "$piper_default" ;;
|
|
217
|
+
esac
|
|
218
|
+
return 0
|
|
219
|
+
fi
|
|
220
|
+
|
|
204
221
|
# Convert to lowercase for case-insensitive comparison (portable)
|
|
205
222
|
local current_voice_lower
|
|
206
223
|
current_voice_lower=$(echo "$current_voice" | tr '[:upper:]' '[:lower:]')
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
# File: .claude/hooks/soprano-gradio-synth.py
|
|
4
|
+
#
|
|
5
|
+
# AgentVibes - Finally, your AI Agents can Talk Back!
|
|
6
|
+
# Website: https://agentvibes.org
|
|
7
|
+
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
+
#
|
|
9
|
+
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
+
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
+
#
|
|
12
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
+
# you may not use this file except in compliance with the License.
|
|
14
|
+
# You may obtain a copy of the License at
|
|
15
|
+
#
|
|
16
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
+
#
|
|
18
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
# See the License for the specific language governing permissions and
|
|
22
|
+
# limitations under the License.
|
|
23
|
+
#
|
|
24
|
+
# ---
|
|
25
|
+
#
|
|
26
|
+
# Soprano Gradio WebUI synthesizer helper.
|
|
27
|
+
# Calls the Soprano WebUI's Gradio API and saves the result as a WAV file.
|
|
28
|
+
# Uses only Python stdlib (json, sys, urllib) — no extra dependencies.
|
|
29
|
+
#
|
|
30
|
+
# Usage: python3 soprano-gradio-synth.py "text to speak" output.wav [port]
|
|
31
|
+
#
|
|
32
|
+
"""
|
|
33
|
+
Soprano Gradio WebUI synthesizer helper for AgentVibes.
|
|
34
|
+
|
|
35
|
+
Calls the Soprano WebUI's Gradio API via the Server-Sent Events (SSE) protocol
|
|
36
|
+
and downloads the generated audio as a WAV file.
|
|
37
|
+
|
|
38
|
+
Flow:
|
|
39
|
+
1. Submit generation request → get event_id
|
|
40
|
+
2. Poll SSE stream for audio file URL
|
|
41
|
+
3. Download WAV file to output path
|
|
42
|
+
|
|
43
|
+
See: https://github.com/ekwek1/soprano
|
|
44
|
+
"""
|
|
45
|
+
import json
|
|
46
|
+
import sys
|
|
47
|
+
import urllib.request
|
|
48
|
+
import urllib.error
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def synth(text: str, output_path: str, port: int = 7860) -> None:
    """Synthesize *text* via the local Soprano WebUI and save it to *output_path*.

    Runs the three-step flow against ``http://127.0.0.1:<port>``: submit the
    generation request, poll the event stream for the audio URL, then
    download the audio file.

    Args:
        text: Text to speak.
        output_path: Destination path for the downloaded audio.
        port: Port the WebUI listens on.
    """
    server_root = f"http://127.0.0.1:{port}"

    # Positional inputs, in order:
    # text, temperature, top_p, repetition_penalty, chunk_size, streaming
    generation_args = [text, 0.0, 0.95, 1.2, 1, False]
    request_body = json.dumps({"data": generation_args}).encode()

    ticket = submit_request(server_root, request_body)
    wav_url = poll_for_result(server_root, ticket)
    download_file(wav_url, output_path)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def submit_request(base: str, payload: bytes) -> str:
    """Submit a generation request to the Gradio API and return its event_id.

    The ``/gradio_api/call`` route is tried first, then ``/call``. A route is
    skipped not only when the request itself fails, but also when it answers
    with something that is not a JSON object containing ``event_id`` (for
    example an HTML page), so the fallback route still gets its chance.

    Args:
        base: Server root such as ``http://127.0.0.1:7860`` (no trailing slash).
        payload: JSON-encoded request body.

    Returns:
        The ``event_id`` value from the first route that responds usably.

    Raises:
        RuntimeError: If neither route yields an event_id.
    """
    last_error = None
    for api_base in ("/gradio_api/call", "/call"):
        url = f"{base}{api_base}/generate_speech"
        req = urllib.request.Request(
            url,
            data=payload,
            headers={"Content-Type": "application/json"},
        )
        try:
            with urllib.request.urlopen(req, timeout=30) as resp:
                return json.loads(resp.read())["event_id"]
        except (OSError, ValueError, KeyError, TypeError) as exc:
            # OSError covers URLError/HTTPError and read timeouts;
            # ValueError covers invalid JSON; KeyError/TypeError cover a JSON
            # body of the wrong shape.
            last_error = exc
            continue

    raise RuntimeError("Could not reach Soprano WebUI API") from last_error
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def poll_for_result(base: str, event_id: str) -> str:
    """Read the SSE stream for *event_id* until an audio file URL appears.

    Only lines beginning with ``data: `` are considered. Each is parsed as
    JSON; the expected payload is a list whose first element is a dict such
    as ``{"path": "...", "url": "...", ...}``. Events whose ``url`` is
    missing, null or empty are skipped, so this never returns a non-string.

    The ``/gradio_api/call`` route is tried first; ``/call`` is used only if
    the first request fails. Once a stream has been read to its end without
    yielding a URL, no further route is tried.

    Args:
        base: Server root such as ``http://127.0.0.1:7860`` (no trailing slash).
        event_id: Identifier returned by the submit step.

    Returns:
        The URL of the generated audio file.

    Raises:
        RuntimeError: If no usable audio URL was found.
    """
    for api_base in ("/gradio_api/call", "/call"):
        url = f"{base}{api_base}/generate_speech/{event_id}"
        req = urllib.request.Request(url)
        try:
            with urllib.request.urlopen(req, timeout=120) as resp:
                for raw_line in resp:
                    line = raw_line.decode("utf-8", errors="replace").strip()
                    if not line.startswith("data: "):
                        continue
                    try:
                        parsed = json.loads(line[6:])
                    except json.JSONDecodeError:
                        continue
                    if not (isinstance(parsed, list) and parsed):
                        continue
                    audio = parsed[0]
                    if not isinstance(audio, dict):
                        continue
                    audio_url = audio.get("url")
                    # Guard against {"url": null} / "" so callers always get
                    # a real URL string.
                    if isinstance(audio_url, str) and audio_url:
                        return audio_url
        except urllib.error.URLError:
            # This route is unavailable; try the next one.
            continue
        # The stream ended without a URL; stop and report the failure.
        break

    raise RuntimeError("No audio URL in Soprano response")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def download_file(url: str, output_path: str) -> None:
    """Download *url* to *output_path* without ever exposing a partial file.

    The body is streamed into ``<output_path>.part`` and renamed onto
    *output_path* only after the whole transfer succeeded. If the transfer
    fails midway, the partial file is removed and *output_path* is left
    untouched, so a caller that checks "does a non-empty output exist?"
    cannot mistake a truncated download for a finished one.

    Args:
        url: Location of the audio file.
        output_path: Final destination path.

    Raises:
        OSError: On request or file-system failure (includes
            ``urllib.error.URLError``).
    """
    # Local imports keep this block self-contained within the script.
    import os
    import shutil

    part_path = f"{output_path}.part"
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req, timeout=30) as resp:
        try:
            with open(part_path, "wb") as f:
                shutil.copyfileobj(resp, f, 8192)
            # Atomic on the same filesystem: readers see the old state or
            # the complete file, never a half-written one.
            os.replace(part_path, output_path)
        except BaseException:
            try:
                os.unlink(part_path)
            except OSError:
                pass
            raise
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def main(argv: list) -> int:
    """Command-line entry point.

    Expects ``argv`` shaped like ``sys.argv``:
    ``[prog, text, output_path, optional_port]``.

    Returns:
        0 on success; 1 on a usage error, an invalid port, or any synthesis
        failure. Errors are reported on stderr.
    """
    if len(argv) < 3:
        print(f"Usage: {argv[0]} \"text\" output.wav [port]", file=sys.stderr)
        return 1

    text = argv[1]
    output = argv[2]
    # Validate the port here so a bad value yields a clean error message
    # instead of an uncaught ValueError traceback.
    try:
        port = int(argv[3]) if len(argv) > 3 else 7860
    except ValueError:
        print(f"Error: invalid port {argv[3]!r}", file=sys.stderr)
        return 1

    try:
        synth(text, output, port)
    except Exception as e:  # top-level boundary: report and signal failure
        print(f"Error: {e}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
|
@@ -1 +1 @@
|
|
|
1
|
-
/home/
|
|
1
|
+
/home/administrator/.claude/piper-voices
|
package/.mcp.json
CHANGED
|
@@ -1,34 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"mcpServers": {
|
|
3
|
-
"
|
|
4
|
-
"command": "
|
|
3
|
+
"agentvibes": {
|
|
4
|
+
"command": "npx",
|
|
5
5
|
"args": [
|
|
6
|
-
"
|
|
7
|
-
"
|
|
8
|
-
"
|
|
9
|
-
|
|
10
|
-
"--rm",
|
|
11
|
-
"--dns",
|
|
12
|
-
"8.8.8.8",
|
|
13
|
-
"--dns",
|
|
14
|
-
"8.8.4.4",
|
|
15
|
-
"-e",
|
|
16
|
-
"SONARQUBE_TOKEN",
|
|
17
|
-
"-e",
|
|
18
|
-
"SONARQUBE_ORG",
|
|
19
|
-
"-e",
|
|
20
|
-
"STORAGE_PATH",
|
|
21
|
-
"mcp/sonarqube"
|
|
22
|
-
],
|
|
23
|
-
"env": {
|
|
24
|
-
"SONARQUBE_TOKEN": "${SONARQUBE_TOKEN}",
|
|
25
|
-
"SONARQUBE_ORG": "${SONARQUBE_ORG}",
|
|
26
|
-
"STORAGE_PATH": "${STORAGE_PATH}"
|
|
27
|
-
}
|
|
28
|
-
},
|
|
29
|
-
"pieces": {
|
|
30
|
-
"type": "sse",
|
|
31
|
-
"url": "http://localhost:39300/model_context_protocol/2024-11-05/sse"
|
|
6
|
+
"-y",
|
|
7
|
+
"--package=agentvibes",
|
|
8
|
+
"agentvibes-mcp-server"
|
|
9
|
+
]
|
|
32
10
|
}
|
|
33
11
|
}
|
|
34
12
|
}
|