agentvibes 5.7.6 → 5.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agentvibes/config.json +12 -5
- package/.agentvibes/install-manifest.json +188 -300
- package/.claude/audio/tracks/celestial_velvet.mp3 +0 -0
- package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
- package/.claude/commands/agent-vibes-rdp.md +24 -24
- package/.claude/config/audio-effects.cfg +3 -2
- package/.claude/config/audio-effects.cfg.sample +52 -52
- package/.claude/config/background-music-enabled.txt +1 -0
- package/.claude/config/background-music-position.txt +1 -1
- package/.claude/config/language.txt +1 -0
- package/.claude/docs/TERMUX_SETUP.md +408 -408
- package/.claude/hooks/audio-cache-utils.sh +0 -0
- package/.claude/hooks/audio-processor.sh +0 -0
- package/.claude/hooks/background-music-manager.sh +0 -0
- package/.claude/hooks/bmad-party-speak.sh +27 -6
- package/.claude/hooks/bmad-speak-enhanced.sh +0 -0
- package/.claude/hooks/bmad-speak.sh +0 -0
- package/.claude/hooks/bmad-tts-injector.sh +0 -0
- package/.claude/hooks/bmad-voice-manager.sh +0 -0
- package/.claude/hooks/clawdbot-receiver-SECURE.sh +0 -0
- package/.claude/hooks/clawdbot-receiver.sh +0 -0
- package/.claude/hooks/clean-audio-cache.sh +0 -0
- package/.claude/hooks/cleanup-cache.sh +0 -0
- package/.claude/hooks/configure-rdp-mode.sh +0 -0
- package/.claude/hooks/download-extra-voices.sh +0 -0
- package/.claude/hooks/effects-manager.sh +0 -0
- package/.claude/hooks/github-star-reminder.sh +0 -0
- package/.claude/hooks/language-manager.sh +0 -0
- package/.claude/hooks/learn-manager.sh +0 -0
- package/.claude/hooks/macos-voice-manager.sh +0 -0
- package/.claude/hooks/migrate-background-music.sh +0 -0
- package/.claude/hooks/migrate-to-agentvibes.sh +0 -0
- package/.claude/hooks/optimize-background-music.sh +0 -0
- package/.claude/hooks/path-resolver.sh +0 -0
- package/.claude/hooks/personality-manager.sh +0 -0
- package/.claude/hooks/piper-download-voices.sh +0 -0
- package/.claude/hooks/piper-installer.sh +0 -0
- package/.claude/hooks/piper-multispeaker-registry.sh +0 -0
- package/.claude/hooks/piper-voice-manager.sh +0 -0
- package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +0 -0
- package/.claude/hooks/play-tts-agentvibes-receiver.sh +1 -0
- package/.claude/hooks/play-tts-enhanced.sh +0 -0
- package/.claude/hooks/play-tts-macos.sh +0 -0
- package/.claude/hooks/play-tts-piper.sh +0 -0
- package/.claude/hooks/play-tts-soprano.sh +0 -0
- package/.claude/hooks/play-tts-ssh-remote.sh +11 -8
- package/.claude/hooks/play-tts-termux-ssh.sh +0 -0
- package/.claude/hooks/play-tts-windows-receiver.sh +0 -0
- package/.claude/hooks/play-tts.sh +0 -0
- package/.claude/hooks/prepare-release.sh +0 -0
- package/.claude/hooks/provider-commands.sh +0 -0
- package/.claude/hooks/provider-manager.sh +0 -0
- package/.claude/hooks/replay-target-audio.sh +0 -0
- package/.claude/hooks/requirements.txt +6 -6
- package/.claude/hooks/sentiment-manager.sh +0 -0
- package/.claude/hooks/session-start-tts.sh +0 -0
- package/.claude/hooks/soprano-gradio-synth.py +139 -139
- package/.claude/hooks/speed-manager.sh +0 -0
- package/.claude/hooks/stop-tts.sh +0 -0
- package/.claude/hooks/termux-installer.sh +0 -0
- package/.claude/hooks/translate-manager.sh +0 -0
- package/.claude/hooks/translator.py +237 -237
- package/.claude/hooks/tts-queue-worker.sh +0 -0
- package/.claude/hooks/tts-queue.sh +0 -0
- package/.claude/hooks/verbosity-manager.sh +0 -0
- package/.claude/hooks/voice-manager.sh +0 -0
- package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
- package/.claude/hooks-windows/audio-cache-utils.ps1.user.bak +119 -0
- package/.claude/hooks-windows/bmad-speak.ps1 +9 -38
- package/.claude/hooks-windows/play-tts-soprano.ps1 +13 -2
- package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
- package/.claude/hooks-windows/soprano-gradio-synth.py.user.bak +153 -0
- package/.claude/piper-voices-dir.txt +1 -1
- package/.claude/verbosity.txt +1 -1
- package/.clawdbot/README.md +105 -105
- package/.mcp.json +5 -14
- package/README.md +43 -2
- package/RELEASE_NOTES.md +110 -0
- package/WINDOWS-SETUP.md +208 -208
- package/bin/agent-vibes +39 -39
- package/bin/agentvibes-voice-browser.js +0 -0
- package/bin/agentvibes.js +0 -0
- package/bin/mcp-server.js +121 -121
- package/bin/mcp-server.sh +0 -0
- package/bin/test-bmad-pr +78 -78
- package/mcp-server/QUICK_START.md +203 -203
- package/mcp-server/README.md +345 -345
- package/mcp-server/WINDOWS_SETUP.md +0 -0
- package/mcp-server/examples/claude_desktop_config.json +11 -11
- package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
- package/mcp-server/examples/custom_instructions.md +169 -169
- package/mcp-server/install-deps.js +0 -0
- package/mcp-server/server.py +1797 -1797
- package/mcp-server/test_server.py +0 -0
- package/package.json +1 -1
- package/src/cli/list-personalities.js +110 -110
- package/src/cli/list-voices.js +114 -114
- package/src/commands/bmad-voices.js +394 -394
- package/src/commands/install-mcp.js +476 -476
- package/src/console/audio-env.js +4 -1
- package/src/console/brand-colors.js +13 -13
- package/src/console/constants/personalities.js +44 -44
- package/src/console/tabs/agents-tab.js +85 -62
- package/src/console/tabs/help-tab.js +314 -314
- package/src/console/tabs/music-tab.js +3 -0
- package/src/console/tabs/readme-tab.js +272 -272
- package/src/console/tabs/setup-tab.js +285 -41
- package/src/console/tabs/voices-tab.js +14 -2
- package/src/console/widgets/destroy-list.js +25 -25
- package/src/console/widgets/notice.js +55 -55
- package/src/i18n/de.js +202 -202
- package/src/i18n/es.js +202 -202
- package/src/i18n/fr.js +202 -202
- package/src/i18n/hi.js +202 -202
- package/src/i18n/ja.js +202 -202
- package/src/i18n/ko.js +202 -202
- package/src/i18n/pt.js +202 -202
- package/src/i18n/strings.js +54 -54
- package/src/i18n/zh-CN.js +202 -202
- package/src/installer/language-screen.js +31 -31
- package/src/installer/music-file-input.js +304 -304
- package/src/installer.js +0 -0
- package/src/services/config-service.js +264 -264
- package/src/services/language-service.js +47 -47
- package/src/services/provider-service.js +143 -143
- package/src/utils/audio-duration-validator.js +298 -298
- package/src/utils/audio-format-validator.js +277 -277
- package/src/utils/dependency-checker.js +469 -469
- package/src/utils/file-ownership-verifier.js +358 -358
- package/src/utils/list-formatter.js +194 -194
- package/src/utils/music-file-validator.js +285 -285
- package/src/utils/preview-list-prompt.js +136 -136
- package/src/utils/secure-music-storage.js +412 -412
- package/templates/agentvibes-receiver.sh +231 -231
- package/templates/audio/welcome-music.mp3 +0 -0
- package/.claude/hooks/bmad-party-manager.sh +0 -225
- package/.claude/hooks/stop.sh +0 -38
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -19,10 +19,17 @@ set -euo pipefail
|
|
|
19
19
|
|
|
20
20
|
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
21
21
|
LOCK_FILE="/tmp/agentvibes-party-queue.lock"
|
|
22
|
+
DEBUG_LOG="/tmp/agentvibes-party-debug.log"
|
|
23
|
+
|
|
24
|
+
_dbg() { printf '[%s] %s\n' "$(date -Iseconds)" "$*" >> "$DEBUG_LOG" 2>/dev/null || true; }
|
|
22
25
|
|
|
23
26
|
# --- Read stdin ---
|
|
24
27
|
raw="$(cat)"
|
|
25
|
-
[[ -z "$raw" ]]
|
|
28
|
+
if [[ -z "$raw" ]]; then
|
|
29
|
+
_dbg "exit: empty stdin"
|
|
30
|
+
exit 0
|
|
31
|
+
fi
|
|
32
|
+
_dbg "fired (stdin ${#raw} bytes)"
|
|
26
33
|
|
|
27
34
|
# --- Parse all needed fields in one python3 call (fixes M5: 3x subprocess, echo safety) ---
|
|
28
35
|
# Outputs: TOOL_NAME|DISPLAY_NAME|RESPONSE_TEXT (newlines in response encoded as \n literals)
|
|
@@ -73,13 +80,26 @@ response_text="${rest#*|}"
|
|
|
73
80
|
response_text="${response_text//\\n/ }"
|
|
74
81
|
|
|
75
82
|
# --- Only handle Agent tool ---
|
|
76
|
-
[[ "$tool_name" != "Agent" ]]
|
|
83
|
+
if [[ "$tool_name" != "Agent" ]]; then
|
|
84
|
+
_dbg "skip: tool_name='$tool_name' (not Agent)"
|
|
85
|
+
exit 0
|
|
86
|
+
fi
|
|
77
87
|
|
|
78
88
|
# --- Fingerprint: only fire for BMAD party mode agents (safe string match, no pipe) ---
|
|
79
|
-
[[ "$raw"
|
|
89
|
+
if [[ "$raw" != *"BMAD agent in a collaborative roundtable"* ]]; then
|
|
90
|
+
_dbg "skip: fingerprint MISS (Agent call but prompt lacks 'BMAD agent in a collaborative roundtable')"
|
|
91
|
+
exit 0
|
|
92
|
+
fi
|
|
93
|
+
_dbg "fingerprint HIT: display='$display_name' text_len=${#response_text}"
|
|
80
94
|
|
|
81
|
-
[[ -z "$display_name" ]]
|
|
82
|
-
|
|
95
|
+
if [[ -z "$display_name" ]]; then
|
|
96
|
+
_dbg "skip: empty display_name"
|
|
97
|
+
exit 0
|
|
98
|
+
fi
|
|
99
|
+
if [[ -z "$response_text" ]]; then
|
|
100
|
+
_dbg "skip: empty response_text"
|
|
101
|
+
exit 0
|
|
102
|
+
fi
|
|
83
103
|
|
|
84
104
|
# --- Resolve project root ---
|
|
85
105
|
project_root="${CLAUDE_PROJECT_DIR:-}"
|
|
@@ -154,7 +174,8 @@ esac
|
|
|
154
174
|
exec 9>"$LOCK_FILE"
|
|
155
175
|
if command -v flock &>/dev/null; then
|
|
156
176
|
flock -w 60 9
|
|
157
|
-
"$bmad_speak
|
|
177
|
+
_dbg "invoking: $bmad_speak '$agent_id' (text_len=${#response_text})"
|
|
178
|
+
"$bmad_speak" "$agent_id" "$response_text" || _dbg "bmad-speak returned non-zero"
|
|
158
179
|
flock -u 9
|
|
159
180
|
else
|
|
160
181
|
# macOS fallback: atomic mkdir polling lock
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
play-tts-agentvibes-receiver-for-voiceless-connections.sh
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -60,7 +60,7 @@ SSH_PORT=""
|
|
|
60
60
|
if [[ -n "${AGENTVIBES_SSH_HOST:-}" ]]; then
|
|
61
61
|
SSH_HOST="$AGENTVIBES_SSH_HOST"
|
|
62
62
|
SSH_KEY="${AGENTVIBES_SSH_KEY:-}"
|
|
63
|
-
SSH_PORT="${AGENTVIBES_SSH_PORT:-
|
|
63
|
+
SSH_PORT="${AGENTVIBES_SSH_PORT:-}"
|
|
64
64
|
fi
|
|
65
65
|
|
|
66
66
|
# Priority 2: ~/.agentvibes/transport-config.json (ssh-remote section)
|
|
@@ -336,15 +336,18 @@ SSH_ARGS=()
|
|
|
336
336
|
[[ -n "$SSH_KEY" && -f "$SSH_KEY" ]] && SSH_ARGS+=(-i "$SSH_KEY")
|
|
337
337
|
[[ -n "$SSH_PORT" ]] && SSH_ARGS+=(-p "$SSH_PORT")
|
|
338
338
|
|
|
339
|
-
# ForceCommand receiver: SSH_ORIGINAL_COMMAND passes the payload directly
|
|
340
|
-
ssh
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
339
|
+
# ForceCommand receiver: SSH_ORIGINAL_COMMAND passes the payload directly.
|
|
340
|
+
# Run ssh inside the backgrounded subshell so its exit code is reachable via $?
|
|
341
|
+
# (a `wait` from outside the spawning shell would error: "pid X is not a child").
|
|
342
|
+
(
|
|
343
|
+
ssh -o ConnectTimeout=10 "${SSH_ARGS[@]}" "$SSH_HOST" "$ENCODED_PAYLOAD"
|
|
344
|
+
_exit=$?
|
|
345
|
+
if [[ $_exit -ne 0 ]]; then
|
|
345
346
|
echo "$(date -Iseconds) [ERROR] SSH to $SSH_HOST failed (exit $_exit)" \
|
|
346
347
|
>> "$HOME/.agentvibes/ssh-remote.log" 2>/dev/null || true
|
|
347
|
-
fi
|
|
348
|
+
fi
|
|
349
|
+
) &
|
|
350
|
+
SSH_PID=$!
|
|
348
351
|
|
|
349
352
|
echo "Sent to $SSH_HOST (PID: $SSH_PID)" >&2
|
|
350
353
|
exit 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
# AgentVibes TTS Hooks Requirements
|
|
2
|
-
# Install with: pip install -r requirements.txt
|
|
3
|
-
|
|
4
|
-
# Translation support for multi-language TTS and learning mode
|
|
5
|
-
deep-translator>=1.11.4
|
|
6
|
-
langdetect>=1.0.9
|
|
1
|
+
# AgentVibes TTS Hooks Requirements
|
|
2
|
+
# Install with: pip install -r requirements.txt
|
|
3
|
+
|
|
4
|
+
# Translation support for multi-language TTS and learning mode
|
|
5
|
+
deep-translator>=1.11.4
|
|
6
|
+
langdetect>=1.0.9
|
|
File without changes
|
|
File without changes
|
|
@@ -1,139 +1,139 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
#
|
|
3
|
-
# File: .claude/hooks/soprano-gradio-synth.py
|
|
4
|
-
#
|
|
5
|
-
# AgentVibes - Finally, your AI Agents can Talk Back!
|
|
6
|
-
# Website: https://agentvibes.org
|
|
7
|
-
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
-
#
|
|
9
|
-
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
-
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
-
#
|
|
12
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
-
# you may not use this file except in compliance with the License.
|
|
14
|
-
# You may obtain a copy of the License at
|
|
15
|
-
#
|
|
16
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
-
#
|
|
18
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
-
# See the License for the specific language governing permissions and
|
|
22
|
-
# limitations under the License.
|
|
23
|
-
#
|
|
24
|
-
# ---
|
|
25
|
-
#
|
|
26
|
-
# Soprano Gradio WebUI synthesizer helper.
|
|
27
|
-
# Calls the Soprano WebUI's Gradio API and saves the result as a WAV file.
|
|
28
|
-
# Uses only Python stdlib (json, sys, urllib) — no extra dependencies.
|
|
29
|
-
#
|
|
30
|
-
# Usage: python3 soprano-gradio-synth.py "text to speak" output.wav [port]
|
|
31
|
-
#
|
|
32
|
-
"""
|
|
33
|
-
Soprano Gradio WebUI synthesizer helper for AgentVibes.
|
|
34
|
-
|
|
35
|
-
Calls the Soprano WebUI's Gradio API via the Server-Sent Events (SSE) protocol
|
|
36
|
-
and downloads the generated audio as a WAV file.
|
|
37
|
-
|
|
38
|
-
Flow:
|
|
39
|
-
1. Submit generation request → get event_id
|
|
40
|
-
2. Poll SSE stream for audio file URL
|
|
41
|
-
3. Download WAV file to output path
|
|
42
|
-
|
|
43
|
-
See: https://github.com/ekwek1/soprano
|
|
44
|
-
"""
|
|
45
|
-
import json
|
|
46
|
-
import sys
|
|
47
|
-
import urllib.request
|
|
48
|
-
import urllib.error
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def synth(text: str, output_path: str, port: int = 7860) -> None:
|
|
52
|
-
base = f"http://127.0.0.1:{port}"
|
|
53
|
-
|
|
54
|
-
# Step 1: Submit generation request
|
|
55
|
-
# Args: text, temperature, top_p, repetition_penalty, chunk_size, streaming
|
|
56
|
-
payload = json.dumps({
|
|
57
|
-
"data": [text, 0.0, 0.95, 1.2, 1, False]
|
|
58
|
-
}).encode()
|
|
59
|
-
|
|
60
|
-
event_id = submit_request(base, payload)
|
|
61
|
-
|
|
62
|
-
# Step 2: Poll SSE stream for audio file URL
|
|
63
|
-
audio_url = poll_for_result(base, event_id)
|
|
64
|
-
|
|
65
|
-
# Step 3: Download the audio file
|
|
66
|
-
download_file(audio_url, output_path)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def submit_request(base: str, payload: bytes) -> str:
|
|
70
|
-
"""Submit generation request to Gradio API, return event_id."""
|
|
71
|
-
for api_base in ["/gradio_api/call", "/call"]:
|
|
72
|
-
url = f"{base}{api_base}/generate_speech"
|
|
73
|
-
req = urllib.request.Request(
|
|
74
|
-
url,
|
|
75
|
-
data=payload,
|
|
76
|
-
headers={"Content-Type": "application/json"},
|
|
77
|
-
)
|
|
78
|
-
try:
|
|
79
|
-
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
80
|
-
return json.loads(resp.read())["event_id"]
|
|
81
|
-
except urllib.error.URLError:
|
|
82
|
-
continue
|
|
83
|
-
|
|
84
|
-
raise RuntimeError("Could not reach Soprano WebUI API")
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def poll_for_result(base: str, event_id: str) -> str:
|
|
88
|
-
"""Poll SSE endpoint until audio file URL is returned."""
|
|
89
|
-
for api_base in ["/gradio_api/call", "/call"]:
|
|
90
|
-
url = f"{base}{api_base}/generate_speech/{event_id}"
|
|
91
|
-
req = urllib.request.Request(url)
|
|
92
|
-
try:
|
|
93
|
-
with urllib.request.urlopen(req, timeout=120) as resp:
|
|
94
|
-
for raw_line in resp:
|
|
95
|
-
line = raw_line.decode("utf-8", errors="replace").strip()
|
|
96
|
-
if not line.startswith("data: "):
|
|
97
|
-
continue
|
|
98
|
-
try:
|
|
99
|
-
parsed = json.loads(line[6:])
|
|
100
|
-
except json.JSONDecodeError:
|
|
101
|
-
continue
|
|
102
|
-
# Response format: [{"path": "...", "url": "...", ...}, "status string"]
|
|
103
|
-
if isinstance(parsed, list) and len(parsed) >= 1:
|
|
104
|
-
audio = parsed[0]
|
|
105
|
-
if isinstance(audio, dict) and "url" in audio:
|
|
106
|
-
return audio["url"]
|
|
107
|
-
break
|
|
108
|
-
except urllib.error.URLError:
|
|
109
|
-
continue
|
|
110
|
-
|
|
111
|
-
raise RuntimeError("No audio URL in Soprano response")
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def download_file(url: str, output_path: str) -> None:
|
|
115
|
-
"""Download audio file from Gradio file server."""
|
|
116
|
-
req = urllib.request.Request(url)
|
|
117
|
-
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
118
|
-
with open(output_path, "wb") as f:
|
|
119
|
-
while True:
|
|
120
|
-
chunk = resp.read(8192)
|
|
121
|
-
if not chunk:
|
|
122
|
-
break
|
|
123
|
-
f.write(chunk)
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
if __name__ == "__main__":
|
|
127
|
-
if len(sys.argv) < 3:
|
|
128
|
-
print(f"Usage: {sys.argv[0]} \"text\" output.wav [port]", file=sys.stderr)
|
|
129
|
-
sys.exit(1)
|
|
130
|
-
|
|
131
|
-
text = sys.argv[1]
|
|
132
|
-
output = sys.argv[2]
|
|
133
|
-
port = int(sys.argv[3]) if len(sys.argv) > 3 else 7860
|
|
134
|
-
|
|
135
|
-
try:
|
|
136
|
-
synth(text, output, port)
|
|
137
|
-
except Exception as e:
|
|
138
|
-
print(f"Error: {e}", file=sys.stderr)
|
|
139
|
-
sys.exit(1)
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
#
|
|
3
|
+
# File: .claude/hooks/soprano-gradio-synth.py
|
|
4
|
+
#
|
|
5
|
+
# AgentVibes - Finally, your AI Agents can Talk Back!
|
|
6
|
+
# Website: https://agentvibes.org
|
|
7
|
+
# Repository: https://github.com/paulpreibisch/AgentVibes
|
|
8
|
+
#
|
|
9
|
+
# Co-created by Paul Preibisch with Claude AI
|
|
10
|
+
# Copyright (c) 2025 Paul Preibisch
|
|
11
|
+
#
|
|
12
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
13
|
+
# you may not use this file except in compliance with the License.
|
|
14
|
+
# You may obtain a copy of the License at
|
|
15
|
+
#
|
|
16
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
17
|
+
#
|
|
18
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
19
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
20
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
21
|
+
# See the License for the specific language governing permissions and
|
|
22
|
+
# limitations under the License.
|
|
23
|
+
#
|
|
24
|
+
# ---
|
|
25
|
+
#
|
|
26
|
+
# Soprano Gradio WebUI synthesizer helper.
|
|
27
|
+
# Calls the Soprano WebUI's Gradio API and saves the result as a WAV file.
|
|
28
|
+
# Uses only Python stdlib (json, sys, urllib) — no extra dependencies.
|
|
29
|
+
#
|
|
30
|
+
# Usage: python3 soprano-gradio-synth.py "text to speak" output.wav [port]
|
|
31
|
+
#
|
|
32
|
+
"""
|
|
33
|
+
Soprano Gradio WebUI synthesizer helper for AgentVibes.
|
|
34
|
+
|
|
35
|
+
Calls the Soprano WebUI's Gradio API via the Server-Sent Events (SSE) protocol
|
|
36
|
+
and downloads the generated audio as a WAV file.
|
|
37
|
+
|
|
38
|
+
Flow:
|
|
39
|
+
1. Submit generation request → get event_id
|
|
40
|
+
2. Poll SSE stream for audio file URL
|
|
41
|
+
3. Download WAV file to output path
|
|
42
|
+
|
|
43
|
+
See: https://github.com/ekwek1/soprano
|
|
44
|
+
"""
|
|
45
|
+
import json
|
|
46
|
+
import sys
|
|
47
|
+
import urllib.request
|
|
48
|
+
import urllib.error
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def synth(text: str, output_path: str, port: int = 7860) -> None:
|
|
52
|
+
base = f"http://127.0.0.1:{port}"
|
|
53
|
+
|
|
54
|
+
# Step 1: Submit generation request
|
|
55
|
+
# Args: text, temperature, top_p, repetition_penalty, chunk_size, streaming
|
|
56
|
+
payload = json.dumps({
|
|
57
|
+
"data": [text, 0.0, 0.95, 1.2, 1, False]
|
|
58
|
+
}).encode()
|
|
59
|
+
|
|
60
|
+
event_id = submit_request(base, payload)
|
|
61
|
+
|
|
62
|
+
# Step 2: Poll SSE stream for audio file URL
|
|
63
|
+
audio_url = poll_for_result(base, event_id)
|
|
64
|
+
|
|
65
|
+
# Step 3: Download the audio file
|
|
66
|
+
download_file(audio_url, output_path)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def submit_request(base: str, payload: bytes) -> str:
|
|
70
|
+
"""Submit generation request to Gradio API, return event_id."""
|
|
71
|
+
for api_base in ["/gradio_api/call", "/call"]:
|
|
72
|
+
url = f"{base}{api_base}/generate_speech"
|
|
73
|
+
req = urllib.request.Request(
|
|
74
|
+
url,
|
|
75
|
+
data=payload,
|
|
76
|
+
headers={"Content-Type": "application/json"},
|
|
77
|
+
)
|
|
78
|
+
try:
|
|
79
|
+
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
80
|
+
return json.loads(resp.read())["event_id"]
|
|
81
|
+
except urllib.error.URLError:
|
|
82
|
+
continue
|
|
83
|
+
|
|
84
|
+
raise RuntimeError("Could not reach Soprano WebUI API")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def poll_for_result(base: str, event_id: str) -> str:
|
|
88
|
+
"""Poll SSE endpoint until audio file URL is returned."""
|
|
89
|
+
for api_base in ["/gradio_api/call", "/call"]:
|
|
90
|
+
url = f"{base}{api_base}/generate_speech/{event_id}"
|
|
91
|
+
req = urllib.request.Request(url)
|
|
92
|
+
try:
|
|
93
|
+
with urllib.request.urlopen(req, timeout=120) as resp:
|
|
94
|
+
for raw_line in resp:
|
|
95
|
+
line = raw_line.decode("utf-8", errors="replace").strip()
|
|
96
|
+
if not line.startswith("data: "):
|
|
97
|
+
continue
|
|
98
|
+
try:
|
|
99
|
+
parsed = json.loads(line[6:])
|
|
100
|
+
except json.JSONDecodeError:
|
|
101
|
+
continue
|
|
102
|
+
# Response format: [{"path": "...", "url": "...", ...}, "status string"]
|
|
103
|
+
if isinstance(parsed, list) and len(parsed) >= 1:
|
|
104
|
+
audio = parsed[0]
|
|
105
|
+
if isinstance(audio, dict) and "url" in audio:
|
|
106
|
+
return audio["url"]
|
|
107
|
+
break
|
|
108
|
+
except urllib.error.URLError:
|
|
109
|
+
continue
|
|
110
|
+
|
|
111
|
+
raise RuntimeError("No audio URL in Soprano response")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def download_file(url: str, output_path: str) -> None:
|
|
115
|
+
"""Download audio file from Gradio file server."""
|
|
116
|
+
req = urllib.request.Request(url)
|
|
117
|
+
with urllib.request.urlopen(req, timeout=30) as resp:
|
|
118
|
+
with open(output_path, "wb") as f:
|
|
119
|
+
while True:
|
|
120
|
+
chunk = resp.read(8192)
|
|
121
|
+
if not chunk:
|
|
122
|
+
break
|
|
123
|
+
f.write(chunk)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
if __name__ == "__main__":
|
|
127
|
+
if len(sys.argv) < 3:
|
|
128
|
+
print(f"Usage: {sys.argv[0]} \"text\" output.wav [port]", file=sys.stderr)
|
|
129
|
+
sys.exit(1)
|
|
130
|
+
|
|
131
|
+
text = sys.argv[1]
|
|
132
|
+
output = sys.argv[2]
|
|
133
|
+
port = int(sys.argv[3]) if len(sys.argv) > 3 else 7860
|
|
134
|
+
|
|
135
|
+
try:
|
|
136
|
+
synth(text, output, port)
|
|
137
|
+
except Exception as e:
|
|
138
|
+
print(f"Error: {e}", file=sys.stderr)
|
|
139
|
+
sys.exit(1)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|