agentvibes 5.9.0 → 5.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. package/.agentvibes/config.json +3 -12
  2. package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
  3. package/.claude/commands/agent-vibes-rdp.md +24 -24
  4. package/.claude/config/audio-effects.cfg +4 -5
  5. package/.claude/config/audio-effects.cfg.sample +52 -52
  6. package/.claude/config/background-music-enabled.txt +1 -1
  7. package/.claude/docs/TERMUX_SETUP.md +408 -408
  8. package/.claude/github-star-reminder.txt +1 -1
  9. package/.claude/hooks/audio-cache-utils.sh +0 -0
  10. package/.claude/hooks/audio-processor.sh +0 -0
  11. package/.claude/hooks/background-music-manager.sh +0 -0
  12. package/.claude/hooks/bmad-party-speak.sh +0 -0
  13. package/.claude/hooks/bmad-speak-enhanced.sh +0 -0
  14. package/.claude/hooks/bmad-speak.sh +0 -0
  15. package/.claude/hooks/bmad-tts-injector.sh +0 -0
  16. package/.claude/hooks/bmad-voice-manager.sh +0 -0
  17. package/.claude/hooks/clawdbot-receiver-SECURE.sh +0 -0
  18. package/.claude/hooks/clawdbot-receiver.sh +0 -0
  19. package/.claude/hooks/clean-audio-cache.sh +0 -0
  20. package/.claude/hooks/cleanup-cache.sh +0 -0
  21. package/.claude/hooks/configure-rdp-mode.sh +0 -0
  22. package/.claude/hooks/download-extra-voices.sh +0 -0
  23. package/.claude/hooks/effects-manager.sh +0 -0
  24. package/.claude/hooks/github-star-reminder.sh +0 -0
  25. package/.claude/hooks/language-manager.sh +0 -0
  26. package/.claude/hooks/learn-manager.sh +0 -0
  27. package/.claude/hooks/macos-voice-manager.sh +0 -0
  28. package/.claude/hooks/migrate-background-music.sh +0 -0
  29. package/.claude/hooks/migrate-to-agentvibes.sh +0 -0
  30. package/.claude/hooks/optimize-background-music.sh +0 -0
  31. package/.claude/hooks/path-resolver.sh +0 -0
  32. package/.claude/hooks/personality-manager.sh +0 -0
  33. package/.claude/hooks/piper-download-voices.sh +0 -0
  34. package/.claude/hooks/piper-installer.sh +0 -0
  35. package/.claude/hooks/piper-multispeaker-registry.sh +0 -0
  36. package/.claude/hooks/piper-voice-manager.sh +0 -0
  37. package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +0 -0
  38. package/.claude/hooks/play-tts-enhanced.sh +0 -0
  39. package/.claude/hooks/play-tts-macos.sh +0 -0
  40. package/.claude/hooks/play-tts-piper.sh +20 -13
  41. package/.claude/hooks/play-tts-soprano.sh +0 -0
  42. package/.claude/hooks/play-tts-ssh-remote.sh +0 -0
  43. package/.claude/hooks/play-tts-termux-ssh.sh +0 -0
  44. package/.claude/hooks/play-tts-windows-receiver.sh +0 -0
  45. package/.claude/hooks/play-tts.sh +0 -0
  46. package/.claude/hooks/prepare-release.sh +0 -0
  47. package/.claude/hooks/provider-commands.sh +0 -0
  48. package/.claude/hooks/provider-manager.sh +0 -0
  49. package/.claude/hooks/replay-target-audio.sh +0 -0
  50. package/.claude/hooks/requirements.txt +6 -6
  51. package/.claude/hooks/sentiment-manager.sh +0 -0
  52. package/.claude/hooks/session-start-tts.sh +0 -0
  53. package/.claude/hooks/soprano-gradio-synth.py +139 -139
  54. package/.claude/hooks/speed-manager.sh +0 -0
  55. package/.claude/hooks/stop-tts.sh +0 -0
  56. package/.claude/hooks/termux-installer.sh +0 -0
  57. package/.claude/hooks/translate-manager.sh +0 -0
  58. package/.claude/hooks/translator.py +237 -237
  59. package/.claude/hooks/tts-queue-worker.sh +0 -0
  60. package/.claude/hooks/tts-queue.sh +0 -0
  61. package/.claude/hooks/verbosity-manager.sh +0 -0
  62. package/.claude/hooks/voice-manager.sh +6 -0
  63. package/.claude/hooks-windows/play-tts-windows-piper.ps1 +22 -16
  64. package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
  65. package/.claude/verbosity.txt +1 -1
  66. package/.clawdbot/README.md +105 -105
  67. package/.mcp.json +19 -6
  68. package/README.md +1 -1
  69. package/WINDOWS-SETUP.md +208 -208
  70. package/bin/agent-vibes +39 -39
  71. package/bin/agentvibes-voice-browser.js +0 -0
  72. package/bin/agentvibes.js +0 -0
  73. package/bin/mcp-server.js +121 -121
  74. package/bin/mcp-server.sh +0 -0
  75. package/bin/test-bmad-pr +78 -78
  76. package/mcp-server/QUICK_START.md +203 -203
  77. package/mcp-server/README.md +345 -345
  78. package/mcp-server/WINDOWS_SETUP.md +0 -0
  79. package/mcp-server/examples/claude_desktop_config.json +11 -11
  80. package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
  81. package/mcp-server/examples/custom_instructions.md +169 -169
  82. package/mcp-server/install-deps.js +0 -0
  83. package/mcp-server/server.py +1807 -1797
  84. package/mcp-server/test_server.py +0 -0
  85. package/package.json +2 -2
  86. package/src/cli/list-personalities.js +110 -110
  87. package/src/cli/list-voices.js +114 -114
  88. package/src/commands/bmad-voices.js +394 -394
  89. package/src/commands/install-mcp.js +730 -476
  90. package/src/console/app.js +3 -3
  91. package/src/console/brand-colors.js +13 -13
  92. package/src/console/constants/personalities.js +44 -44
  93. package/src/console/tabs/agents-tab.js +6 -6
  94. package/src/console/tabs/help-tab.js +314 -314
  95. package/src/console/tabs/music-tab.js +1 -1
  96. package/src/console/tabs/readme-tab.js +272 -272
  97. package/src/console/tabs/receiver-tab.js +13 -13
  98. package/src/console/tabs/settings-tab.js +2 -2
  99. package/src/console/tabs/setup-tab.js +10 -10
  100. package/src/console/tabs/voices-tab.js +4 -4
  101. package/src/console/widgets/destroy-list.js +25 -25
  102. package/src/console/widgets/notice.js +55 -55
  103. package/src/console/widgets/personality-picker.js +2 -2
  104. package/src/console/widgets/reverb-picker.js +1 -1
  105. package/src/i18n/de.js +202 -202
  106. package/src/i18n/es.js +202 -202
  107. package/src/i18n/fr.js +202 -202
  108. package/src/i18n/hi.js +202 -202
  109. package/src/i18n/ja.js +202 -202
  110. package/src/i18n/ko.js +202 -202
  111. package/src/i18n/pt.js +202 -202
  112. package/src/i18n/strings.js +54 -54
  113. package/src/i18n/zh-CN.js +202 -202
  114. package/src/installer/language-screen.js +31 -31
  115. package/src/installer/music-file-input.js +304 -304
  116. package/src/installer.js +32 -27
  117. package/src/services/config-service.js +264 -264
  118. package/src/services/language-service.js +47 -47
  119. package/src/services/provider-service.js +143 -143
  120. package/src/services/tts-engine-service.js +2 -2
  121. package/src/utils/audio-duration-validator.js +298 -298
  122. package/src/utils/audio-format-validator.js +277 -277
  123. package/src/utils/dependency-checker.js +469 -469
  124. package/src/utils/file-ownership-verifier.js +358 -358
  125. package/src/utils/list-formatter.js +200 -194
  126. package/src/utils/music-file-validator.js +285 -285
  127. package/src/utils/platform-resolver.js +369 -0
  128. package/src/utils/preview-list-prompt.js +136 -136
  129. package/src/utils/provider-validator.js +9 -9
  130. package/src/utils/secure-music-storage.js +412 -412
  131. package/templates/agentvibes-receiver.sh +231 -231
  132. package/templates/audio/welcome-music.mp3 +0 -0
  133. package/.agentvibes/install-manifest.json +0 -330
  134. package/.claude/config/background-music-position.txt +0 -27
  135. package/.claude/config/background-music-volume.txt +0 -1
  136. package/.claude/config/background-music.cfg +0 -1
  137. package/.claude/config/background-music.txt +0 -1
  138. package/.claude/config/language.txt +0 -1
  139. package/.claude/config/reverb-level.txt +0 -1
  140. package/.claude/config/tts-speech-rate.txt +0 -1
  141. package/.claude/config/tts-verbosity.txt +0 -1
  142. package/.claude/hooks/play-tts-agentvibes-receiver.sh +0 -1
  143. package/.claude/hooks-windows/audio-cache-utils.ps1.user.bak +0 -119
  144. package/.claude/hooks-windows/soprano-gradio-synth.py.user.bak +0 -153
  145. package/.claude/piper-voices-dir.txt +0 -1
@@ -1 +1 @@
1
- 20260516
1
+ 20260525
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -73,10 +73,8 @@ TEXT=$(printf '%s' "$TEXT" | perl -CSD -pe '
73
73
  s/^\s*[-]\s*//g; # list dashes
74
74
  ')
75
75
 
76
- # Source voice manager and language manager
77
- # Use readlink -f to handle symlinks correctly
78
- SCRIPT_PATH="$(readlink -f "${BASH_SOURCE[0]}")"
79
- SCRIPT_DIR="$(dirname "$SCRIPT_PATH")"
76
+ # cd-based resolution works on macOS (BSD readlink lacks -f) and Linux alike
77
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
80
78
  source "$SCRIPT_DIR/piper-voice-manager.sh"
81
79
  source "$SCRIPT_DIR/language-manager.sh"
82
80
  source "$SCRIPT_DIR/audio-cache-utils.sh"
@@ -226,11 +224,19 @@ if [[ -z "$TEXT" ]]; then
226
224
  fi
227
225
 
228
226
  # Augment PATH for non-interactive shells (pipx installs to ~/.local/bin which
229
- # interactive shells get via .bashrc/.zshrc, but Bash tool calls skip profile)
227
+ # interactive shells get via .bashrc/.zshrc, but Bash tool calls skip profile).
228
+ # Mac: add both Apple Silicon (/opt/homebrew) and Intel (/usr/local) Homebrew locations.
230
229
  export PATH="$HOME/.local/bin:$HOME/.local/share/pipx/venvs/piper-tts/bin:$PATH"
230
+ if [[ "$(uname -s 2>/dev/null)" == "Darwin" ]]; then
231
+ export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"
232
+ fi
233
+
234
+ # Resolve explicit piper binary path — avoids bare `piper` invocation failing when
235
+ # PATH augmentation above hasn't propagated into nested subshells.
236
+ PIPER_BIN=$(command -v piper 2>/dev/null || echo "")
231
237
 
232
238
  # Check if Piper is installed
233
- if ! command -v piper &> /dev/null; then
239
+ if [[ -z "$PIPER_BIN" ]]; then
234
240
  echo "❌ Error: Piper TTS not installed"
235
241
  echo "Install with: pipx install piper-tts"
236
242
  echo "Or run: .claude/hooks/piper-installer.sh"
@@ -309,6 +315,8 @@ else
309
315
  fi
310
316
 
311
317
  mkdir -p "$AUDIO_DIR"
318
+ # Normalize to canonical path (handles Git Bash /tmp→/c/Users/..., macOS /var→/private/var)
319
+ AUDIO_DIR=$(cd "$AUDIO_DIR" && pwd -P)
312
320
  _tmp=$(mktemp "$AUDIO_DIR/tts-XXXXXX"); TEMP_FILE="${_tmp}.wav"; mv "$_tmp" "$TEMP_FILE"
313
321
 
314
322
  # @function get_speech_rate
@@ -380,6 +388,10 @@ get_speech_rate() {
380
388
 
381
389
  SPEECH_RATE=$(get_speech_rate)
382
390
 
391
+ # Ensure piper log directory exists so stderr redirect never silently fails
392
+ _PIPER_LOG_DIR="${AGENTVIBES_LOG_DIR:-$HOME/.local/state/agentvibes/logs}"
393
+ mkdir -p "$_PIPER_LOG_DIR" 2>/dev/null || true
394
+
383
395
  # @function synthesize_with_piper
384
396
  # @intent Generate speech using Piper TTS
385
397
  # @why Provides free, offline TTS alternative
@@ -391,10 +403,10 @@ SPEECH_RATE=$(get_speech_rate)
391
403
  if [[ -n "${SPEAKER_ID:-}" ]]; then
392
404
  # Multi-speaker voice: Pass speaker ID
393
405
  # SECURITY: Use printf instead of echo for pipe safety (#134)
394
- printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --speaker "$SPEAKER_ID" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
406
+ printf '%s\n' "$TEXT" | "$PIPER_BIN" --model "$VOICE_PATH" --speaker "$SPEAKER_ID" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>>"$_PIPER_LOG_DIR/piper.log"
395
407
  else
396
408
  # Single-speaker voice
397
- printf '%s\n' "$TEXT" | piper --model "$VOICE_PATH" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>/dev/null
409
+ printf '%s\n' "$TEXT" | "$PIPER_BIN" --model "$VOICE_PATH" --length-scale "$SPEECH_RATE" --sentence-silence 2.0 --output_file "$TEMP_FILE" 2>>"$_PIPER_LOG_DIR/piper.log"
398
410
  fi
399
411
 
400
412
  if [[ ! -f "$TEMP_FILE" ]] || [[ ! -s "$TEMP_FILE" ]]; then
@@ -538,11 +550,6 @@ AUDIO_DIR="${TEMP_FILE%/*}"
538
550
  WRITE_LOCK_FILE="$AUDIO_DIR/$(basename "$TEMP_FILE" .wav).lock"
539
551
  touch "$WRITE_LOCK_FILE"
540
552
 
541
- # Get audio duration for proper lock timing
542
- DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null)
543
- DURATION=${DURATION%.*} # Round to integer
544
- DURATION=${DURATION:-1} # Default to 1 second if detection fails
545
-
546
553
  # Get audio duration for proper lock timing
547
554
  DURATION=$(ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$TEMP_FILE" 2>/dev/null || true)
548
555
  DURATION=${DURATION%.*} # Round to integer
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -1,6 +1,6 @@
1
- # AgentVibes TTS Hooks Requirements
2
- # Install with: pip install -r requirements.txt
3
-
4
- # Translation support for multi-language TTS and learning mode
5
- deep-translator>=1.11.4
6
- langdetect>=1.0.9
1
+ # AgentVibes TTS Hooks Requirements
2
+ # Install with: pip install -r requirements.txt
3
+
4
+ # Translation support for multi-language TTS and learning mode
5
+ deep-translator>=1.11.4
6
+ langdetect>=1.0.9
File without changes
File without changes
@@ -1,139 +1,139 @@
1
- #!/usr/bin/env python3
2
- #
3
- # File: .claude/hooks/soprano-gradio-synth.py
4
- #
5
- # AgentVibes - Finally, your AI Agents can Talk Back!
6
- # Website: https://agentvibes.org
7
- # Repository: https://github.com/paulpreibisch/AgentVibes
8
- #
9
- # Co-created by Paul Preibisch with Claude AI
10
- # Copyright (c) 2025 Paul Preibisch
11
- #
12
- # Licensed under the Apache License, Version 2.0 (the "License");
13
- # you may not use this file except in compliance with the License.
14
- # You may obtain a copy of the License at
15
- #
16
- # http://www.apache.org/licenses/LICENSE-2.0
17
- #
18
- # Unless required by applicable law or agreed to in writing, software
19
- # distributed under the License is distributed on an "AS IS" BASIS,
20
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
- # See the License for the specific language governing permissions and
22
- # limitations under the License.
23
- #
24
- # ---
25
- #
26
- # Soprano Gradio WebUI synthesizer helper.
27
- # Calls the Soprano WebUI's Gradio API and saves the result as a WAV file.
28
- # Uses only Python stdlib (json, sys, urllib) — no extra dependencies.
29
- #
30
- # Usage: python3 soprano-gradio-synth.py "text to speak" output.wav [port]
31
- #
32
- """
33
- Soprano Gradio WebUI synthesizer helper for AgentVibes.
34
-
35
- Calls the Soprano WebUI's Gradio API via the Server-Sent Events (SSE) protocol
36
- and downloads the generated audio as a WAV file.
37
-
38
- Flow:
39
- 1. Submit generation request → get event_id
40
- 2. Poll SSE stream for audio file URL
41
- 3. Download WAV file to output path
42
-
43
- See: https://github.com/ekwek1/soprano
44
- """
45
- import json
46
- import sys
47
- import urllib.request
48
- import urllib.error
49
-
50
-
51
- def synth(text: str, output_path: str, port: int = 7860) -> None:
52
- base = f"http://127.0.0.1:{port}"
53
-
54
- # Step 1: Submit generation request
55
- # Args: text, temperature, top_p, repetition_penalty, chunk_size, streaming
56
- payload = json.dumps({
57
- "data": [text, 0.0, 0.95, 1.2, 1, False]
58
- }).encode()
59
-
60
- event_id = submit_request(base, payload)
61
-
62
- # Step 2: Poll SSE stream for audio file URL
63
- audio_url = poll_for_result(base, event_id)
64
-
65
- # Step 3: Download the audio file
66
- download_file(audio_url, output_path)
67
-
68
-
69
- def submit_request(base: str, payload: bytes) -> str:
70
- """Submit generation request to Gradio API, return event_id."""
71
- for api_base in ["/gradio_api/call", "/call"]:
72
- url = f"{base}{api_base}/generate_speech"
73
- req = urllib.request.Request(
74
- url,
75
- data=payload,
76
- headers={"Content-Type": "application/json"},
77
- )
78
- try:
79
- with urllib.request.urlopen(req, timeout=30) as resp:
80
- return json.loads(resp.read())["event_id"]
81
- except urllib.error.URLError:
82
- continue
83
-
84
- raise RuntimeError("Could not reach Soprano WebUI API")
85
-
86
-
87
- def poll_for_result(base: str, event_id: str) -> str:
88
- """Poll SSE endpoint until audio file URL is returned."""
89
- for api_base in ["/gradio_api/call", "/call"]:
90
- url = f"{base}{api_base}/generate_speech/{event_id}"
91
- req = urllib.request.Request(url)
92
- try:
93
- with urllib.request.urlopen(req, timeout=120) as resp:
94
- for raw_line in resp:
95
- line = raw_line.decode("utf-8", errors="replace").strip()
96
- if not line.startswith("data: "):
97
- continue
98
- try:
99
- parsed = json.loads(line[6:])
100
- except json.JSONDecodeError:
101
- continue
102
- # Response format: [{"path": "...", "url": "...", ...}, "status string"]
103
- if isinstance(parsed, list) and len(parsed) >= 1:
104
- audio = parsed[0]
105
- if isinstance(audio, dict) and "url" in audio:
106
- return audio["url"]
107
- break
108
- except urllib.error.URLError:
109
- continue
110
-
111
- raise RuntimeError("No audio URL in Soprano response")
112
-
113
-
114
- def download_file(url: str, output_path: str) -> None:
115
- """Download audio file from Gradio file server."""
116
- req = urllib.request.Request(url)
117
- with urllib.request.urlopen(req, timeout=30) as resp:
118
- with open(output_path, "wb") as f:
119
- while True:
120
- chunk = resp.read(8192)
121
- if not chunk:
122
- break
123
- f.write(chunk)
124
-
125
-
126
- if __name__ == "__main__":
127
- if len(sys.argv) < 3:
128
- print(f"Usage: {sys.argv[0]} \"text\" output.wav [port]", file=sys.stderr)
129
- sys.exit(1)
130
-
131
- text = sys.argv[1]
132
- output = sys.argv[2]
133
- port = int(sys.argv[3]) if len(sys.argv) > 3 else 7860
134
-
135
- try:
136
- synth(text, output, port)
137
- except Exception as e:
138
- print(f"Error: {e}", file=sys.stderr)
139
- sys.exit(1)
1
+ #!/usr/bin/env python3
2
+ #
3
+ # File: .claude/hooks/soprano-gradio-synth.py
4
+ #
5
+ # AgentVibes - Finally, your AI Agents can Talk Back!
6
+ # Website: https://agentvibes.org
7
+ # Repository: https://github.com/paulpreibisch/AgentVibes
8
+ #
9
+ # Co-created by Paul Preibisch with Claude AI
10
+ # Copyright (c) 2025 Paul Preibisch
11
+ #
12
+ # Licensed under the Apache License, Version 2.0 (the "License");
13
+ # you may not use this file except in compliance with the License.
14
+ # You may obtain a copy of the License at
15
+ #
16
+ # http://www.apache.org/licenses/LICENSE-2.0
17
+ #
18
+ # Unless required by applicable law or agreed to in writing, software
19
+ # distributed under the License is distributed on an "AS IS" BASIS,
20
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21
+ # See the License for the specific language governing permissions and
22
+ # limitations under the License.
23
+ #
24
+ # ---
25
+ #
26
+ # Soprano Gradio WebUI synthesizer helper.
27
+ # Calls the Soprano WebUI's Gradio API and saves the result as a WAV file.
28
+ # Uses only Python stdlib (json, sys, urllib) — no extra dependencies.
29
+ #
30
+ # Usage: python3 soprano-gradio-synth.py "text to speak" output.wav [port]
31
+ #
32
+ """
33
+ Soprano Gradio WebUI synthesizer helper for AgentVibes.
34
+
35
+ Calls the Soprano WebUI's Gradio API via the Server-Sent Events (SSE) protocol
36
+ and downloads the generated audio as a WAV file.
37
+
38
+ Flow:
39
+ 1. Submit generation request → get event_id
40
+ 2. Poll SSE stream for audio file URL
41
+ 3. Download WAV file to output path
42
+
43
+ See: https://github.com/ekwek1/soprano
44
+ """
45
+ import json
46
+ import sys
47
+ import urllib.request
48
+ import urllib.error
49
+
50
+
51
+ def synth(text: str, output_path: str, port: int = 7860) -> None:
52
+ base = f"http://127.0.0.1:{port}"
53
+
54
+ # Step 1: Submit generation request
55
+ # Args: text, temperature, top_p, repetition_penalty, chunk_size, streaming
56
+ payload = json.dumps({
57
+ "data": [text, 0.0, 0.95, 1.2, 1, False]
58
+ }).encode()
59
+
60
+ event_id = submit_request(base, payload)
61
+
62
+ # Step 2: Poll SSE stream for audio file URL
63
+ audio_url = poll_for_result(base, event_id)
64
+
65
+ # Step 3: Download the audio file
66
+ download_file(audio_url, output_path)
67
+
68
+
69
+ def submit_request(base: str, payload: bytes) -> str:
70
+ """Submit generation request to Gradio API, return event_id."""
71
+ for api_base in ["/gradio_api/call", "/call"]:
72
+ url = f"{base}{api_base}/generate_speech"
73
+ req = urllib.request.Request(
74
+ url,
75
+ data=payload,
76
+ headers={"Content-Type": "application/json"},
77
+ )
78
+ try:
79
+ with urllib.request.urlopen(req, timeout=30) as resp:
80
+ return json.loads(resp.read())["event_id"]
81
+ except urllib.error.URLError:
82
+ continue
83
+
84
+ raise RuntimeError("Could not reach Soprano WebUI API")
85
+
86
+
87
+ def poll_for_result(base: str, event_id: str) -> str:
88
+ """Poll SSE endpoint until audio file URL is returned."""
89
+ for api_base in ["/gradio_api/call", "/call"]:
90
+ url = f"{base}{api_base}/generate_speech/{event_id}"
91
+ req = urllib.request.Request(url)
92
+ try:
93
+ with urllib.request.urlopen(req, timeout=120) as resp:
94
+ for raw_line in resp:
95
+ line = raw_line.decode("utf-8", errors="replace").strip()
96
+ if not line.startswith("data: "):
97
+ continue
98
+ try:
99
+ parsed = json.loads(line[6:])
100
+ except json.JSONDecodeError:
101
+ continue
102
+ # Response format: [{"path": "...", "url": "...", ...}, "status string"]
103
+ if isinstance(parsed, list) and len(parsed) >= 1:
104
+ audio = parsed[0]
105
+ if isinstance(audio, dict) and "url" in audio:
106
+ return audio["url"]
107
+ break
108
+ except urllib.error.URLError:
109
+ continue
110
+
111
+ raise RuntimeError("No audio URL in Soprano response")
112
+
113
+
114
+ def download_file(url: str, output_path: str) -> None:
115
+ """Download audio file from Gradio file server."""
116
+ req = urllib.request.Request(url)
117
+ with urllib.request.urlopen(req, timeout=30) as resp:
118
+ with open(output_path, "wb") as f:
119
+ while True:
120
+ chunk = resp.read(8192)
121
+ if not chunk:
122
+ break
123
+ f.write(chunk)
124
+
125
+
126
+ if __name__ == "__main__":
127
+ if len(sys.argv) < 3:
128
+ print(f"Usage: {sys.argv[0]} \"text\" output.wav [port]", file=sys.stderr)
129
+ sys.exit(1)
130
+
131
+ text = sys.argv[1]
132
+ output = sys.argv[2]
133
+ port = int(sys.argv[3]) if len(sys.argv) > 3 else 7860
134
+
135
+ try:
136
+ synth(text, output, port)
137
+ except Exception as e:
138
+ print(f"Error: {e}", file=sys.stderr)
139
+ sys.exit(1)
File without changes
File without changes
File without changes
File without changes