agentvibes 4.2.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. package/.agentvibes/bmad/bmad-voices.md +69 -69
  2. package/.agentvibes/config.json +12 -0
  3. package/.claude/activation-instructions +54 -54
  4. package/.claude/audio/tracks/README.md +52 -52
  5. package/.claude/commands/agent-vibes/add.md +21 -21
  6. package/.claude/commands/agent-vibes/agent-vibes.md +101 -101
  7. package/.claude/commands/agent-vibes/agent.md +79 -79
  8. package/.claude/commands/agent-vibes/background-music.md +111 -111
  9. package/.claude/commands/agent-vibes/bmad.md +198 -198
  10. package/.claude/commands/agent-vibes/clean.md +18 -18
  11. package/.claude/commands/agent-vibes/cleanup.md +18 -18
  12. package/.claude/commands/agent-vibes/commands.json +145 -145
  13. package/.claude/commands/agent-vibes/effects.md +97 -97
  14. package/.claude/commands/agent-vibes/get.md +9 -9
  15. package/.claude/commands/agent-vibes/hide.md +91 -91
  16. package/.claude/commands/agent-vibes/language.md +23 -23
  17. package/.claude/commands/agent-vibes/learn.md +67 -67
  18. package/.claude/commands/agent-vibes/list.md +13 -13
  19. package/.claude/commands/agent-vibes/mute.md +37 -37
  20. package/.claude/commands/agent-vibes/preview.md +17 -17
  21. package/.claude/commands/agent-vibes/provider.md +68 -68
  22. package/.claude/commands/agent-vibes/replay-target.md +14 -14
  23. package/.claude/commands/agent-vibes/sample.md +12 -12
  24. package/.claude/commands/agent-vibes/set-favorite-voice.md +84 -84
  25. package/.claude/commands/agent-vibes/set-pretext.md +65 -65
  26. package/.claude/commands/agent-vibes/set-speed.md +41 -41
  27. package/.claude/commands/agent-vibes/show.md +84 -84
  28. package/.claude/commands/agent-vibes/switch.md +87 -87
  29. package/.claude/commands/agent-vibes/target-voice.md +26 -26
  30. package/.claude/commands/agent-vibes/target.md +30 -30
  31. package/.claude/commands/agent-vibes/translate.md +68 -68
  32. package/.claude/commands/agent-vibes/unmute.md +45 -45
  33. package/.claude/commands/agent-vibes/verbosity.md +89 -89
  34. package/.claude/commands/agent-vibes/whoami.md +7 -7
  35. package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
  36. package/.claude/commands/agent-vibes-rdp.md +24 -24
  37. package/.claude/config/agentvibes.json +1 -0
  38. package/.claude/config/audio-effects.cfg +2 -2
  39. package/.claude/config/audio-effects.cfg.sample +52 -52
  40. package/.claude/config/background-music-volume.txt +1 -0
  41. package/.claude/config/intro-text.txt +1 -0
  42. package/.claude/config/piper-speech-rate.txt +4 -0
  43. package/.claude/config/piper-target-speech-rate.txt +1 -0
  44. package/.claude/config/reverb-level.txt +1 -0
  45. package/.claude/config/tts-speech-rate.txt +4 -0
  46. package/.claude/config/tts-target-speech-rate.txt +1 -0
  47. package/.claude/docs/TERMUX_SETUP.md +408 -408
  48. package/.claude/github-star-reminder.txt +1 -1
  49. package/.claude/hooks/README-TTS-QUEUE.md +135 -135
  50. package/.claude/hooks/audio-cache-utils.sh +246 -246
  51. package/.claude/hooks/audio-processor.sh +433 -433
  52. package/.claude/hooks/background-music-manager.sh +404 -404
  53. package/.claude/hooks/bmad-speak-enhanced.sh +165 -165
  54. package/.claude/hooks/bmad-speak.sh +269 -269
  55. package/.claude/hooks/bmad-tts-injector.sh +568 -568
  56. package/.claude/hooks/bmad-voice-manager.sh +928 -928
  57. package/.claude/hooks/clawdbot-receiver-SECURE.sh +129 -129
  58. package/.claude/hooks/clawdbot-receiver.sh +107 -107
  59. package/.claude/hooks/clean-audio-cache.sh +22 -22
  60. package/.claude/hooks/cleanup-cache.sh +106 -106
  61. package/.claude/hooks/configure-rdp-mode.sh +137 -137
  62. package/.claude/hooks/download-extra-voices.sh +244 -244
  63. package/.claude/hooks/effects-manager.sh +268 -268
  64. package/.claude/hooks/github-star-reminder.sh +154 -154
  65. package/.claude/hooks/language-manager.sh +362 -362
  66. package/.claude/hooks/learn-manager.sh +492 -492
  67. package/.claude/hooks/macos-voice-manager.sh +205 -205
  68. package/.claude/hooks/migrate-background-music.sh +125 -125
  69. package/.claude/hooks/migrate-to-agentvibes.sh +161 -161
  70. package/.claude/hooks/optimize-background-music.sh +87 -87
  71. package/.claude/hooks/path-resolver.sh +60 -60
  72. package/.claude/hooks/personality-manager.sh +448 -448
  73. package/.claude/hooks/piper-download-voices.sh +225 -225
  74. package/.claude/hooks/piper-installer.sh +292 -292
  75. package/.claude/hooks/piper-multispeaker-registry.sh +171 -171
  76. package/.claude/hooks/piper-voice-manager.sh +24 -3
  77. package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +90 -90
  78. package/.claude/hooks/play-tts-enhanced.sh +105 -105
  79. package/.claude/hooks/play-tts-macos.sh +368 -368
  80. package/.claude/hooks/play-tts-piper.sh +679 -679
  81. package/.claude/hooks/play-tts-soprano.sh +356 -356
  82. package/.claude/hooks/play-tts-ssh-remote.sh +167 -167
  83. package/.claude/hooks/play-tts-termux-ssh.sh +169 -169
  84. package/.claude/hooks/play-tts.sh +301 -301
  85. package/.claude/hooks/prepare-release.sh +54 -54
  86. package/.claude/hooks/provider-commands.sh +617 -617
  87. package/.claude/hooks/provider-manager.sh +399 -399
  88. package/.claude/hooks/replay-target-audio.sh +95 -95
  89. package/.claude/hooks/requirements.txt +6 -6
  90. package/.claude/hooks/sentiment-manager.sh +201 -201
  91. package/.claude/hooks/session-start-tts.sh +81 -81
  92. package/.claude/hooks/soprano-gradio-synth.py +139 -139
  93. package/.claude/hooks/speed-manager.sh +291 -291
  94. package/.claude/hooks/stop-tts.sh +84 -84
  95. package/.claude/hooks/termux-installer.sh +261 -261
  96. package/.claude/hooks/translate-manager.sh +341 -341
  97. package/.claude/hooks/translator.py +237 -237
  98. package/.claude/hooks/tts-queue-worker.sh +145 -145
  99. package/.claude/hooks/tts-queue.sh +165 -165
  100. package/.claude/hooks/verbosity-manager.sh +178 -178
  101. package/.claude/hooks/voice-manager.sh +548 -548
  102. package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
  103. package/.claude/hooks-windows/background-music-manager.ps1 +348 -0
  104. package/.claude/hooks-windows/clean-audio-cache.ps1 +53 -0
  105. package/.claude/hooks-windows/download-extra-voices.ps1 +185 -0
  106. package/.claude/hooks-windows/effects-manager.ps1 +294 -0
  107. package/.claude/hooks-windows/language-manager.ps1 +193 -0
  108. package/.claude/hooks-windows/learn-manager.ps1 +241 -0
  109. package/.claude/hooks-windows/personality-manager.ps1 +266 -0
  110. package/.claude/hooks-windows/play-tts-piper.ps1 +209 -0
  111. package/.claude/hooks-windows/play-tts-sapi.ps1 +108 -0
  112. package/.claude/hooks-windows/play-tts-soprano.ps1 +159 -158
  113. package/.claude/hooks-windows/play-tts-windows-piper.ps1 +50 -5
  114. package/.claude/hooks-windows/play-tts-windows-sapi.ps1 +108 -108
  115. package/.claude/hooks-windows/play-tts.ps1 +344 -266
  116. package/.claude/hooks-windows/provider-manager.ps1 +29 -10
  117. package/.claude/hooks-windows/session-start-tts.ps1 +124 -124
  118. package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
  119. package/.claude/hooks-windows/speed-manager.ps1 +166 -0
  120. package/.claude/hooks-windows/verbosity-manager.ps1 +119 -0
  121. package/.claude/hooks-windows/voice-manager-windows.ps1 +92 -8
  122. package/.claude/output-styles/agent-vibes.md +202 -202
  123. package/.claude/personalities/angry.md +14 -14
  124. package/.claude/personalities/annoying.md +14 -14
  125. package/.claude/personalities/crass.md +14 -14
  126. package/.claude/personalities/dramatic.md +14 -14
  127. package/.claude/personalities/dry-humor.md +50 -50
  128. package/.claude/personalities/flirty.md +20 -20
  129. package/.claude/personalities/funny.md +14 -14
  130. package/.claude/personalities/grandpa.md +32 -32
  131. package/.claude/personalities/millennial.md +14 -14
  132. package/.claude/personalities/moody.md +14 -14
  133. package/.claude/personalities/normal.md +16 -16
  134. package/.claude/personalities/pirate.md +14 -14
  135. package/.claude/personalities/poetic.md +14 -14
  136. package/.claude/personalities/professional.md +14 -14
  137. package/.claude/personalities/rapper.md +55 -55
  138. package/.claude/personalities/robot.md +14 -14
  139. package/.claude/personalities/sarcastic.md +38 -38
  140. package/.claude/personalities/sassy.md +14 -14
  141. package/.claude/personalities/surfer-dude.md +14 -14
  142. package/.claude/personalities/zen.md +14 -14
  143. package/.claude/settings.json +15 -15
  144. package/.claude/verbosity.txt +1 -1
  145. package/.clawdbot/README.md +105 -105
  146. package/.clawdbot/skill/SKILL.md +241 -241
  147. package/.mcp.json +12 -0
  148. package/CLAUDE.md +170 -170
  149. package/README.md +2029 -2007
  150. package/RELEASE_NOTES.md +1310 -1203
  151. package/WINDOWS-SETUP.md +208 -208
  152. package/bin/agent-vibes +39 -39
  153. package/bin/agentvibes-voice-browser.js +1840 -1840
  154. package/bin/agentvibes.js +48 -2
  155. package/bin/mcp-server.js +121 -121
  156. package/bin/mcp-server.sh +206 -206
  157. package/bin/test-bmad-pr +78 -78
  158. package/mcp-server/QUICK_START.md +203 -203
  159. package/mcp-server/README.md +345 -345
  160. package/mcp-server/WINDOWS_SETUP.md +260 -260
  161. package/mcp-server/docs/troubleshooting-audio.md +313 -313
  162. package/mcp-server/examples/claude_desktop_config.json +11 -11
  163. package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
  164. package/mcp-server/examples/custom_instructions.md +169 -169
  165. package/mcp-server/install-deps.js +130 -130
  166. package/mcp-server/pyproject.toml +52 -52
  167. package/mcp-server/requirements.txt +2 -2
  168. package/mcp-server/server.py +1465 -1453
  169. package/mcp-server/test_server.py +395 -395
  170. package/mcp-server/test_windows_script_parity.py +336 -0
  171. package/package.json +110 -110
  172. package/setup-windows.ps1 +815 -815
  173. package/src/bmad-detector.js +71 -71
  174. package/src/cli/list-personalities.js +110 -110
  175. package/src/cli/list-voices.js +114 -114
  176. package/src/commands/bmad-voices.js +394 -394
  177. package/src/commands/install-mcp.js +476 -476
  178. package/src/console/app.js +824 -824
  179. package/src/console/audio-env.js +20 -1
  180. package/src/console/brand-colors.js +13 -13
  181. package/src/console/constants/personalities.js +44 -44
  182. package/src/console/footer-config.js +50 -50
  183. package/src/console/modals/modal-overlay.js +247 -247
  184. package/src/console/navigation.js +62 -62
  185. package/src/console/tabs/agents-tab.js +1684 -1516
  186. package/src/console/tabs/help-tab.js +261 -261
  187. package/src/console/tabs/install-tab.js +1007 -991
  188. package/src/console/tabs/music-tab.js +22 -8
  189. package/src/console/tabs/placeholder-tab.js +53 -53
  190. package/src/console/tabs/readme-tab.js +267 -267
  191. package/src/console/tabs/receiver-tab.js +1472 -1212
  192. package/src/console/tabs/settings-tab.js +152 -79
  193. package/src/console/tabs/voices-tab.js +100 -21
  194. package/src/console/widgets/destroy-list.js +25 -25
  195. package/src/console/widgets/format-utils.js +89 -89
  196. package/src/console/widgets/notice.js +55 -55
  197. package/src/console/widgets/personality-picker.js +185 -185
  198. package/src/console/widgets/reverb-picker.js +94 -94
  199. package/src/console/widgets/track-picker.js +285 -285
  200. package/src/installer/music-file-input.js +304 -304
  201. package/src/installer.js +5882 -5829
  202. package/src/services/agent-voice-store.js +423 -423
  203. package/src/services/config-service.js +264 -264
  204. package/src/services/navigation-service.js +123 -123
  205. package/src/services/provider-service.js +132 -132
  206. package/src/services/verbosity-service.js +157 -157
  207. package/src/utils/audio-duration-validator.js +298 -298
  208. package/src/utils/audio-format-validator.js +277 -277
  209. package/src/utils/dependency-checker.js +469 -466
  210. package/src/utils/file-ownership-verifier.js +358 -358
  211. package/src/utils/list-formatter.js +194 -194
  212. package/src/utils/music-file-validator.js +285 -285
  213. package/src/utils/preview-list-prompt.js +136 -136
  214. package/src/utils/provider-validator.js +96 -12
  215. package/src/utils/secure-music-storage.js +412 -412
  216. package/templates/agentvibes-receiver.sh +482 -482
  217. package/templates/audio/welcome-music.mp3 +0 -0
  218. package/voice-assignments.json +8244 -8244
  219. package/.claude/config/background-music-position.txt +0 -1
@@ -1,482 +1,482 @@
1
- #!/usr/bin/env bash
2
- #
3
- # File: agentvibes-receiver.sh
4
- # Location: User installs to ~/.agentvibes/play-remote.sh
5
- #
6
- # AgentVibes SSH-TTS Receiver (v2 — self-contained pipeline)
7
- # Receives TTS requests via SSH, generates and plays audio locally.
8
- #
9
- # Supports two payload formats:
10
- # 1. JSON payload (v2): single base64-encoded JSON with all config
11
- # 2. Legacy positional args: base64_text voice_name (backward compat)
12
- #
13
- # Pipeline: TTS (piper|soprano|macos|windows-sapi) → sox effects → ffmpeg music mix → audio player
14
- # All steps run in foreground (required for SSH ForceCommand).
15
- #
16
- # Installation:
17
- # curl -sSL https://raw.githubusercontent.com/paulpreibisch/AgentVibes/main/scripts/install-ssh-receiver.sh | bash
18
- #
19
- # Copyright (c) 2025 Paul Preibisch
20
- # Licensed under Apache-2.0
21
- #
22
-
23
- set -euo pipefail
24
-
25
- # ---------------------------------------------------------------------------
26
- # Environment setup for SSH ForceCommand context
27
- # ---------------------------------------------------------------------------
28
-
29
- # ForceCommand passes args via SSH_ORIGINAL_COMMAND env var
30
- # SECURITY: Use read -ra instead of eval to prevent command injection
31
- if [[ -n "${SSH_ORIGINAL_COMMAND:-}" ]]; then
32
- read -ra _ssh_args <<< "$SSH_ORIGINAL_COMMAND"
33
- set -- "${_ssh_args[@]}"
34
- fi
35
-
36
- # Handle -- argument separator (skip it if present)
37
- if [[ "${1:-}" == "--" ]]; then
38
- shift
39
- fi
40
-
41
- # ---------------------------------------------------------------------------
42
- # Configuration — customize these for your installation
43
- # ---------------------------------------------------------------------------
44
-
45
- # Ensure common tool paths are available in restricted SSH context
46
- export PATH="$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin:$PATH"
47
-
48
- # All paths use $HOME — the receiver user's own home directory.
49
- # During install, voices and tracks are symlinked here from the desktop user.
50
- # This avoids needing access to another user's home directory.
51
-
52
- # Where piper voice models are stored
53
- VOICES_DIR="${AGENTVIBES_VOICES_DIR:-$HOME/.claude/piper-voices}"
54
-
55
- # Where background music tracks are stored
56
- TRACKS_DIR="${AGENTVIBES_TRACKS_DIR:-$HOME/.claude/audio/tracks}"
57
-
58
- # Log file — the TUI reads from this location
59
- LOG_FILE="${AGENTVIBES_RECEIVER_LOG:-$HOME/.agentvibes/receiver.log}"
60
-
61
- # PipeWire/PulseAudio — connect to the desktop user's audio session.
62
- # Cross-user audio is tricky: Unix sockets reject different-uid callers
63
- # even with ACLs. The reliable approach is localhost TCP on a fixed port.
64
- # The setup script configures PipeWire-Pulse to listen on 127.0.0.1:34567.
65
- AGENTVIBES_PULSE_PORT="${AGENTVIBES_PULSE_PORT:-34567}"
66
-
67
- if [[ -z "${PULSE_SERVER:-}" ]]; then
68
- _own_runtime="/run/user/$(id -u)"
69
- # Detect if we're the dedicated receiver user — always use TCP to reach
70
- # the desktop user's audio session, even if we have our own pulse socket.
71
- _is_receiver_user=false
72
- [[ "$(whoami)" == "agentvibes-receiver" ]] && _is_receiver_user=true
73
-
74
- if [[ "$_is_receiver_user" == true ]]; then
75
- # Dedicated receiver user — must use TCP to desktop user's PipeWire-Pulse
76
- export PULSE_SERVER="tcp:127.0.0.1:$AGENTVIBES_PULSE_PORT"
77
- elif [[ -e "$_own_runtime/pulse/native" ]]; then
78
- # Same user — use own Unix socket (fastest)
79
- export PULSE_SERVER="unix:$_own_runtime/pulse/native"
80
- else
81
- # Different user — use localhost TCP (setup by agentvibes installer)
82
- export PULSE_SERVER="tcp:127.0.0.1:$AGENTVIBES_PULSE_PORT"
83
- fi
84
- fi
85
-
86
- # XDG_RUNTIME_DIR still needed for pipewire tools (pw-play fallback)
87
- if [[ -z "${XDG_RUNTIME_DIR:-}" ]] || [[ ! -e "$XDG_RUNTIME_DIR/pipewire-0" ]]; then
88
- for _rd in /run/user/*/; do
89
- [[ -e "${_rd}pipewire-0" ]] && { export XDG_RUNTIME_DIR="${_rd%/}"; break; }
90
- done
91
- fi
92
- export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}"
93
-
94
- # Audio playback — detect available player
95
- # Prefer paplay over pw-play: pw-play from a different user causes
96
- # PipeWire flat-volume side effects that drop the master volume.
97
- AUDIO_PLAYER=""
98
- AUDIO_PLAYER_ARGS=()
99
-
100
- # Check for user-configured sink (set via TUI receiver tab [S] key)
101
- SINK_CONFIG="${AGENTVIBES_RECEIVER_SINK:-$HOME/.agentvibes/receiver-sink.txt}"
102
- _default_sink=""
103
- if [[ -f "$SINK_CONFIG" ]]; then
104
- _configured_sink=$(head -1 "$SINK_CONFIG" 2>/dev/null | tr -d '[:space:]')
105
- # Validate sink name format (alphanumeric, hyphens, underscores, dots)
106
- if [[ -n "$_configured_sink" ]] && [[ "$_configured_sink" =~ ^[a-zA-Z0-9._-]+$ ]]; then
107
- _default_sink="$_configured_sink"
108
- fi
109
- fi
110
- # Fall back to system default if no valid config
111
- if [[ -z "$_default_sink" ]]; then
112
- _default_sink=$(pactl get-default-sink 2>/dev/null || true)
113
- fi
114
-
115
- if command -v paplay &>/dev/null; then
116
- AUDIO_PLAYER="paplay"
117
- [[ -n "$_default_sink" ]] && AUDIO_PLAYER_ARGS=(--device="$_default_sink")
118
- elif command -v pw-play &>/dev/null; then
119
- AUDIO_PLAYER="pw-play"
120
- [[ -n "$_default_sink" ]] && AUDIO_PLAYER_ARGS=(--target="$_default_sink")
121
- elif command -v aplay &>/dev/null; then
122
- AUDIO_PLAYER="aplay"
123
- fi
124
-
125
- # ---------------------------------------------------------------------------
126
- # Input parsing
127
- # ---------------------------------------------------------------------------
128
-
129
- ENCODED_PAYLOAD="${1:-}"
130
-
131
- if [[ -z "$ENCODED_PAYLOAD" ]]; then
132
- echo "Error: No payload provided" >&2
133
- echo "Usage: $0 <base64-encoded-json-or-text> [voice]" >&2
134
- exit 1
135
- fi
136
-
137
- # SECURITY: Validate base64 format (reject shell metacharacters)
138
- if [[ ! "$ENCODED_PAYLOAD" =~ ^[A-Za-z0-9+/=]+$ ]]; then
139
- echo "Error: Payload must be base64-encoded" >&2
140
- exit 1
141
- fi
142
-
143
- # Decode base64
144
- DECODED=$(printf '%s' "$ENCODED_PAYLOAD" | base64 -d 2>/dev/null) || {
145
- echo "Error: Failed to decode base64 payload" >&2
146
- exit 1
147
- }
148
-
149
- # ---------------------------------------------------------------------------
150
- # Parse payload — JSON (v2) or plain text (legacy)
151
- # ---------------------------------------------------------------------------
152
-
153
- TEXT=""
154
- VOICE="en_US-lessac-medium"
155
- SOX_EFFECTS=""
156
- BG_FILE=""
157
- BG_VOLUME="0.10"
158
- PROJECT=""
159
- PRETEXT=""
160
- SPEED=""
161
- PROVIDER="piper"
162
-
163
- # Detect JSON payload (starts with '{')
164
- if [[ "$DECODED" == "{"* ]]; then
165
- # JSON v2 payload — extract fields with lightweight parsing
166
- # SECURITY: Use parameter extraction, not eval
167
- if command -v jq &>/dev/null; then
168
- TEXT=$(printf '%s' "$DECODED" | jq -r '.text // empty' 2>/dev/null) || TEXT=""
169
- VOICE=$(printf '%s' "$DECODED" | jq -r '.voice // "en_US-lessac-medium"' 2>/dev/null) || VOICE="en_US-lessac-medium"
170
- SOX_EFFECTS=$(printf '%s' "$DECODED" | jq -r '.effects // empty' 2>/dev/null) || SOX_EFFECTS=""
171
- BG_FILE=$(printf '%s' "$DECODED" | jq -r '.music // empty' 2>/dev/null) || BG_FILE=""
172
- BG_VOLUME=$(printf '%s' "$DECODED" | jq -r '.volume // "0.10"' 2>/dev/null) || BG_VOLUME="0.10"
173
- PROJECT=$(printf '%s' "$DECODED" | jq -r '.project // empty' 2>/dev/null) || PROJECT=""
174
- PRETEXT=$(printf '%s' "$DECODED" | jq -r '.pretext // empty' 2>/dev/null) || PRETEXT=""
175
- SPEED=$(printf '%s' "$DECODED" | jq -r '.speed // empty' 2>/dev/null) || SPEED=""
176
- PROVIDER=$(printf '%s' "$DECODED" | jq -r '.provider // "piper"' 2>/dev/null) || PROVIDER="piper"
177
- else
178
- # Fallback: extract with grep/sed (no jq available)
179
- TEXT=$(printf '%s' "$DECODED" | grep -o '"text"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
180
- VOICE=$(printf '%s' "$DECODED" | grep -o '"voice"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
181
- SOX_EFFECTS=$(printf '%s' "$DECODED" | grep -o '"effects"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
182
- BG_FILE=$(printf '%s' "$DECODED" | grep -o '"music"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
183
- BG_VOLUME=$(printf '%s' "$DECODED" | grep -o '"volume"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
184
- PROJECT=$(printf '%s' "$DECODED" | grep -o '"project"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
185
- PRETEXT=$(printf '%s' "$DECODED" | grep -o '"pretext"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
186
- SPEED=$(printf '%s' "$DECODED" | grep -o '"speed"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
187
- PROVIDER=$(printf '%s' "$DECODED" | grep -o '"provider"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
188
- [[ -z "$VOICE" ]] && VOICE="en_US-lessac-medium"
189
- [[ -z "$BG_VOLUME" ]] && BG_VOLUME="0.10"
190
- [[ -z "$PROVIDER" ]] && PROVIDER="piper"
191
- fi
192
- else
193
- # Legacy format: plain text, voice from positional arg
194
- TEXT="$DECODED"
195
- VOICE="${2:-en_US-lessac-medium}"
196
- fi
197
-
198
- # Validate required text
199
- if [[ -z "$TEXT" ]]; then
200
- echo "Error: No text in payload" >&2
201
- exit 1
202
- fi
203
-
204
- # SECURITY: Validate voice format (alphanumeric, hyphens, underscores only)
205
- if [[ ! "$VOICE" =~ ^[a-zA-Z0-9_-]+$ ]]; then
206
- echo "Error: Invalid voice format" >&2
207
- exit 1
208
- fi
209
-
210
- # SECURITY: Validate volume is a number
211
- if [[ -n "$BG_VOLUME" ]] && [[ ! "$BG_VOLUME" =~ ^[0-9]+\.?[0-9]*$ ]]; then
212
- BG_VOLUME="0.10"
213
- fi
214
-
215
- # SECURITY: Validate speed is a number (prevents awk injection)
216
- if [[ -n "$SPEED" ]] && [[ ! "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
217
- SPEED=""
218
- fi
219
-
220
- # SECURITY: Validate provider format (known providers only)
221
- case "$PROVIDER" in
222
- piper|soprano|macos|windows-sapi) ;;
223
- *) PROVIDER="piper" ;;
224
- esac
225
-
226
- # Prepend pretext if provided
227
- if [[ -n "$PRETEXT" ]]; then
228
- TEXT="${PRETEXT}. ${TEXT}"
229
- fi
230
-
231
- # ---------------------------------------------------------------------------
232
- # Structured logging (for receiver tab to display)
233
- # ---------------------------------------------------------------------------
234
-
235
- LOG_ID=$(printf '%04x' $((RANDOM % 65536)))
236
-
237
- log_message() {
238
- local status="$1"
239
- local detail="${2:-}"
240
- local timestamp
241
- timestamp=$(date '+%Y-%m-%dT%H:%M:%S')
242
- local log_dir
243
- log_dir=$(dirname "$LOG_FILE")
244
- mkdir -p "$log_dir" 2>/dev/null || true
245
- # Extract sender IP from SSH_CLIENT (set by sshd: "IP PORT PORT")
246
- local sender_ip="${SSH_CLIENT%% *}"
247
- [[ -z "$sender_ip" ]] && sender_ip="local"
248
- # Format: TIMESTAMP|STATUS|PROJECT|VOICE|TEXT_PREVIEW|DETAIL|IP|LOG_ID
249
- local preview="${TEXT:0:200}"
250
- printf '%s|%s|%s|%s|%s|%s|%s|%s\n' \
251
- "$timestamp" "$status" "${PROJECT:-unknown}" "$VOICE" "$preview" "$detail" "$sender_ip" "$LOG_ID" \
252
- >> "$LOG_FILE" 2>/dev/null || true
253
- }
254
-
255
- log_message "RECEIVED" "provider=${PROVIDER} effects=${SOX_EFFECTS:-none} music=${BG_FILE:-none}"
256
-
257
- # ---------------------------------------------------------------------------
258
- # Temp files with cleanup
259
- # ---------------------------------------------------------------------------
260
-
261
- # Use own runtime dir for temp files (not the desktop user's)
262
- _TEMP_BASE="/run/user/$(id -u)"
263
- [[ -d "$_TEMP_BASE" ]] && [[ -w "$_TEMP_BASE" ]] || _TEMP_BASE="/tmp"
264
- RAW_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-XXXXXX.wav")
265
- EFFECTS_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-fx-XXXXXX.wav")
266
- FINAL_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-final-XXXXXX.wav")
267
- trap 'rm -f "$RAW_WAV" "$EFFECTS_WAV" "$FINAL_WAV"' EXIT
268
-
269
- # ---------------------------------------------------------------------------
270
- # Step 1: Generate TTS audio (multi-provider dispatch)
271
- # ---------------------------------------------------------------------------
272
-
273
- _generate_tts_piper() {
274
- local model="$VOICES_DIR/${VOICE}.onnx"
275
- if [[ ! -f "$model" ]]; then
276
- # Fallback: try any available voice rather than failing
277
- local fallback
278
- fallback=$(find "$VOICES_DIR" -maxdepth 1 -name '*.onnx' -type f 2>/dev/null | head -1)
279
- if [[ -n "$fallback" ]]; then
280
- local fallback_name
281
- fallback_name=$(basename "$fallback" .onnx)
282
- log_message "WARN" "Voice $VOICE not found, falling back to $fallback_name"
283
- echo "Warning: Voice $VOICE not found, using $fallback_name" >&2
284
- VOICE="$fallback_name"
285
- model="$fallback"
286
- else
287
- log_message "ERROR" "No voice models found in $VOICES_DIR"
288
- echo "Error: No voice models found in $VOICES_DIR" >&2
289
- return 1
290
- fi
291
- fi
292
-
293
- local args=(--model "$model" --output_file "$RAW_WAV")
294
- if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
295
- args+=(--length_scale "$SPEED")
296
- fi
297
-
298
- echo "$TEXT" | piper "${args[@]}" 2>/dev/null || {
299
- log_message "ERROR" "Piper TTS failed"
300
- echo "Error: Piper TTS generation failed" >&2
301
- return 1
302
- }
303
- }
304
-
305
- _generate_tts_soprano() {
306
- local soprano_port="${SOPRANO_PORT:-7860}"
307
-
308
- # Try API mode first (OpenAI-compatible endpoint)
309
- if curl -sf -X POST "http://127.0.0.1:${soprano_port}/v1/audio/speech" \
310
- -H "Content-Type: application/json" \
311
- -d "{\"input\":$(printf '%s' "$TEXT" | jq -Rs .)}" \
312
- --output "$RAW_WAV" 2>/dev/null; then
313
- return 0
314
- fi
315
-
316
- # Try CLI mode — options before --, text as final positional arg
317
- if command -v soprano &>/dev/null; then
318
- soprano -o "$RAW_WAV" -- "$TEXT" 2>/dev/null && return 0
319
- fi
320
-
321
- log_message "ERROR" "Soprano TTS failed — is soprano running on port ${soprano_port}?"
322
- echo "Error: Soprano TTS unavailable (tried API and CLI)" >&2
323
- return 1
324
- }
325
-
326
- _generate_tts_macos() {
327
- if ! command -v say &>/dev/null; then
328
- log_message "ERROR" "macOS say command not found"
329
- echo "Error: macOS say command not available" >&2
330
- return 1
331
- fi
332
-
333
- local say_args=(-v "$VOICE")
334
- # Convert speed multiplier to WPM (say uses WPM, default ~200)
335
- if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
336
- local wpm
337
- wpm=$(awk "BEGIN {printf \"%d\", 200 * $SPEED}")
338
- say_args+=(-r "$wpm")
339
- fi
340
-
341
- # say outputs AIFF — convert to WAV for consistent pipeline
342
- local aiff_tmp="${RAW_WAV%.wav}.aiff"
343
- echo "$TEXT" | say "${say_args[@]}" -o "$aiff_tmp" 2>/dev/null || {
344
- log_message "ERROR" "macOS say failed"
345
- rm -f "$aiff_tmp"
346
- return 1
347
- }
348
-
349
- if command -v ffmpeg &>/dev/null; then
350
- ffmpeg -y -i "$aiff_tmp" "$RAW_WAV" </dev/null 2>/dev/null
351
- rm -f "$aiff_tmp"
352
- else
353
- # No ffmpeg — rename and hope player handles AIFF
354
- mv "$aiff_tmp" "$RAW_WAV"
355
- fi
356
- }
357
-
358
- _generate_tts_windows_sapi() {
359
- # Windows SAPI via PowerShell (works in WSL2 via powershell.exe)
360
- local ps_cmd=""
361
- if command -v powershell.exe &>/dev/null; then
362
- ps_cmd="powershell.exe"
363
- elif command -v pwsh &>/dev/null; then
364
- ps_cmd="pwsh"
365
- else
366
- log_message "ERROR" "PowerShell not found for Windows SAPI"
367
- echo "Error: PowerShell required for Windows SAPI" >&2
368
- return 1
369
- fi
370
-
371
- # SECURITY: Escape text for PowerShell single-quoted string
372
- local escaped_text
373
- escaped_text=$(printf '%s' "$TEXT" | sed "s/'/''/g")
374
-
375
- local rate=0
376
- if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
377
- # SAPI rate: -10 to 10, 0 is normal. Speed 1.0=0, 2.0=5, 0.5=-5
378
- rate=$(awk "BEGIN {r = ($SPEED - 1.0) * 10; if (r > 10) r = 10; if (r < -10) r = -10; printf \"%d\", r}")
379
- fi
380
-
381
- $ps_cmd -NoProfile -Command "
382
- Add-Type -AssemblyName System.Speech
383
- \$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer
384
- \$synth.Rate = $rate
385
- \$synth.SetOutputToWaveFile('$(wslpath -w "$RAW_WAV" 2>/dev/null || echo "$RAW_WAV")')
386
- \$synth.Speak('$escaped_text')
387
- \$synth.Dispose()
388
- " 2>/dev/null || {
389
- log_message "ERROR" "Windows SAPI TTS failed"
390
- echo "Error: Windows SAPI generation failed" >&2
391
- return 1
392
- }
393
- }
394
-
395
- # Dispatch to the appropriate TTS provider
396
- case "$PROVIDER" in
397
- piper)
398
- _generate_tts_piper || exit 1
399
- ;;
400
- soprano)
401
- _generate_tts_soprano || exit 1
402
- ;;
403
- macos)
404
- _generate_tts_macos || exit 1
405
- ;;
406
- windows-sapi)
407
- _generate_tts_windows_sapi || exit 1
408
- ;;
409
- *)
410
- log_message "ERROR" "Unknown provider: $PROVIDER"
411
- echo "Error: Unknown TTS provider: $PROVIDER" >&2
412
- exit 1
413
- ;;
414
- esac
415
-
416
- PLAY_FILE="$RAW_WAV"
417
-
418
- # ---------------------------------------------------------------------------
419
- # Step 2: Apply sox effects (reverb, EQ, etc.)
420
- # ---------------------------------------------------------------------------
421
-
422
- if [[ -n "$SOX_EFFECTS" ]] && command -v sox &>/dev/null; then
423
- # SECURITY: Validate effects contain only safe characters (alphanumeric, spaces, dots, hyphens, underscores)
424
- if [[ "$SOX_EFFECTS" =~ ^[a-zA-Z0-9\ ._-]+$ ]]; then
425
- sox "$RAW_WAV" "$EFFECTS_WAV" $SOX_EFFECTS 2>/dev/null && PLAY_FILE="$EFFECTS_WAV"
426
- else
427
- log_message "WARN" "Rejected unsafe sox effects: ${SOX_EFFECTS:0:50}"
428
- fi
429
- fi
430
-
431
- # ---------------------------------------------------------------------------
432
- # Step 3: Mix background music (if configured)
433
- # ---------------------------------------------------------------------------
434
-
435
- if [[ -n "$BG_FILE" ]] && command -v ffmpeg &>/dev/null; then
436
- BG_PATH="$TRACKS_DIR/$BG_FILE"
437
- if [[ -f "$BG_PATH" ]]; then
438
- DURATION=$(ffprobe -v error -show_entries format=duration \
439
- -of default=noprint_wrappers=1:nokey=1 "$PLAY_FILE" 2>/dev/null || echo "")
440
- if [[ -n "$DURATION" ]]; then
441
- TOTAL_DUR=$(awk "BEGIN {printf \"%.2f\", $DURATION + 2}")
442
- FADE_OUT=$(awk "BEGIN {printf \"%.2f\", $DURATION}")
443
- timeout 20 ffmpeg -y -i "$PLAY_FILE" -stream_loop -1 -i "$BG_PATH" \
444
- -filter_complex "[1:a]volume=${BG_VOLUME},afade=t=in:st=0:d=0.3,afade=t=out:st=${FADE_OUT}:d=2[bg];[0:a]adelay=2000|2000[v];[v][bg]amix=inputs=2:duration=longest[out]" \
445
- -map "[out]" -t "$TOTAL_DUR" "$FINAL_WAV" </dev/null 2>/dev/null && PLAY_FILE="$FINAL_WAV"
446
- fi
447
- fi
448
- fi
449
-
450
- # ---------------------------------------------------------------------------
451
- # Step 4: Play audio in foreground (required for SSH — no backgrounding)
452
- # ---------------------------------------------------------------------------
453
-
454
- if [[ -z "$AUDIO_PLAYER" ]]; then
455
- log_message "ERROR" "No audio player found (pw-play, paplay, aplay)"
456
- echo "Error: No audio player available" >&2
457
- exit 1
458
- fi
459
-
460
- # Save master volume before playback — flat-volumes in PipeWire/PulseAudio
461
- # can change master volume when a new stream connects from another user.
462
- _saved_vol=""
463
- if command -v pactl &>/dev/null; then
464
- _saved_vol=$(pactl get-sink-volume @DEFAULT_SINK@ 2>/dev/null | grep -o '[0-9]*%' | head -1)
465
- fi
466
-
467
- log_message "PLAYING" "player=$AUDIO_PLAYER sink=${_default_sink:-unknown} vol=${_saved_vol:-?} pulse=${PULSE_SERVER:-unset}"
468
-
469
- _play_err=$($AUDIO_PLAYER "${AUDIO_PLAYER_ARGS[@]}" "$PLAY_FILE" 2>&1) || {
470
- log_message "ERROR" "Playback failed with $AUDIO_PLAYER: $_play_err"
471
- echo "Error: Audio playback failed" >&2
472
- echo "Detail: $_play_err" >&2
473
- exit 1
474
- }
475
-
476
- # Restore master volume to what it was before playback
477
- if [[ -n "$_saved_vol" ]] && command -v pactl &>/dev/null; then
478
- pactl set-sink-volume @DEFAULT_SINK@ "$_saved_vol" 2>/dev/null || true
479
- fi
480
-
481
- log_message "DONE" ""
482
- exit 0
1
+ #!/usr/bin/env bash
2
+ #
3
+ # File: agentvibes-receiver.sh
4
+ # Location: User installs to ~/.agentvibes/play-remote.sh
5
+ #
6
+ # AgentVibes SSH-TTS Receiver (v2 — self-contained pipeline)
7
+ # Receives TTS requests via SSH, generates and plays audio locally.
8
+ #
9
+ # Supports two payload formats:
10
+ # 1. JSON payload (v2): single base64-encoded JSON with all config
11
+ # 2. Legacy positional args: base64_text voice_name (backward compat)
12
+ #
13
+ # Pipeline: TTS (piper|soprano|macos|windows-sapi) → sox effects → ffmpeg music mix → audio player
14
+ # All steps run in foreground (required for SSH ForceCommand).
15
+ #
16
+ # Installation:
17
+ # curl -sSL https://raw.githubusercontent.com/paulpreibisch/AgentVibes/main/scripts/install-ssh-receiver.sh | bash
18
+ #
19
+ # Copyright (c) 2025 Paul Preibisch
20
+ # Licensed under Apache-2.0
21
+ #
22
+
23
set -euo pipefail

# ---------------------------------------------------------------------------
# Environment setup for SSH ForceCommand context
# ---------------------------------------------------------------------------

# With ForceCommand the client's requested command line is not passed as
# argv; it is available in SSH_ORIGINAL_COMMAND. Rebuild the positional
# parameters from it.
# SECURITY: split with `read -ra` (whitespace splitting only) — never eval.
if [[ -n "${SSH_ORIGINAL_COMMAND:-}" ]]; then
  read -r -a _ssh_args <<< "$SSH_ORIGINAL_COMMAND"
  set -- "${_ssh_args[@]}"
fi

# A leading "--" is only an argument separator; discard it.
case "${1:-}" in
  --) shift ;;
esac
40
+
41
# ---------------------------------------------------------------------------
# Configuration — customize these for your installation
# ---------------------------------------------------------------------------

# A restricted SSH session may start with a minimal PATH, so put the usual
# tool locations in front of whatever was inherited.
PATH="$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin:$PATH"
export PATH

# Every default below lives under $HOME — the receiver user's own home.
# During install, voices and tracks are symlinked here from the desktop user,
# which avoids needing access to another user's home directory.
# Each setting can be overridden with the AGENTVIBES_* variable it checks.

# Piper voice models
if [[ -n "${AGENTVIBES_VOICES_DIR:-}" ]]; then
  VOICES_DIR="$AGENTVIBES_VOICES_DIR"
else
  VOICES_DIR="$HOME/.claude/piper-voices"
fi

# Background music tracks
if [[ -n "${AGENTVIBES_TRACKS_DIR:-}" ]]; then
  TRACKS_DIR="$AGENTVIBES_TRACKS_DIR"
else
  TRACKS_DIR="$HOME/.claude/audio/tracks"
fi

# Log file — the TUI reads from this location
if [[ -n "${AGENTVIBES_RECEIVER_LOG:-}" ]]; then
  LOG_FILE="$AGENTVIBES_RECEIVER_LOG"
else
  LOG_FILE="$HOME/.agentvibes/receiver.log"
fi

# PipeWire/PulseAudio — connect to the desktop user's audio session.
# Cross-user audio is tricky: Unix sockets reject different-uid callers
# even with ACLs. The reliable approach is localhost TCP on a fixed port.
# The setup script configures PipeWire-Pulse to listen on 127.0.0.1:34567.
: "${AGENTVIBES_PULSE_PORT:=34567}"
66
+
67
# Pick a PULSE_SERVER only when the caller has not already provided one.
if [[ -z "${PULSE_SERVER:-}" ]]; then
  _own_runtime="/run/user/$(id -u)"

  # The dedicated receiver account always goes over TCP to reach the desktop
  # user's audio session, even if it has a pulse socket of its own.
  if [[ "$(whoami)" == "agentvibes-receiver" ]]; then
    _is_receiver_user=true
  else
    _is_receiver_user=false
  fi

  if [[ "$_is_receiver_user" != true ]] && [[ -e "$_own_runtime/pulse/native" ]]; then
    # Same user as the audio session — own Unix socket (fastest)
    export PULSE_SERVER="unix:$_own_runtime/pulse/native"
  else
    # Receiver user, or no socket of our own — localhost TCP
    # (set up by the agentvibes installer)
    export PULSE_SERVER="tcp:127.0.0.1:$AGENTVIBES_PULSE_PORT"
  fi
fi

# XDG_RUNTIME_DIR is still needed by the pipewire tools (pw-play fallback).
# If ours is missing or has no pipewire-0 entry, adopt the first runtime dir
# under /run/user that does have one.
if [[ -z "${XDG_RUNTIME_DIR:-}" ]] || [[ ! -e "$XDG_RUNTIME_DIR/pipewire-0" ]]; then
  for _rd in /run/user/*/; do
    if [[ -e "${_rd}pipewire-0" ]]; then
      export XDG_RUNTIME_DIR="${_rd%/}"
      break
    fi
  done
fi
# Last resort: this user's conventional runtime directory.
export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}"
93
+
94
# Audio playback — work out which player to use and which sink to target.
# paplay is tried before pw-play: pw-play from a different user causes
# PipeWire flat-volume side effects that drop the master volume.
AUDIO_PLAYER=""
AUDIO_PLAYER_ARGS=()

# A sink chosen by the user (set via TUI receiver tab [S] key) wins, provided
# its name looks sane (alphanumeric, dots, hyphens, underscores).
SINK_CONFIG="${AGENTVIBES_RECEIVER_SINK:-$HOME/.agentvibes/receiver-sink.txt}"
_default_sink=""
if [[ -f "$SINK_CONFIG" ]]; then
  _configured_sink=$(head -1 "$SINK_CONFIG" 2>/dev/null | tr -d '[:space:]')
  # The "+" quantifier already rejects an empty name.
  if [[ "$_configured_sink" =~ ^[a-zA-Z0-9._-]+$ ]]; then
    _default_sink="$_configured_sink"
  fi
fi

# Otherwise ask the sound server for its default sink (best effort).
[[ -n "$_default_sink" ]] || _default_sink=$(pactl get-default-sink 2>/dev/null || true)

# First available player in preference order.
for _candidate in paplay pw-play aplay; do
  command -v "$_candidate" &>/dev/null || continue
  AUDIO_PLAYER="$_candidate"
  break
done

# Only paplay and pw-play take a sink argument; aplay gets none.
if [[ -n "$_default_sink" ]]; then
  case "$AUDIO_PLAYER" in
    paplay) AUDIO_PLAYER_ARGS=(--device="$_default_sink") ;;
    pw-play) AUDIO_PLAYER_ARGS=(--target="$_default_sink") ;;
  esac
fi
124
+
125
# ---------------------------------------------------------------------------
# Input parsing
# ---------------------------------------------------------------------------

# The first positional argument carries the base64 payload.
ENCODED_PAYLOAD="${1:-}"

[[ -n "$ENCODED_PAYLOAD" ]] || {
  printf '%s\n' "Error: No payload provided" >&2
  printf '%s\n' "Usage: $0 <base64-encoded-json-or-text> [voice]" >&2
  exit 1
}

# SECURITY: accept only the base64 alphabet (rejects shell metacharacters)
[[ "$ENCODED_PAYLOAD" =~ ^[A-Za-z0-9+/=]+$ ]] || {
  printf '%s\n' "Error: Payload must be base64-encoded" >&2
  exit 1
}

# Decode; a decoder failure is reported and ends the run.
if ! DECODED=$(printf '%s' "$ENCODED_PAYLOAD" | base64 -d 2>/dev/null); then
  printf '%s\n' "Error: Failed to decode base64 payload" >&2
  exit 1
fi
148
+
149
# ---------------------------------------------------------------------------
# Parse payload — JSON (v2) or plain text (legacy)
# ---------------------------------------------------------------------------

# Defaults, used for any field the payload does not supply.
TEXT=""
VOICE="en_US-lessac-medium"
SOX_EFFECTS=""
BG_FILE=""
BG_VOLUME="0.10"
PROJECT=""
PRETEXT=""
SPEED=""
PROVIDER="piper"

# Evaluate jq filter $1 against the decoded payload; the pipeline's exit
# status is passed through so the caller can substitute a default.
_payload_jq() {
  printf '%s' "$DECODED" | jq -r "$1" 2>/dev/null
}

# jq-less fallback: print the first string value found for key $1, or nothing.
# Only matches "key": "value" pairs whose value contains no double quote.
_payload_grep() {
  printf '%s' "$DECODED" \
    | grep -o "\"$1\"[[:space:]]*:[[:space:]]*\"[^\"]*\"" \
    | head -1 \
    | sed 's/.*: *"//;s/"$//' || true
}

# A decoded payload that starts with '{' is treated as JSON (v2).
# SECURITY: fields are extracted as data — nothing is eval'd.
if [[ "$DECODED" == "{"* ]]; then
  if command -v jq &>/dev/null; then
    TEXT=$(_payload_jq '.text // empty') || TEXT=""
    VOICE=$(_payload_jq '.voice // "en_US-lessac-medium"') || VOICE="en_US-lessac-medium"
    SOX_EFFECTS=$(_payload_jq '.effects // empty') || SOX_EFFECTS=""
    BG_FILE=$(_payload_jq '.music // empty') || BG_FILE=""
    BG_VOLUME=$(_payload_jq '.volume // "0.10"') || BG_VOLUME="0.10"
    PROJECT=$(_payload_jq '.project // empty') || PROJECT=""
    PRETEXT=$(_payload_jq '.pretext // empty') || PRETEXT=""
    SPEED=$(_payload_jq '.speed // empty') || SPEED=""
    PROVIDER=$(_payload_jq '.provider // "piper"') || PROVIDER="piper"
  else
    # No jq available — lightweight grep/sed extraction
    TEXT=$(_payload_grep text)
    VOICE=$(_payload_grep voice)
    SOX_EFFECTS=$(_payload_grep effects)
    BG_FILE=$(_payload_grep music)
    BG_VOLUME=$(_payload_grep volume)
    PROJECT=$(_payload_grep project)
    PRETEXT=$(_payload_grep pretext)
    SPEED=$(_payload_grep speed)
    PROVIDER=$(_payload_grep provider)
    # Restore the defaults for fields the extraction left empty.
    VOICE="${VOICE:-en_US-lessac-medium}"
    BG_VOLUME="${BG_VOLUME:-0.10}"
    PROVIDER="${PROVIDER:-piper}"
  fi
else
  # Legacy format: the payload is the text itself; the voice, if any, is the
  # second positional argument.
  TEXT="$DECODED"
  VOICE="${2:-en_US-lessac-medium}"
fi
197
+
198
# A payload without text has nothing to speak.
[[ -n "$TEXT" ]] || {
  echo "Error: No text in payload" >&2
  exit 1
}

# SECURITY: voice names may contain only alphanumerics, hyphens, underscores.
[[ "$VOICE" =~ ^[a-zA-Z0-9_-]+$ ]] || {
  echo "Error: Invalid voice format" >&2
  exit 1
}

# Plain non-negative decimal: digits, optionally a dot and more digits.
_numeric_re='^[0-9]+\.?[0-9]*$'

# SECURITY: a non-numeric volume is replaced by the default.
if [[ -n "$BG_VOLUME" && ! "$BG_VOLUME" =~ $_numeric_re ]]; then
  BG_VOLUME="0.10"
fi

# SECURITY: a non-numeric speed is dropped (prevents awk injection later on).
if [[ -n "$SPEED" && ! "$SPEED" =~ $_numeric_re ]]; then
  SPEED=""
fi

# SECURITY: only known providers are accepted; anything else becomes piper.
[[ "$PROVIDER" =~ ^(piper|soprano|macos|windows-sapi)$ ]] || PROVIDER="piper"

# Speak the pretext first, when one was supplied.
[[ -z "$PRETEXT" ]] || TEXT="${PRETEXT}. ${TEXT}"
230
+
231
# ---------------------------------------------------------------------------
# Structured logging (for receiver tab to display)
# ---------------------------------------------------------------------------

# Short hex id written on every log line of this invocation.
LOG_ID=$(printf '%04x' $((RANDOM % 65536)))

#######################################
# Append one record to the receiver log.
# Record format (one line, pipe-delimited):
#   TIMESTAMP|STATUS|PROJECT|VOICE|TEXT_PREVIEW|DETAIL|IP|LOG_ID
# Globals:   LOG_FILE, PROJECT, VOICE, TEXT, LOG_ID, SSH_CLIENT (all read)
# Arguments: $1 - status label
#            $2 - free-form detail (optional)
# Returns:   always 0; logging is best-effort and never aborts the run
#######################################
log_message() {
  local status="$1"
  local detail="${2:-}"
  local timestamp
  timestamp=$(date '+%Y-%m-%dT%H:%M:%S')
  local log_dir
  log_dir=$(dirname "$LOG_FILE")
  mkdir -p "$log_dir" 2>/dev/null || true

  # sshd sets SSH_CLIENT to "IP PORT PORT". The variable is absent when the
  # script is run locally, so default it before trimming — expanding it
  # directly would abort under `set -u`.
  local sender_ip="${SSH_CLIENT:-}"
  sender_ip="${sender_ip%% *}"
  if [[ -z "$sender_ip" ]]; then
    sender_ip="local"
  fi

  # Payload-derived fields may contain newlines or the field delimiter.
  # Flatten those to spaces so each call yields exactly one well-formed line.
  local project="${PROJECT:-unknown}"
  local preview="${TEXT:0:200}"
  project="${project//[$'\n\r|']/ }"
  preview="${preview//[$'\n\r|']/ }"
  detail="${detail//[$'\n\r|']/ }"

  # The group redirect also silences a failure to open the log file itself.
  {
    printf '%s|%s|%s|%s|%s|%s|%s|%s\n' \
      "$timestamp" "$status" "$project" "$VOICE" "$preview" "$detail" "$sender_ip" "$LOG_ID" \
      >> "$LOG_FILE"
  } 2>/dev/null || true
}

log_message "RECEIVED" "provider=${PROVIDER} effects=${SOX_EFFECTS:-none} music=${BG_FILE:-none}"
256
+
257
# ---------------------------------------------------------------------------
# Temp files with cleanup
# ---------------------------------------------------------------------------

# Temp files go in our own runtime dir (not the desktop user's); fall back
# to /tmp when that directory is missing or not writable.
_TEMP_BASE="/run/user/$(id -u)"
if [[ ! -d "$_TEMP_BASE" ]] || [[ ! -w "$_TEMP_BASE" ]]; then
  _TEMP_BASE="/tmp"
fi

# One file per pipeline stage: raw TTS output, sox output, ffmpeg mix output.
RAW_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-XXXXXX.wav")
EFFECTS_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-fx-XXXXXX.wav")
FINAL_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-final-XXXXXX.wav")

# Remove all three stage files on any exit path.
_cleanup_temp_files() {
  rm -f "$RAW_WAV" "$EFFECTS_WAV" "$FINAL_WAV"
}
trap _cleanup_temp_files EXIT
268
+
269
+ # ---------------------------------------------------------------------------
270
+ # Step 1: Generate TTS audio (multi-provider dispatch)
271
+ # ---------------------------------------------------------------------------
272
+
273
#######################################
# Synthesize TEXT with piper into RAW_WAV.
# If the requested voice model is missing, falls back to another *.onnx model
# found directly in VOICES_DIR (first in byte order) and updates VOICE.
# Globals:  VOICES_DIR, TEXT, SPEED, RAW_WAV (read); VOICE (read, may be
#           rewritten on fallback)
# Outputs:  warnings/errors on stderr and in the receiver log
# Returns:  0 on success, 1 if no model is available or piper fails
#######################################
_generate_tts_piper() {
  local model="$VOICES_DIR/${VOICE}.onnx"
  if [[ ! -f "$model" ]]; then
    # Fallback: try any available voice rather than failing. The list is
    # sorted so the same fallback is chosen on every run.
    local fallback
    fallback=$(find "$VOICES_DIR" -maxdepth 1 -name '*.onnx' -type f 2>/dev/null \
      | LC_ALL=C sort | head -n 1) || true
    if [[ -n "$fallback" ]]; then
      local fallback_name
      fallback_name=$(basename "$fallback" .onnx)
      log_message "WARN" "Voice $VOICE not found, falling back to $fallback_name"
      echo "Warning: Voice $VOICE not found, using $fallback_name" >&2
      VOICE="$fallback_name"
      model="$fallback"
    else
      log_message "ERROR" "No voice models found in $VOICES_DIR"
      echo "Error: No voice models found in $VOICES_DIR" >&2
      return 1
    fi
  fi

  local args=(--model "$model" --output_file "$RAW_WAV")
  # SPEED is passed to piper as --length_scale, only when it is a plain number.
  if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    args+=(--length_scale "$SPEED")
  fi

  # printf, not echo: echo would treat text such as "-n" or "-e" as an option
  # and speak nothing (or something else).
  printf '%s\n' "$TEXT" | piper "${args[@]}" 2>/dev/null || {
    log_message "ERROR" "Piper TTS failed"
    echo "Error: Piper TTS generation failed" >&2
    return 1
  }
}
304
+
305
#######################################
# Synthesize TEXT with Soprano into RAW_WAV.
# Tries the local HTTP API first (port SOPRANO_PORT, default 7860), then the
# `soprano` command-line tool.
# Globals:  TEXT, RAW_WAV, SOPRANO_PORT (read)
# Outputs:  errors on stderr and in the receiver log
# Returns:  0 if either mode succeeds, 1 otherwise
#######################################
_generate_tts_soprano() {
  local soprano_port="${SOPRANO_PORT:-7860}"

  # API mode (OpenAI-compatible endpoint). jq is what JSON-encodes the text,
  # so without it the request body would be malformed — skip straight to the
  # CLI in that case.
  if command -v jq &>/dev/null; then
    local request_body
    if request_body=$(printf '%s' "$TEXT" | jq -Rsc '{input: .}' 2>/dev/null) \
      && curl -sf -X POST "http://127.0.0.1:${soprano_port}/v1/audio/speech" \
        -H "Content-Type: application/json" \
        -d "$request_body" \
        --output "$RAW_WAV" 2>/dev/null; then
      return 0
    fi
  fi

  # CLI mode — options before --, text as final positional arg
  if command -v soprano &>/dev/null; then
    soprano -o "$RAW_WAV" -- "$TEXT" 2>/dev/null && return 0
  fi

  log_message "ERROR" "Soprano TTS failed — is soprano running on port ${soprano_port}?"
  echo "Error: Soprano TTS unavailable (tried API and CLI)" >&2
  return 1
}
325
+
326
#######################################
# Synthesize TEXT with the macOS `say` command into RAW_WAV.
# say writes AIFF; the result is converted to WAV with ffmpeg when available.
# Globals:  TEXT, VOICE, SPEED, RAW_WAV (read)
# Outputs:  errors on stderr and in the receiver log
# Returns:  0 on success, 1 if say is missing, say fails, or the AIFF→WAV
#           conversion fails
#######################################
_generate_tts_macos() {
  if ! command -v say &>/dev/null; then
    log_message "ERROR" "macOS say command not found"
    echo "Error: macOS say command not available" >&2
    return 1
  fi

  local say_args=(-v "$VOICE")
  # Convert the speed multiplier to words per minute, taking 200 WPM as 1.0.
  # The value goes in with -v so it is never parsed as awk program text.
  if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    local wpm
    wpm=$(awk -v speed="$SPEED" 'BEGIN {printf "%d", 200 * speed}')
    say_args+=(-r "$wpm")
  fi

  # say outputs AIFF — convert to WAV for a consistent pipeline
  local aiff_tmp="${RAW_WAV%.wav}.aiff"
  # printf, not echo: echo would swallow text that looks like an option.
  printf '%s\n' "$TEXT" | say "${say_args[@]}" -o "$aiff_tmp" 2>/dev/null || {
    log_message "ERROR" "macOS say failed"
    echo "Error: macOS say failed" >&2
    rm -f -- "$aiff_tmp"
    return 1
  }

  if command -v ffmpeg &>/dev/null; then
    # A failed conversion must be reported: otherwise the caller would go on
    # to play whatever is (not) in RAW_WAV.
    if ! ffmpeg -y -i "$aiff_tmp" "$RAW_WAV" </dev/null 2>/dev/null; then
      log_message "ERROR" "ffmpeg AIFF to WAV conversion failed"
      echo "Error: AIFF to WAV conversion failed" >&2
      rm -f -- "$aiff_tmp"
      return 1
    fi
    rm -f -- "$aiff_tmp"
  else
    # No ffmpeg — rename and hope the player handles AIFF
    mv -- "$aiff_tmp" "$RAW_WAV"
  fi
}
357
+
358
#######################################
# Synthesize TEXT with Windows SAPI (System.Speech) into RAW_WAV by running a
# short PowerShell script (powershell.exe, e.g. from WSL2, or pwsh).
# Globals:  TEXT, SPEED, RAW_WAV (read)
# Outputs:  errors on stderr and in the receiver log
# Returns:  0 on success, 1 if PowerShell is missing or synthesis fails
#######################################
_generate_tts_windows_sapi() {
  local ps_cmd=""
  if command -v powershell.exe &>/dev/null; then
    ps_cmd="powershell.exe"
  elif command -v pwsh &>/dev/null; then
    ps_cmd="pwsh"
  else
    log_message "ERROR" "PowerShell not found for Windows SAPI"
    echo "Error: PowerShell required for Windows SAPI" >&2
    return 1
  fi

  # Output path as Windows sees it; fall back to the raw path outside WSL.
  local out_path
  out_path=$(wslpath -w "$RAW_WAV" 2>/dev/null || echo "$RAW_WAV")

  # SECURITY: both values are embedded in PowerShell single-quoted literals.
  # PowerShell accepts not only ' (U+0027) but also the typographic quotes
  # U+2018, U+2019, U+201A and U+201B as single-quote delimiters, so every
  # one of them has to be doubled — escaping the ASCII quote alone would let
  # text containing a curly quote terminate the literal and inject code.
  local escaped_text="$TEXT"
  local quote doubled
  for quote in "'" $'\xe2\x80\x98' $'\xe2\x80\x99' $'\xe2\x80\x9a' $'\xe2\x80\x9b'; do
    doubled="$quote$quote"
    escaped_text=${escaped_text//$quote/$doubled}
    out_path=${out_path//$quote/$doubled}
  done

  local rate=0
  if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    # SAPI rate runs from -10 to 10 with 0 as normal. The multiplier maps as
    # (speed - 1) * 10, clamped: 1.0 -> 0, 1.5 -> 5, 0.5 -> -5, >= 2.0 -> 10.
    rate=$(awk -v speed="$SPEED" \
      'BEGIN {r = (speed - 1.0) * 10; if (r > 10) r = 10; if (r < -10) r = -10; printf "%d", r}')
  fi

  "$ps_cmd" -NoProfile -Command "
    Add-Type -AssemblyName System.Speech
    \$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer
    \$synth.Rate = $rate
    \$synth.SetOutputToWaveFile('$out_path')
    \$synth.Speak('$escaped_text')
    \$synth.Dispose()
  " 2>/dev/null || {
    log_message "ERROR" "Windows SAPI TTS failed"
    echo "Error: Windows SAPI generation failed" >&2
    return 1
  }
}
394
+
395
# Run the generator that matches the selected provider. Each generator is
# named _generate_tts_<provider>, with "-" in the provider name written as
# "_"; a generator failure ends the run with status 1.
case "$PROVIDER" in
  piper | soprano | macos | windows-sapi)
    "_generate_tts_${PROVIDER//-/_}" || exit 1
    ;;
  *)
    log_message "ERROR" "Unknown provider: $PROVIDER"
    echo "Error: Unknown TTS provider: $PROVIDER" >&2
    exit 1
    ;;
esac

# Until a later stage produces something better, play the raw TTS output.
PLAY_FILE="$RAW_WAV"
417
+
418
# ---------------------------------------------------------------------------
# Step 2: Apply sox effects (reverb, EQ, etc.)
# ---------------------------------------------------------------------------

# Runs only when effects were requested and sox is installed. If sox fails,
# PLAY_FILE is left pointing at the unprocessed audio.
if [[ -n "$SOX_EFFECTS" ]] && command -v sox &>/dev/null; then
  # SECURITY: effects may contain only alphanumerics, spaces, dots, hyphens
  # and underscores; anything else is refused and logged (first 50 chars).
  if [[ ! "$SOX_EFFECTS" =~ ^[a-zA-Z0-9\ ._-]+$ ]]; then
    log_message "WARN" "Rejected unsafe sox effects: ${SOX_EFFECTS:0:50}"
  # $SOX_EFFECTS is left unquoted on purpose: it is a space-separated list of
  # sox arguments and must be split into words.
  elif sox "$RAW_WAV" "$EFFECTS_WAV" $SOX_EFFECTS 2>/dev/null; then
    PLAY_FILE="$EFFECTS_WAV"
  fi
fi
430
+
431
# ---------------------------------------------------------------------------
# Step 3: Mix background music (if configured)
# ---------------------------------------------------------------------------

#######################################
# Mix the track named by BG_FILE under the current PLAY_FILE.
# Best-effort: on any problem the speech is simply played without music.
# Globals:  BG_FILE, BG_VOLUME, TRACKS_DIR, FINAL_WAV (read);
#           PLAY_FILE (read; set to FINAL_WAV when the mix succeeds)
# Returns:  always 0
#######################################
_mix_background_music() {
  [[ -n "${BG_FILE:-}" ]] || return 0
  command -v ffmpeg &>/dev/null || return 0

  # SECURITY: BG_FILE comes from the payload. Refuse ".." path components so
  # it cannot point outside TRACKS_DIR.
  if [[ "/$BG_FILE/" == */../* ]]; then
    log_message "WARN" "Rejected unsafe music path: ${BG_FILE:0:50}"
    return 0
  fi

  local bg_path="$TRACKS_DIR/$BG_FILE"
  [[ -f "$bg_path" ]] || return 0

  local duration
  duration=$(ffprobe -v error -show_entries format=duration \
    -of default=noprint_wrappers=1:nokey=1 "$PLAY_FILE" 2>/dev/null || echo "")
  # ffprobe can print a non-number (e.g. "N/A"); feeding that to awk would
  # fail and, under `set -e`, end the whole run. Skip the mix instead.
  [[ "$duration" =~ ^[0-9]+(\.[0-9]+)?$ ]] || return 0

  # The output runs 2 s longer than the speech; the music fade-out starts at
  # the speech duration.
  local total_dur fade_out
  total_dur=$(awk -v d="$duration" 'BEGIN {printf "%.2f", d + 2}')
  fade_out=$(awk -v d="$duration" 'BEGIN {printf "%.2f", d}')

  # Voice is delayed 2 s; the looped track fades in, then fades out over 2 s.
  # A 20 s timeout keeps a stuck ffmpeg from blocking the SSH session.
  if timeout 20 ffmpeg -y -i "$PLAY_FILE" -stream_loop -1 -i "$bg_path" \
    -filter_complex "[1:a]volume=${BG_VOLUME},afade=t=in:st=0:d=0.3,afade=t=out:st=${fade_out}:d=2[bg];[0:a]adelay=2000|2000[v];[v][bg]amix=inputs=2:duration=longest[out]" \
    -map "[out]" -t "$total_dur" "$FINAL_WAV" </dev/null 2>/dev/null; then
    PLAY_FILE="$FINAL_WAV"
  fi
  return 0
}

_mix_background_music
449
+
450
# ---------------------------------------------------------------------------
# Step 4: Play audio in foreground (required for SSH — no backgrounding)
# ---------------------------------------------------------------------------

if [[ -z "$AUDIO_PLAYER" ]]; then
  log_message "ERROR" "No audio player found (pw-play, paplay, aplay)"
  echo "Error: No audio player available" >&2
  exit 1
fi

# Save master volume before playback — flat-volumes in PipeWire/PulseAudio
# can change master volume when a new stream connects from another user.
# Reading it is best-effort: pactl may be installed yet fail (unreachable
# server, or a version without `get-sink-volume`). With pipefail + errexit an
# unguarded failure here would end the script before anything is played.
_saved_vol=""
if command -v pactl &>/dev/null; then
  _saved_vol=$(pactl get-sink-volume @DEFAULT_SINK@ 2>/dev/null \
    | grep -oE '[0-9]+%' | head -n 1) || _saved_vol=""
fi

log_message "PLAYING" "player=$AUDIO_PLAYER sink=${_default_sink:-unknown} vol=${_saved_vol:-?} pulse=${PULSE_SERVER:-unset}"

# Capture the player's output and status instead of exiting immediately, so
# the volume can be restored on the failure path as well.
# The ${arr[@]+...} form keeps an empty argument array from tripping
# `set -u` on Bash versions older than 4.4.
_play_status=0
_play_err=$("$AUDIO_PLAYER" ${AUDIO_PLAYER_ARGS[@]+"${AUDIO_PLAYER_ARGS[@]}"} "$PLAY_FILE" 2>&1) \
  || _play_status=$?

# Restore master volume to what it was before playback
if [[ -n "$_saved_vol" ]] && command -v pactl &>/dev/null; then
  pactl set-sink-volume @DEFAULT_SINK@ "$_saved_vol" 2>/dev/null || true
fi

if ((_play_status != 0)); then
  log_message "ERROR" "Playback failed with $AUDIO_PLAYER: $_play_err"
  echo "Error: Audio playback failed" >&2
  echo "Detail: $_play_err" >&2
  exit 1
fi

log_message "DONE" ""
exit 0