agentvibes 4.2.0 → 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agentvibes/bmad/bmad-voices.md +69 -69
- package/.agentvibes/config.json +12 -0
- package/.claude/activation-instructions +54 -54
- package/.claude/audio/tracks/README.md +52 -52
- package/.claude/commands/agent-vibes/add.md +21 -21
- package/.claude/commands/agent-vibes/agent-vibes.md +101 -101
- package/.claude/commands/agent-vibes/agent.md +79 -79
- package/.claude/commands/agent-vibes/background-music.md +111 -111
- package/.claude/commands/agent-vibes/bmad.md +198 -198
- package/.claude/commands/agent-vibes/clean.md +18 -18
- package/.claude/commands/agent-vibes/cleanup.md +18 -18
- package/.claude/commands/agent-vibes/commands.json +145 -145
- package/.claude/commands/agent-vibes/effects.md +97 -97
- package/.claude/commands/agent-vibes/get.md +9 -9
- package/.claude/commands/agent-vibes/hide.md +91 -91
- package/.claude/commands/agent-vibes/language.md +23 -23
- package/.claude/commands/agent-vibes/learn.md +67 -67
- package/.claude/commands/agent-vibes/list.md +13 -13
- package/.claude/commands/agent-vibes/mute.md +37 -37
- package/.claude/commands/agent-vibes/preview.md +17 -17
- package/.claude/commands/agent-vibes/provider.md +68 -68
- package/.claude/commands/agent-vibes/replay-target.md +14 -14
- package/.claude/commands/agent-vibes/sample.md +12 -12
- package/.claude/commands/agent-vibes/set-favorite-voice.md +84 -84
- package/.claude/commands/agent-vibes/set-pretext.md +65 -65
- package/.claude/commands/agent-vibes/set-speed.md +41 -41
- package/.claude/commands/agent-vibes/show.md +84 -84
- package/.claude/commands/agent-vibes/switch.md +87 -87
- package/.claude/commands/agent-vibes/target-voice.md +26 -26
- package/.claude/commands/agent-vibes/target.md +30 -30
- package/.claude/commands/agent-vibes/translate.md +68 -68
- package/.claude/commands/agent-vibes/unmute.md +45 -45
- package/.claude/commands/agent-vibes/verbosity.md +89 -89
- package/.claude/commands/agent-vibes/whoami.md +7 -7
- package/.claude/commands/agent-vibes-bmad-voices.md +117 -117
- package/.claude/commands/agent-vibes-rdp.md +24 -24
- package/.claude/config/agentvibes.json +1 -0
- package/.claude/config/audio-effects.cfg +2 -2
- package/.claude/config/audio-effects.cfg.sample +52 -52
- package/.claude/config/background-music-volume.txt +1 -0
- package/.claude/config/intro-text.txt +1 -0
- package/.claude/config/piper-speech-rate.txt +4 -0
- package/.claude/config/piper-target-speech-rate.txt +1 -0
- package/.claude/config/reverb-level.txt +1 -0
- package/.claude/config/tts-speech-rate.txt +4 -0
- package/.claude/config/tts-target-speech-rate.txt +1 -0
- package/.claude/docs/TERMUX_SETUP.md +408 -408
- package/.claude/github-star-reminder.txt +1 -1
- package/.claude/hooks/README-TTS-QUEUE.md +135 -135
- package/.claude/hooks/audio-cache-utils.sh +246 -246
- package/.claude/hooks/audio-processor.sh +433 -433
- package/.claude/hooks/background-music-manager.sh +404 -404
- package/.claude/hooks/bmad-speak-enhanced.sh +165 -165
- package/.claude/hooks/bmad-speak.sh +269 -269
- package/.claude/hooks/bmad-tts-injector.sh +568 -568
- package/.claude/hooks/bmad-voice-manager.sh +928 -928
- package/.claude/hooks/clawdbot-receiver-SECURE.sh +129 -129
- package/.claude/hooks/clawdbot-receiver.sh +107 -107
- package/.claude/hooks/clean-audio-cache.sh +22 -22
- package/.claude/hooks/cleanup-cache.sh +106 -106
- package/.claude/hooks/configure-rdp-mode.sh +137 -137
- package/.claude/hooks/download-extra-voices.sh +244 -244
- package/.claude/hooks/effects-manager.sh +268 -268
- package/.claude/hooks/github-star-reminder.sh +154 -154
- package/.claude/hooks/language-manager.sh +362 -362
- package/.claude/hooks/learn-manager.sh +492 -492
- package/.claude/hooks/macos-voice-manager.sh +205 -205
- package/.claude/hooks/migrate-background-music.sh +125 -125
- package/.claude/hooks/migrate-to-agentvibes.sh +161 -161
- package/.claude/hooks/optimize-background-music.sh +87 -87
- package/.claude/hooks/path-resolver.sh +60 -60
- package/.claude/hooks/personality-manager.sh +448 -448
- package/.claude/hooks/piper-download-voices.sh +225 -225
- package/.claude/hooks/piper-installer.sh +292 -292
- package/.claude/hooks/piper-multispeaker-registry.sh +171 -171
- package/.claude/hooks/piper-voice-manager.sh +24 -3
- package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +90 -90
- package/.claude/hooks/play-tts-enhanced.sh +105 -105
- package/.claude/hooks/play-tts-macos.sh +368 -368
- package/.claude/hooks/play-tts-piper.sh +679 -679
- package/.claude/hooks/play-tts-soprano.sh +356 -356
- package/.claude/hooks/play-tts-ssh-remote.sh +167 -167
- package/.claude/hooks/play-tts-termux-ssh.sh +169 -169
- package/.claude/hooks/play-tts.sh +301 -301
- package/.claude/hooks/prepare-release.sh +54 -54
- package/.claude/hooks/provider-commands.sh +617 -617
- package/.claude/hooks/provider-manager.sh +399 -399
- package/.claude/hooks/replay-target-audio.sh +95 -95
- package/.claude/hooks/requirements.txt +6 -6
- package/.claude/hooks/sentiment-manager.sh +201 -201
- package/.claude/hooks/session-start-tts.sh +81 -81
- package/.claude/hooks/soprano-gradio-synth.py +139 -139
- package/.claude/hooks/speed-manager.sh +291 -291
- package/.claude/hooks/stop-tts.sh +84 -84
- package/.claude/hooks/termux-installer.sh +261 -261
- package/.claude/hooks/translate-manager.sh +341 -341
- package/.claude/hooks/translator.py +237 -237
- package/.claude/hooks/tts-queue-worker.sh +145 -145
- package/.claude/hooks/tts-queue.sh +165 -165
- package/.claude/hooks/verbosity-manager.sh +178 -178
- package/.claude/hooks/voice-manager.sh +548 -548
- package/.claude/hooks-windows/audio-cache-utils.ps1 +119 -119
- package/.claude/hooks-windows/background-music-manager.ps1 +348 -0
- package/.claude/hooks-windows/clean-audio-cache.ps1 +53 -0
- package/.claude/hooks-windows/download-extra-voices.ps1 +185 -0
- package/.claude/hooks-windows/effects-manager.ps1 +294 -0
- package/.claude/hooks-windows/language-manager.ps1 +193 -0
- package/.claude/hooks-windows/learn-manager.ps1 +241 -0
- package/.claude/hooks-windows/personality-manager.ps1 +266 -0
- package/.claude/hooks-windows/play-tts-piper.ps1 +209 -0
- package/.claude/hooks-windows/play-tts-sapi.ps1 +108 -0
- package/.claude/hooks-windows/play-tts-soprano.ps1 +159 -158
- package/.claude/hooks-windows/play-tts-windows-piper.ps1 +50 -5
- package/.claude/hooks-windows/play-tts-windows-sapi.ps1 +108 -108
- package/.claude/hooks-windows/play-tts.ps1 +344 -266
- package/.claude/hooks-windows/provider-manager.ps1 +29 -10
- package/.claude/hooks-windows/session-start-tts.ps1 +124 -124
- package/.claude/hooks-windows/soprano-gradio-synth.py +153 -153
- package/.claude/hooks-windows/speed-manager.ps1 +166 -0
- package/.claude/hooks-windows/verbosity-manager.ps1 +119 -0
- package/.claude/hooks-windows/voice-manager-windows.ps1 +92 -8
- package/.claude/output-styles/agent-vibes.md +202 -202
- package/.claude/personalities/angry.md +14 -14
- package/.claude/personalities/annoying.md +14 -14
- package/.claude/personalities/crass.md +14 -14
- package/.claude/personalities/dramatic.md +14 -14
- package/.claude/personalities/dry-humor.md +50 -50
- package/.claude/personalities/flirty.md +20 -20
- package/.claude/personalities/funny.md +14 -14
- package/.claude/personalities/grandpa.md +32 -32
- package/.claude/personalities/millennial.md +14 -14
- package/.claude/personalities/moody.md +14 -14
- package/.claude/personalities/normal.md +16 -16
- package/.claude/personalities/pirate.md +14 -14
- package/.claude/personalities/poetic.md +14 -14
- package/.claude/personalities/professional.md +14 -14
- package/.claude/personalities/rapper.md +55 -55
- package/.claude/personalities/robot.md +14 -14
- package/.claude/personalities/sarcastic.md +38 -38
- package/.claude/personalities/sassy.md +14 -14
- package/.claude/personalities/surfer-dude.md +14 -14
- package/.claude/personalities/zen.md +14 -14
- package/.claude/settings.json +15 -15
- package/.claude/verbosity.txt +1 -1
- package/.clawdbot/README.md +105 -105
- package/.clawdbot/skill/SKILL.md +241 -241
- package/.mcp.json +12 -0
- package/CLAUDE.md +170 -170
- package/README.md +2029 -2007
- package/RELEASE_NOTES.md +1310 -1203
- package/WINDOWS-SETUP.md +208 -208
- package/bin/agent-vibes +39 -39
- package/bin/agentvibes-voice-browser.js +1840 -1840
- package/bin/agentvibes.js +48 -2
- package/bin/mcp-server.js +121 -121
- package/bin/mcp-server.sh +206 -206
- package/bin/test-bmad-pr +78 -78
- package/mcp-server/QUICK_START.md +203 -203
- package/mcp-server/README.md +345 -345
- package/mcp-server/WINDOWS_SETUP.md +260 -260
- package/mcp-server/docs/troubleshooting-audio.md +313 -313
- package/mcp-server/examples/claude_desktop_config.json +11 -11
- package/mcp-server/examples/claude_desktop_config_piper.json +9 -9
- package/mcp-server/examples/custom_instructions.md +169 -169
- package/mcp-server/install-deps.js +130 -130
- package/mcp-server/pyproject.toml +52 -52
- package/mcp-server/requirements.txt +2 -2
- package/mcp-server/server.py +1465 -1453
- package/mcp-server/test_server.py +395 -395
- package/mcp-server/test_windows_script_parity.py +336 -0
- package/package.json +110 -110
- package/setup-windows.ps1 +815 -815
- package/src/bmad-detector.js +71 -71
- package/src/cli/list-personalities.js +110 -110
- package/src/cli/list-voices.js +114 -114
- package/src/commands/bmad-voices.js +394 -394
- package/src/commands/install-mcp.js +476 -476
- package/src/console/app.js +824 -824
- package/src/console/audio-env.js +20 -1
- package/src/console/brand-colors.js +13 -13
- package/src/console/constants/personalities.js +44 -44
- package/src/console/footer-config.js +50 -50
- package/src/console/modals/modal-overlay.js +247 -247
- package/src/console/navigation.js +62 -62
- package/src/console/tabs/agents-tab.js +1684 -1516
- package/src/console/tabs/help-tab.js +261 -261
- package/src/console/tabs/install-tab.js +1007 -991
- package/src/console/tabs/music-tab.js +22 -8
- package/src/console/tabs/placeholder-tab.js +53 -53
- package/src/console/tabs/readme-tab.js +267 -267
- package/src/console/tabs/receiver-tab.js +1472 -1212
- package/src/console/tabs/settings-tab.js +208 -84
- package/src/console/tabs/voices-tab.js +100 -21
- package/src/console/widgets/destroy-list.js +25 -25
- package/src/console/widgets/format-utils.js +89 -89
- package/src/console/widgets/notice.js +55 -55
- package/src/console/widgets/personality-picker.js +185 -185
- package/src/console/widgets/reverb-picker.js +94 -94
- package/src/console/widgets/track-picker.js +285 -285
- package/src/installer/music-file-input.js +304 -304
- package/src/installer.js +5895 -5829
- package/src/services/agent-voice-store.js +423 -423
- package/src/services/config-service.js +264 -264
- package/src/services/navigation-service.js +123 -123
- package/src/services/provider-service.js +143 -132
- package/src/services/verbosity-service.js +157 -157
- package/src/utils/audio-duration-validator.js +298 -298
- package/src/utils/audio-format-validator.js +277 -277
- package/src/utils/dependency-checker.js +469 -466
- package/src/utils/file-ownership-verifier.js +358 -358
- package/src/utils/list-formatter.js +194 -194
- package/src/utils/music-file-validator.js +285 -285
- package/src/utils/preview-list-prompt.js +136 -136
- package/src/utils/provider-validator.js +96 -12
- package/src/utils/secure-music-storage.js +412 -412
- package/templates/agentvibes-receiver.sh +482 -482
- package/templates/audio/welcome-music.mp3 +0 -0
- package/voice-assignments.json +8244 -8244
- package/.claude/config/background-music-position.txt +0 -1
|
@@ -1,482 +1,482 @@
|
|
|
1
|
-
#!/usr/bin/env bash
|
|
2
|
-
#
|
|
3
|
-
# File: agentvibes-receiver.sh
|
|
4
|
-
# Location: User installs to ~/.agentvibes/play-remote.sh
|
|
5
|
-
#
|
|
6
|
-
# AgentVibes SSH-TTS Receiver (v2 — self-contained pipeline)
|
|
7
|
-
# Receives TTS requests via SSH, generates and plays audio locally.
|
|
8
|
-
#
|
|
9
|
-
# Supports two payload formats:
|
|
10
|
-
# 1. JSON payload (v2): single base64-encoded JSON with all config
|
|
11
|
-
# 2. Legacy positional args: base64_text voice_name (backward compat)
|
|
12
|
-
#
|
|
13
|
-
# Pipeline: TTS (piper|soprano|macos|windows-sapi) → sox effects → ffmpeg music mix → audio player
|
|
14
|
-
# All steps run in foreground (required for SSH ForceCommand).
|
|
15
|
-
#
|
|
16
|
-
# Installation:
|
|
17
|
-
# curl -sSL https://raw.githubusercontent.com/paulpreibisch/AgentVibes/main/scripts/install-ssh-receiver.sh | bash
|
|
18
|
-
#
|
|
19
|
-
# Copyright (c) 2025 Paul Preibisch
|
|
20
|
-
# Licensed under Apache-2.0
|
|
21
|
-
#
|
|
22
|
-
|
|
23
|
-
set -euo pipefail
|
|
24
|
-
|
|
25
|
-
# ---------------------------------------------------------------------------
|
|
26
|
-
# Environment setup for SSH ForceCommand context
|
|
27
|
-
# ---------------------------------------------------------------------------
|
|
28
|
-
|
|
29
|
-
# ForceCommand passes args via SSH_ORIGINAL_COMMAND env var
|
|
30
|
-
# SECURITY: Use read -ra instead of eval to prevent command injection
|
|
31
|
-
if [[ -n "${SSH_ORIGINAL_COMMAND:-}" ]]; then
|
|
32
|
-
read -ra _ssh_args <<< "$SSH_ORIGINAL_COMMAND"
|
|
33
|
-
set -- "${_ssh_args[@]}"
|
|
34
|
-
fi
|
|
35
|
-
|
|
36
|
-
# Handle -- argument separator (skip it if present)
|
|
37
|
-
if [[ "${1:-}" == "--" ]]; then
|
|
38
|
-
shift
|
|
39
|
-
fi
|
|
40
|
-
|
|
41
|
-
# ---------------------------------------------------------------------------
|
|
42
|
-
# Configuration — customize these for your installation
|
|
43
|
-
# ---------------------------------------------------------------------------
|
|
44
|
-
|
|
45
|
-
# Ensure common tool paths are available in restricted SSH context
|
|
46
|
-
export PATH="$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin:$PATH"
|
|
47
|
-
|
|
48
|
-
# All paths use $HOME — the receiver user's own home directory.
|
|
49
|
-
# During install, voices and tracks are symlinked here from the desktop user.
|
|
50
|
-
# This avoids needing access to another user's home directory.
|
|
51
|
-
|
|
52
|
-
# Where piper voice models are stored
|
|
53
|
-
VOICES_DIR="${AGENTVIBES_VOICES_DIR:-$HOME/.claude/piper-voices}"
|
|
54
|
-
|
|
55
|
-
# Where background music tracks are stored
|
|
56
|
-
TRACKS_DIR="${AGENTVIBES_TRACKS_DIR:-$HOME/.claude/audio/tracks}"
|
|
57
|
-
|
|
58
|
-
# Log file — the TUI reads from this location
|
|
59
|
-
LOG_FILE="${AGENTVIBES_RECEIVER_LOG:-$HOME/.agentvibes/receiver.log}"
|
|
60
|
-
|
|
61
|
-
# PipeWire/PulseAudio — connect to the desktop user's audio session.
|
|
62
|
-
# Cross-user audio is tricky: Unix sockets reject different-uid callers
|
|
63
|
-
# even with ACLs. The reliable approach is localhost TCP on a fixed port.
|
|
64
|
-
# The setup script configures PipeWire-Pulse to listen on 127.0.0.1:34567.
|
|
65
|
-
AGENTVIBES_PULSE_PORT="${AGENTVIBES_PULSE_PORT:-34567}"
|
|
66
|
-
|
|
67
|
-
if [[ -z "${PULSE_SERVER:-}" ]]; then
|
|
68
|
-
_own_runtime="/run/user/$(id -u)"
|
|
69
|
-
# Detect if we're the dedicated receiver user — always use TCP to reach
|
|
70
|
-
# the desktop user's audio session, even if we have our own pulse socket.
|
|
71
|
-
_is_receiver_user=false
|
|
72
|
-
[[ "$(whoami)" == "agentvibes-receiver" ]] && _is_receiver_user=true
|
|
73
|
-
|
|
74
|
-
if [[ "$_is_receiver_user" == true ]]; then
|
|
75
|
-
# Dedicated receiver user — must use TCP to desktop user's PipeWire-Pulse
|
|
76
|
-
export PULSE_SERVER="tcp:127.0.0.1:$AGENTVIBES_PULSE_PORT"
|
|
77
|
-
elif [[ -e "$_own_runtime/pulse/native" ]]; then
|
|
78
|
-
# Same user — use own Unix socket (fastest)
|
|
79
|
-
export PULSE_SERVER="unix:$_own_runtime/pulse/native"
|
|
80
|
-
else
|
|
81
|
-
# Different user — use localhost TCP (setup by agentvibes installer)
|
|
82
|
-
export PULSE_SERVER="tcp:127.0.0.1:$AGENTVIBES_PULSE_PORT"
|
|
83
|
-
fi
|
|
84
|
-
fi
|
|
85
|
-
|
|
86
|
-
# XDG_RUNTIME_DIR still needed for pipewire tools (pw-play fallback)
|
|
87
|
-
if [[ -z "${XDG_RUNTIME_DIR:-}" ]] || [[ ! -e "$XDG_RUNTIME_DIR/pipewire-0" ]]; then
|
|
88
|
-
for _rd in /run/user/*/; do
|
|
89
|
-
[[ -e "${_rd}pipewire-0" ]] && { export XDG_RUNTIME_DIR="${_rd%/}"; break; }
|
|
90
|
-
done
|
|
91
|
-
fi
|
|
92
|
-
export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}"
|
|
93
|
-
|
|
94
|
-
# Audio playback — detect available player
|
|
95
|
-
# Prefer paplay over pw-play: pw-play from a different user causes
|
|
96
|
-
# PipeWire flat-volume side effects that drop the master volume.
|
|
97
|
-
AUDIO_PLAYER=""
|
|
98
|
-
AUDIO_PLAYER_ARGS=()
|
|
99
|
-
|
|
100
|
-
# Check for user-configured sink (set via TUI receiver tab [S] key)
|
|
101
|
-
SINK_CONFIG="${AGENTVIBES_RECEIVER_SINK:-$HOME/.agentvibes/receiver-sink.txt}"
|
|
102
|
-
_default_sink=""
|
|
103
|
-
if [[ -f "$SINK_CONFIG" ]]; then
|
|
104
|
-
_configured_sink=$(head -1 "$SINK_CONFIG" 2>/dev/null | tr -d '[:space:]')
|
|
105
|
-
# Validate sink name format (alphanumeric, hyphens, underscores, dots)
|
|
106
|
-
if [[ -n "$_configured_sink" ]] && [[ "$_configured_sink" =~ ^[a-zA-Z0-9._-]+$ ]]; then
|
|
107
|
-
_default_sink="$_configured_sink"
|
|
108
|
-
fi
|
|
109
|
-
fi
|
|
110
|
-
# Fall back to system default if no valid config
|
|
111
|
-
if [[ -z "$_default_sink" ]]; then
|
|
112
|
-
_default_sink=$(pactl get-default-sink 2>/dev/null || true)
|
|
113
|
-
fi
|
|
114
|
-
|
|
115
|
-
if command -v paplay &>/dev/null; then
|
|
116
|
-
AUDIO_PLAYER="paplay"
|
|
117
|
-
[[ -n "$_default_sink" ]] && AUDIO_PLAYER_ARGS=(--device="$_default_sink")
|
|
118
|
-
elif command -v pw-play &>/dev/null; then
|
|
119
|
-
AUDIO_PLAYER="pw-play"
|
|
120
|
-
[[ -n "$_default_sink" ]] && AUDIO_PLAYER_ARGS=(--target="$_default_sink")
|
|
121
|
-
elif command -v aplay &>/dev/null; then
|
|
122
|
-
AUDIO_PLAYER="aplay"
|
|
123
|
-
fi
|
|
124
|
-
|
|
125
|
-
# ---------------------------------------------------------------------------
|
|
126
|
-
# Input parsing
|
|
127
|
-
# ---------------------------------------------------------------------------
|
|
128
|
-
|
|
129
|
-
ENCODED_PAYLOAD="${1:-}"
|
|
130
|
-
|
|
131
|
-
if [[ -z "$ENCODED_PAYLOAD" ]]; then
|
|
132
|
-
echo "Error: No payload provided" >&2
|
|
133
|
-
echo "Usage: $0 <base64-encoded-json-or-text> [voice]" >&2
|
|
134
|
-
exit 1
|
|
135
|
-
fi
|
|
136
|
-
|
|
137
|
-
# SECURITY: Validate base64 format (reject shell metacharacters)
|
|
138
|
-
if [[ ! "$ENCODED_PAYLOAD" =~ ^[A-Za-z0-9+/=]+$ ]]; then
|
|
139
|
-
echo "Error: Payload must be base64-encoded" >&2
|
|
140
|
-
exit 1
|
|
141
|
-
fi
|
|
142
|
-
|
|
143
|
-
# Decode base64
|
|
144
|
-
DECODED=$(printf '%s' "$ENCODED_PAYLOAD" | base64 -d 2>/dev/null) || {
|
|
145
|
-
echo "Error: Failed to decode base64 payload" >&2
|
|
146
|
-
exit 1
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
# ---------------------------------------------------------------------------
|
|
150
|
-
# Parse payload — JSON (v2) or plain text (legacy)
|
|
151
|
-
# ---------------------------------------------------------------------------
|
|
152
|
-
|
|
153
|
-
TEXT=""
|
|
154
|
-
VOICE="en_US-lessac-medium"
|
|
155
|
-
SOX_EFFECTS=""
|
|
156
|
-
BG_FILE=""
|
|
157
|
-
BG_VOLUME="0.10"
|
|
158
|
-
PROJECT=""
|
|
159
|
-
PRETEXT=""
|
|
160
|
-
SPEED=""
|
|
161
|
-
PROVIDER="piper"
|
|
162
|
-
|
|
163
|
-
# Detect JSON payload (starts with '{')
|
|
164
|
-
if [[ "$DECODED" == "{"* ]]; then
|
|
165
|
-
# JSON v2 payload — extract fields with lightweight parsing
|
|
166
|
-
# SECURITY: Use parameter extraction, not eval
|
|
167
|
-
if command -v jq &>/dev/null; then
|
|
168
|
-
TEXT=$(printf '%s' "$DECODED" | jq -r '.text // empty' 2>/dev/null) || TEXT=""
|
|
169
|
-
VOICE=$(printf '%s' "$DECODED" | jq -r '.voice // "en_US-lessac-medium"' 2>/dev/null) || VOICE="en_US-lessac-medium"
|
|
170
|
-
SOX_EFFECTS=$(printf '%s' "$DECODED" | jq -r '.effects // empty' 2>/dev/null) || SOX_EFFECTS=""
|
|
171
|
-
BG_FILE=$(printf '%s' "$DECODED" | jq -r '.music // empty' 2>/dev/null) || BG_FILE=""
|
|
172
|
-
BG_VOLUME=$(printf '%s' "$DECODED" | jq -r '.volume // "0.10"' 2>/dev/null) || BG_VOLUME="0.10"
|
|
173
|
-
PROJECT=$(printf '%s' "$DECODED" | jq -r '.project // empty' 2>/dev/null) || PROJECT=""
|
|
174
|
-
PRETEXT=$(printf '%s' "$DECODED" | jq -r '.pretext // empty' 2>/dev/null) || PRETEXT=""
|
|
175
|
-
SPEED=$(printf '%s' "$DECODED" | jq -r '.speed // empty' 2>/dev/null) || SPEED=""
|
|
176
|
-
PROVIDER=$(printf '%s' "$DECODED" | jq -r '.provider // "piper"' 2>/dev/null) || PROVIDER="piper"
|
|
177
|
-
else
|
|
178
|
-
# Fallback: extract with grep/sed (no jq available)
|
|
179
|
-
TEXT=$(printf '%s' "$DECODED" | grep -o '"text"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
180
|
-
VOICE=$(printf '%s' "$DECODED" | grep -o '"voice"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
181
|
-
SOX_EFFECTS=$(printf '%s' "$DECODED" | grep -o '"effects"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
182
|
-
BG_FILE=$(printf '%s' "$DECODED" | grep -o '"music"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
183
|
-
BG_VOLUME=$(printf '%s' "$DECODED" | grep -o '"volume"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
184
|
-
PROJECT=$(printf '%s' "$DECODED" | grep -o '"project"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
185
|
-
PRETEXT=$(printf '%s' "$DECODED" | grep -o '"pretext"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
186
|
-
SPEED=$(printf '%s' "$DECODED" | grep -o '"speed"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
187
|
-
PROVIDER=$(printf '%s' "$DECODED" | grep -o '"provider"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 | sed 's/.*: *"//;s/"$//' || true)
|
|
188
|
-
[[ -z "$VOICE" ]] && VOICE="en_US-lessac-medium"
|
|
189
|
-
[[ -z "$BG_VOLUME" ]] && BG_VOLUME="0.10"
|
|
190
|
-
[[ -z "$PROVIDER" ]] && PROVIDER="piper"
|
|
191
|
-
fi
|
|
192
|
-
else
|
|
193
|
-
# Legacy format: plain text, voice from positional arg
|
|
194
|
-
TEXT="$DECODED"
|
|
195
|
-
VOICE="${2:-en_US-lessac-medium}"
|
|
196
|
-
fi
|
|
197
|
-
|
|
198
|
-
# Validate required text
|
|
199
|
-
if [[ -z "$TEXT" ]]; then
|
|
200
|
-
echo "Error: No text in payload" >&2
|
|
201
|
-
exit 1
|
|
202
|
-
fi
|
|
203
|
-
|
|
204
|
-
# SECURITY: Validate voice format (alphanumeric, hyphens, underscores only)
|
|
205
|
-
if [[ ! "$VOICE" =~ ^[a-zA-Z0-9_-]+$ ]]; then
|
|
206
|
-
echo "Error: Invalid voice format" >&2
|
|
207
|
-
exit 1
|
|
208
|
-
fi
|
|
209
|
-
|
|
210
|
-
# SECURITY: Validate volume is a number
|
|
211
|
-
if [[ -n "$BG_VOLUME" ]] && [[ ! "$BG_VOLUME" =~ ^[0-9]+\.?[0-9]*$ ]]; then
|
|
212
|
-
BG_VOLUME="0.10"
|
|
213
|
-
fi
|
|
214
|
-
|
|
215
|
-
# SECURITY: Validate speed is a number (prevents awk injection)
|
|
216
|
-
if [[ -n "$SPEED" ]] && [[ ! "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
|
|
217
|
-
SPEED=""
|
|
218
|
-
fi
|
|
219
|
-
|
|
220
|
-
# SECURITY: Validate provider format (known providers only)
|
|
221
|
-
case "$PROVIDER" in
|
|
222
|
-
piper|soprano|macos|windows-sapi) ;;
|
|
223
|
-
*) PROVIDER="piper" ;;
|
|
224
|
-
esac
|
|
225
|
-
|
|
226
|
-
# Prepend pretext if provided
|
|
227
|
-
if [[ -n "$PRETEXT" ]]; then
|
|
228
|
-
TEXT="${PRETEXT}. ${TEXT}"
|
|
229
|
-
fi
|
|
230
|
-
|
|
231
|
-
# ---------------------------------------------------------------------------
|
|
232
|
-
# Structured logging (for receiver tab to display)
|
|
233
|
-
# ---------------------------------------------------------------------------
|
|
234
|
-
|
|
235
|
-
LOG_ID=$(printf '%04x' $((RANDOM % 65536)))
|
|
236
|
-
|
|
237
|
-
log_message() {
|
|
238
|
-
local status="$1"
|
|
239
|
-
local detail="${2:-}"
|
|
240
|
-
local timestamp
|
|
241
|
-
timestamp=$(date '+%Y-%m-%dT%H:%M:%S')
|
|
242
|
-
local log_dir
|
|
243
|
-
log_dir=$(dirname "$LOG_FILE")
|
|
244
|
-
mkdir -p "$log_dir" 2>/dev/null || true
|
|
245
|
-
# Extract sender IP from SSH_CLIENT (set by sshd: "IP PORT PORT")
|
|
246
|
-
local sender_ip="${SSH_CLIENT%% *}"
|
|
247
|
-
[[ -z "$sender_ip" ]] && sender_ip="local"
|
|
248
|
-
# Format: TIMESTAMP|STATUS|PROJECT|VOICE|TEXT_PREVIEW|DETAIL|IP|LOG_ID
|
|
249
|
-
local preview="${TEXT:0:200}"
|
|
250
|
-
printf '%s|%s|%s|%s|%s|%s|%s|%s\n' \
|
|
251
|
-
"$timestamp" "$status" "${PROJECT:-unknown}" "$VOICE" "$preview" "$detail" "$sender_ip" "$LOG_ID" \
|
|
252
|
-
>> "$LOG_FILE" 2>/dev/null || true
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
log_message "RECEIVED" "provider=${PROVIDER} effects=${SOX_EFFECTS:-none} music=${BG_FILE:-none}"
|
|
256
|
-
|
|
257
|
-
# ---------------------------------------------------------------------------
|
|
258
|
-
# Temp files with cleanup
|
|
259
|
-
# ---------------------------------------------------------------------------
|
|
260
|
-
|
|
261
|
-
# Use own runtime dir for temp files (not the desktop user's)
|
|
262
|
-
_TEMP_BASE="/run/user/$(id -u)"
|
|
263
|
-
[[ -d "$_TEMP_BASE" ]] && [[ -w "$_TEMP_BASE" ]] || _TEMP_BASE="/tmp"
|
|
264
|
-
RAW_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-XXXXXX.wav")
|
|
265
|
-
EFFECTS_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-fx-XXXXXX.wav")
|
|
266
|
-
FINAL_WAV=$(mktemp "$_TEMP_BASE/agentvibes-recv-final-XXXXXX.wav")
|
|
267
|
-
trap 'rm -f "$RAW_WAV" "$EFFECTS_WAV" "$FINAL_WAV"' EXIT
|
|
268
|
-
|
|
269
|
-
# ---------------------------------------------------------------------------
|
|
270
|
-
# Step 1: Generate TTS audio (multi-provider dispatch)
|
|
271
|
-
# ---------------------------------------------------------------------------
|
|
272
|
-
|
|
273
|
-
_generate_tts_piper() {
|
|
274
|
-
local model="$VOICES_DIR/${VOICE}.onnx"
|
|
275
|
-
if [[ ! -f "$model" ]]; then
|
|
276
|
-
# Fallback: try any available voice rather than failing
|
|
277
|
-
local fallback
|
|
278
|
-
fallback=$(find "$VOICES_DIR" -maxdepth 1 -name '*.onnx' -type f 2>/dev/null | head -1)
|
|
279
|
-
if [[ -n "$fallback" ]]; then
|
|
280
|
-
local fallback_name
|
|
281
|
-
fallback_name=$(basename "$fallback" .onnx)
|
|
282
|
-
log_message "WARN" "Voice $VOICE not found, falling back to $fallback_name"
|
|
283
|
-
echo "Warning: Voice $VOICE not found, using $fallback_name" >&2
|
|
284
|
-
VOICE="$fallback_name"
|
|
285
|
-
model="$fallback"
|
|
286
|
-
else
|
|
287
|
-
log_message "ERROR" "No voice models found in $VOICES_DIR"
|
|
288
|
-
echo "Error: No voice models found in $VOICES_DIR" >&2
|
|
289
|
-
return 1
|
|
290
|
-
fi
|
|
291
|
-
fi
|
|
292
|
-
|
|
293
|
-
local args=(--model "$model" --output_file "$RAW_WAV")
|
|
294
|
-
if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
|
|
295
|
-
args+=(--length_scale "$SPEED")
|
|
296
|
-
fi
|
|
297
|
-
|
|
298
|
-
echo "$TEXT" | piper "${args[@]}" 2>/dev/null || {
|
|
299
|
-
log_message "ERROR" "Piper TTS failed"
|
|
300
|
-
echo "Error: Piper TTS generation failed" >&2
|
|
301
|
-
return 1
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
_generate_tts_soprano() {
|
|
306
|
-
local soprano_port="${SOPRANO_PORT:-7860}"
|
|
307
|
-
|
|
308
|
-
# Try API mode first (OpenAI-compatible endpoint)
|
|
309
|
-
if curl -sf -X POST "http://127.0.0.1:${soprano_port}/v1/audio/speech" \
|
|
310
|
-
-H "Content-Type: application/json" \
|
|
311
|
-
-d "{\"input\":$(printf '%s' "$TEXT" | jq -Rs .)}" \
|
|
312
|
-
--output "$RAW_WAV" 2>/dev/null; then
|
|
313
|
-
return 0
|
|
314
|
-
fi
|
|
315
|
-
|
|
316
|
-
# Try CLI mode — options before --, text as final positional arg
|
|
317
|
-
if command -v soprano &>/dev/null; then
|
|
318
|
-
soprano -o "$RAW_WAV" -- "$TEXT" 2>/dev/null && return 0
|
|
319
|
-
fi
|
|
320
|
-
|
|
321
|
-
log_message "ERROR" "Soprano TTS failed — is soprano running on port ${soprano_port}?"
|
|
322
|
-
echo "Error: Soprano TTS unavailable (tried API and CLI)" >&2
|
|
323
|
-
return 1
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
_generate_tts_macos() {
|
|
327
|
-
if ! command -v say &>/dev/null; then
|
|
328
|
-
log_message "ERROR" "macOS say command not found"
|
|
329
|
-
echo "Error: macOS say command not available" >&2
|
|
330
|
-
return 1
|
|
331
|
-
fi
|
|
332
|
-
|
|
333
|
-
local say_args=(-v "$VOICE")
|
|
334
|
-
# Convert speed multiplier to WPM (say uses WPM, default ~200)
|
|
335
|
-
if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
|
|
336
|
-
local wpm
|
|
337
|
-
wpm=$(awk "BEGIN {printf \"%d\", 200 * $SPEED}")
|
|
338
|
-
say_args+=(-r "$wpm")
|
|
339
|
-
fi
|
|
340
|
-
|
|
341
|
-
# say outputs AIFF — convert to WAV for consistent pipeline
|
|
342
|
-
local aiff_tmp="${RAW_WAV%.wav}.aiff"
|
|
343
|
-
echo "$TEXT" | say "${say_args[@]}" -o "$aiff_tmp" 2>/dev/null || {
|
|
344
|
-
log_message "ERROR" "macOS say failed"
|
|
345
|
-
rm -f "$aiff_tmp"
|
|
346
|
-
return 1
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
if command -v ffmpeg &>/dev/null; then
|
|
350
|
-
ffmpeg -y -i "$aiff_tmp" "$RAW_WAV" </dev/null 2>/dev/null
|
|
351
|
-
rm -f "$aiff_tmp"
|
|
352
|
-
else
|
|
353
|
-
# No ffmpeg — rename and hope player handles AIFF
|
|
354
|
-
mv "$aiff_tmp" "$RAW_WAV"
|
|
355
|
-
fi
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
_generate_tts_windows_sapi() {
|
|
359
|
-
# Windows SAPI via PowerShell (works in WSL2 via powershell.exe)
|
|
360
|
-
local ps_cmd=""
|
|
361
|
-
if command -v powershell.exe &>/dev/null; then
|
|
362
|
-
ps_cmd="powershell.exe"
|
|
363
|
-
elif command -v pwsh &>/dev/null; then
|
|
364
|
-
ps_cmd="pwsh"
|
|
365
|
-
else
|
|
366
|
-
log_message "ERROR" "PowerShell not found for Windows SAPI"
|
|
367
|
-
echo "Error: PowerShell required for Windows SAPI" >&2
|
|
368
|
-
return 1
|
|
369
|
-
fi
|
|
370
|
-
|
|
371
|
-
# SECURITY: Escape text for PowerShell single-quoted string
|
|
372
|
-
local escaped_text
|
|
373
|
-
escaped_text=$(printf '%s' "$TEXT" | sed "s/'/''/g")
|
|
374
|
-
|
|
375
|
-
local rate=0
|
|
376
|
-
if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
|
|
377
|
-
# SAPI rate: -10 to 10, 0 is normal. Speed 1.0=0, 2.0=5, 0.5=-5
|
|
378
|
-
rate=$(awk "BEGIN {r = ($SPEED - 1.0) * 10; if (r > 10) r = 10; if (r < -10) r = -10; printf \"%d\", r}")
|
|
379
|
-
fi
|
|
380
|
-
|
|
381
|
-
$ps_cmd -NoProfile -Command "
|
|
382
|
-
Add-Type -AssemblyName System.Speech
|
|
383
|
-
\$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer
|
|
384
|
-
\$synth.Rate = $rate
|
|
385
|
-
\$synth.SetOutputToWaveFile('$(wslpath -w "$RAW_WAV" 2>/dev/null || echo "$RAW_WAV")')
|
|
386
|
-
\$synth.Speak('$escaped_text')
|
|
387
|
-
\$synth.Dispose()
|
|
388
|
-
" 2>/dev/null || {
|
|
389
|
-
log_message "ERROR" "Windows SAPI TTS failed"
|
|
390
|
-
echo "Error: Windows SAPI generation failed" >&2
|
|
391
|
-
return 1
|
|
392
|
-
}
|
|
393
|
-
}
|
|
394
|
-
|
|
395
|
-
# Dispatch to the appropriate TTS provider
|
|
396
|
-
case "$PROVIDER" in
|
|
397
|
-
piper)
|
|
398
|
-
_generate_tts_piper || exit 1
|
|
399
|
-
;;
|
|
400
|
-
soprano)
|
|
401
|
-
_generate_tts_soprano || exit 1
|
|
402
|
-
;;
|
|
403
|
-
macos)
|
|
404
|
-
_generate_tts_macos || exit 1
|
|
405
|
-
;;
|
|
406
|
-
windows-sapi)
|
|
407
|
-
_generate_tts_windows_sapi || exit 1
|
|
408
|
-
;;
|
|
409
|
-
*)
|
|
410
|
-
log_message "ERROR" "Unknown provider: $PROVIDER"
|
|
411
|
-
echo "Error: Unknown TTS provider: $PROVIDER" >&2
|
|
412
|
-
exit 1
|
|
413
|
-
;;
|
|
414
|
-
esac
|
|
415
|
-
|
|
416
|
-
PLAY_FILE="$RAW_WAV"
|
|
417
|
-
|
|
418
|
-
# ---------------------------------------------------------------------------
|
|
419
|
-
# Step 2: Apply sox effects (reverb, EQ, etc.)
|
|
420
|
-
# ---------------------------------------------------------------------------
|
|
421
|
-
|
|
422
|
-
if [[ -n "$SOX_EFFECTS" ]] && command -v sox &>/dev/null; then
|
|
423
|
-
# SECURITY: Validate effects contain only safe characters (alphanumeric, spaces, dots, hyphens, underscores)
|
|
424
|
-
if [[ "$SOX_EFFECTS" =~ ^[a-zA-Z0-9\ ._-]+$ ]]; then
|
|
425
|
-
sox "$RAW_WAV" "$EFFECTS_WAV" $SOX_EFFECTS 2>/dev/null && PLAY_FILE="$EFFECTS_WAV"
|
|
426
|
-
else
|
|
427
|
-
log_message "WARN" "Rejected unsafe sox effects: ${SOX_EFFECTS:0:50}"
|
|
428
|
-
fi
|
|
429
|
-
fi
|
|
430
|
-
|
|
431
|
-
# ---------------------------------------------------------------------------
|
|
432
|
-
# Step 3: Mix background music (if configured)
|
|
433
|
-
# ---------------------------------------------------------------------------
|
|
434
|
-
|
|
435
|
-
if [[ -n "$BG_FILE" ]] && command -v ffmpeg &>/dev/null; then
|
|
436
|
-
BG_PATH="$TRACKS_DIR/$BG_FILE"
|
|
437
|
-
if [[ -f "$BG_PATH" ]]; then
|
|
438
|
-
DURATION=$(ffprobe -v error -show_entries format=duration \
|
|
439
|
-
-of default=noprint_wrappers=1:nokey=1 "$PLAY_FILE" 2>/dev/null || echo "")
|
|
440
|
-
if [[ -n "$DURATION" ]]; then
|
|
441
|
-
TOTAL_DUR=$(awk "BEGIN {printf \"%.2f\", $DURATION + 2}")
|
|
442
|
-
FADE_OUT=$(awk "BEGIN {printf \"%.2f\", $DURATION}")
|
|
443
|
-
timeout 20 ffmpeg -y -i "$PLAY_FILE" -stream_loop -1 -i "$BG_PATH" \
|
|
444
|
-
-filter_complex "[1:a]volume=${BG_VOLUME},afade=t=in:st=0:d=0.3,afade=t=out:st=${FADE_OUT}:d=2[bg];[0:a]adelay=2000|2000[v];[v][bg]amix=inputs=2:duration=longest[out]" \
|
|
445
|
-
-map "[out]" -t "$TOTAL_DUR" "$FINAL_WAV" </dev/null 2>/dev/null && PLAY_FILE="$FINAL_WAV"
|
|
446
|
-
fi
|
|
447
|
-
fi
|
|
448
|
-
fi
|
|
449
|
-
|
|
450
|
-
# ---------------------------------------------------------------------------
|
|
451
|
-
# Step 4: Play audio in foreground (required for SSH — no backgrounding)
|
|
452
|
-
# ---------------------------------------------------------------------------
|
|
453
|
-
|
|
454
|
-
if [[ -z "$AUDIO_PLAYER" ]]; then
|
|
455
|
-
log_message "ERROR" "No audio player found (pw-play, paplay, aplay)"
|
|
456
|
-
echo "Error: No audio player available" >&2
|
|
457
|
-
exit 1
|
|
458
|
-
fi
|
|
459
|
-
|
|
460
|
-
# Save master volume before playback — flat-volumes in PipeWire/PulseAudio
|
|
461
|
-
# can change master volume when a new stream connects from another user.
|
|
462
|
-
_saved_vol=""
|
|
463
|
-
if command -v pactl &>/dev/null; then
|
|
464
|
-
_saved_vol=$(pactl get-sink-volume @DEFAULT_SINK@ 2>/dev/null | grep -o '[0-9]*%' | head -1)
|
|
465
|
-
fi
|
|
466
|
-
|
|
467
|
-
log_message "PLAYING" "player=$AUDIO_PLAYER sink=${_default_sink:-unknown} vol=${_saved_vol:-?} pulse=${PULSE_SERVER:-unset}"
|
|
468
|
-
|
|
469
|
-
_play_err=$($AUDIO_PLAYER "${AUDIO_PLAYER_ARGS[@]}" "$PLAY_FILE" 2>&1) || {
|
|
470
|
-
log_message "ERROR" "Playback failed with $AUDIO_PLAYER: $_play_err"
|
|
471
|
-
echo "Error: Audio playback failed" >&2
|
|
472
|
-
echo "Detail: $_play_err" >&2
|
|
473
|
-
exit 1
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
# Restore master volume to what it was before playback
|
|
477
|
-
if [[ -n "$_saved_vol" ]] && command -v pactl &>/dev/null; then
|
|
478
|
-
pactl set-sink-volume @DEFAULT_SINK@ "$_saved_vol" 2>/dev/null || true
|
|
479
|
-
fi
|
|
480
|
-
|
|
481
|
-
log_message "DONE" ""
|
|
482
|
-
exit 0
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
#
|
|
3
|
+
# File: agentvibes-receiver.sh
|
|
4
|
+
# Location: User installs to ~/.agentvibes/play-remote.sh
|
|
5
|
+
#
|
|
6
|
+
# AgentVibes SSH-TTS Receiver (v2 — self-contained pipeline)
|
|
7
|
+
# Receives TTS requests via SSH, generates and plays audio locally.
|
|
8
|
+
#
|
|
9
|
+
# Supports two payload formats:
|
|
10
|
+
# 1. JSON payload (v2): single base64-encoded JSON with all config
|
|
11
|
+
# 2. Legacy positional args: base64_text voice_name (backward compat)
|
|
12
|
+
#
|
|
13
|
+
# Pipeline: TTS (piper|soprano|macos|windows-sapi) → sox effects → ffmpeg music mix → audio player
|
|
14
|
+
# All steps run in foreground (required for SSH ForceCommand).
|
|
15
|
+
#
|
|
16
|
+
# Installation:
|
|
17
|
+
# curl -sSL https://raw.githubusercontent.com/paulpreibisch/AgentVibes/main/scripts/install-ssh-receiver.sh | bash
|
|
18
|
+
#
|
|
19
|
+
# Copyright (c) 2025 Paul Preibisch
|
|
20
|
+
# Licensed under Apache-2.0
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
set -euo pipefail

# ---------------------------------------------------------------------------
# Argument recovery for the SSH ForceCommand context
# ---------------------------------------------------------------------------

# Under ForceCommand the client's command line arrives only in
# SSH_ORIGINAL_COMMAND, so split it into the positional parameters.
# SECURITY: the string is word-split with `read`, never evaluated, so shell
# metacharacters in it are inert.
if [[ "${SSH_ORIGINAL_COMMAND:-}" != "" ]]; then
  read -r -a _ssh_args <<< "${SSH_ORIGINAL_COMMAND}"
  set -- "${_ssh_args[@]}"
fi

# A leading "--" is only an option/argument separator; discard it.
case "${1:-}" in
  --) shift ;;
esac
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Configuration — customize these for your installation
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
# Ensure common tool paths are available in restricted SSH context
|
|
46
|
+
# A restricted SSH session may start with a minimal PATH; put the usual
# tool locations in front of whatever was inherited.
PATH="$HOME/.local/bin:/usr/local/bin:/usr/bin:/bin:$PATH"
export PATH

# Every default below lives under $HOME (the account running this script);
# each one can be overridden through its AGENTVIBES_* environment variable.

# Piper voice models (*.onnx)
VOICES_DIR=${AGENTVIBES_VOICES_DIR:-"$HOME/.claude/piper-voices"}

# Background music tracks
TRACKS_DIR=${AGENTVIBES_TRACKS_DIR:-"$HOME/.claude/audio/tracks"}

# Pipe-delimited receiver log
LOG_FILE=${AGENTVIBES_RECEIVER_LOG:-"$HOME/.agentvibes/receiver.log"}

# Localhost TCP port used to reach a PulseAudio-compatible server when this
# account has no usable socket of its own.
: "${AGENTVIBES_PULSE_PORT:=34567}"

# Choose PULSE_SERVER only when the caller has not set one already.
if [[ -z "${PULSE_SERVER:-}" ]]; then
  _own_runtime="/run/user/$(id -u)"

  if [[ "$(whoami)" == "agentvibes-receiver" ]]; then
    _is_receiver_user=true
  else
    _is_receiver_user=false
  fi

  # The dedicated receiver account always goes through TCP, even when it has
  # a pulse socket of its own; any other account uses its own Unix socket
  # when one exists and falls back to TCP otherwise.
  if [[ "$_is_receiver_user" == false && -e "$_own_runtime/pulse/native" ]]; then
    export PULSE_SERVER="unix:$_own_runtime/pulse/native"
  else
    export PULSE_SERVER="tcp:127.0.0.1:$AGENTVIBES_PULSE_PORT"
  fi
fi

# XDG_RUNTIME_DIR is kept for the pw-play fallback: when the current value is
# missing or has no pipewire-0 socket, adopt the first /run/user/* directory
# that does have one.
if [[ -z "${XDG_RUNTIME_DIR:-}" || ! -e "$XDG_RUNTIME_DIR/pipewire-0" ]]; then
  for _rd in /run/user/*/; do
    if [[ -e "${_rd}pipewire-0" ]]; then
      export XDG_RUNTIME_DIR="${_rd%/}"
      break
    fi
  done
fi
export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/run/user/$(id -u)}"
|
|
93
|
+
|
|
94
|
+
# Audio playback — detect available player
|
|
95
|
+
# Prefer paplay over pw-play: pw-play from a different user causes
|
|
96
|
+
# PipeWire flat-volume side effects that drop the master volume.
|
|
97
|
+
AUDIO_PLAYER=""
AUDIO_PLAYER_ARGS=()

# Optional user-selected output sink: first line of the sink config file.
SINK_CONFIG="${AGENTVIBES_RECEIVER_SINK:-$HOME/.agentvibes/receiver-sink.txt}"
_default_sink=""
if [[ -f "$SINK_CONFIG" ]]; then
  # An unreadable file must not abort the script under `set -e`/pipefail;
  # treat it the same as "no sink configured".
  _configured_sink=$(head -n 1 "$SINK_CONFIG" 2>/dev/null | tr -d '[:space:]') \
    || _configured_sink=""
  # Accept only plain sink names (alphanumerics, dots, hyphens, underscores).
  if [[ "$_configured_sink" =~ ^[a-zA-Z0-9._-]+$ ]]; then
    _default_sink="$_configured_sink"
  fi
fi
# No valid configured sink: ask the sound server for its default.
if [[ -z "$_default_sink" ]]; then
  _default_sink=$(pactl get-default-sink 2>/dev/null || true)
fi

#######################################
# Select the playback command.
# Preference order: paplay, pw-play, aplay, afplay. paplay comes before
# pw-play because pw-play run from a different user causes flat-volume side
# effects that drop the master volume. afplay is the last resort so hosts
# using the macOS `say` provider have a player at all.
# Globals:
#   _default_sink     (read)    sink name to target, may be empty
#   AUDIO_PLAYER      (written) chosen command, "" when none is installed
#   AUDIO_PLAYER_ARGS (written) extra arguments selecting the sink
# Returns:
#   0 always; callers inspect AUDIO_PLAYER.
#######################################
_detect_audio_player() {
  AUDIO_PLAYER=""
  AUDIO_PLAYER_ARGS=()

  if command -v paplay &>/dev/null; then
    AUDIO_PLAYER="paplay"
    if [[ -n "$_default_sink" ]]; then
      AUDIO_PLAYER_ARGS=(--device="$_default_sink")
    fi
  elif command -v pw-play &>/dev/null; then
    AUDIO_PLAYER="pw-play"
    if [[ -n "$_default_sink" ]]; then
      AUDIO_PLAYER_ARGS=(--target="$_default_sink")
    fi
  elif command -v aplay &>/dev/null; then
    AUDIO_PLAYER="aplay"
  elif command -v afplay &>/dev/null; then
    AUDIO_PLAYER="afplay"
  fi
  return 0
}

_detect_audio_player
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# Input parsing
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
# First positional argument: the base64-encoded request.
ENCODED_PAYLOAD=${1:-}

[[ -n "$ENCODED_PAYLOAD" ]] || {
  printf '%s\n' "Error: No payload provided" >&2
  printf '%s\n' "Usage: $0 <base64-encoded-json-or-text> [voice]" >&2
  exit 1
}

# SECURITY: only the base64 alphabet is accepted, which keeps shell
# metacharacters out of everything that follows.
[[ "$ENCODED_PAYLOAD" =~ ^[A-Za-z0-9+/=]+$ ]] || {
  printf '%s\n' "Error: Payload must be base64-encoded" >&2
  exit 1
}

# Decode; a non-zero status from base64 is reported as a bad payload.
if ! DECODED=$(printf '%s' "$ENCODED_PAYLOAD" | base64 -d 2>/dev/null); then
  printf '%s\n' "Error: Failed to decode base64 payload" >&2
  exit 1
fi
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# Parse payload — JSON (v2) or plain text (legacy)
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
# Defaults, used when the payload omits a field.
TEXT=""
VOICE="en_US-lessac-medium"
SOX_EFFECTS=""
BG_FILE=""
BG_VOLUME="0.10"
PROJECT=""
PRETEXT=""
SPEED=""
PROVIDER="piper"

#######################################
# Print one top-level field of the JSON document held in $DECODED.
# Uses jq when it is installed. Otherwise falls back to a grep-based
# extraction that understands quoted strings (including \" and \\ escapes)
# and bare numbers such as "speed": 1.5. Other JSON escapes (\n, \uXXXX) are
# left undecoded by the fallback.
# SECURITY: values are only extracted, never evaluated.
# Globals:
#   DECODED (read)
# Arguments:
#   $1 - field name
#   $2 - default printed when the field is missing, null or empty (optional)
# Outputs:
#   the field value (or the default) on stdout, without trailing newline
# Returns:
#   0 always
#######################################
_payload_field() {
  local key=$1
  local fallback=${2:-}
  local value=""

  if command -v jq &>/dev/null; then
    value=$(printf '%s' "${DECODED:-}" \
      | jq -r --arg k "$key" --arg d "$fallback" '.[$k] // $d' 2>/dev/null) \
      || value=""
  else
    local bs='\' dq='"'
    local pattern match
    # "key" : ( "string with \-escapes" | number )
    pattern="\"${key}\""'[[:space:]]*:[[:space:]]*("([^"\\]|\\.)*"|-?[0-9]+(\.[0-9]+)?)'
    match=$(printf '%s' "${DECODED:-}" | grep -oE -- "$pattern" | head -n 1) || true
    # Drop everything up to the key/value colon, then leading whitespace.
    value=${match#*:}
    value=${value#"${value%%[![:space:]]*}"}
    if [[ "$value" == "$dq"*"$dq" ]]; then
      # Strip the surrounding quotes, then undo \" and \\ (in that order).
      value=${value:1:${#value}-2}
      value=${value//"$bs$dq"/"$dq"}
      value=${value//"$bs$bs"/"$bs"}
    fi
  fi

  [[ -n "$value" ]] || value=$fallback
  printf '%s' "$value"
}

# A decoded payload starting with '{' is the JSON (v2) format; anything else
# is the legacy format: the payload is the text, the voice is argument 2.
if [[ "${DECODED:-}" == "{"* ]]; then
  TEXT=$(_payload_field text)
  VOICE=$(_payload_field voice "en_US-lessac-medium")
  SOX_EFFECTS=$(_payload_field effects)
  BG_FILE=$(_payload_field music)
  BG_VOLUME=$(_payload_field volume "0.10")
  PROJECT=$(_payload_field project)
  PRETEXT=$(_payload_field pretext)
  SPEED=$(_payload_field speed)
  PROVIDER=$(_payload_field provider "piper")
else
  TEXT="${DECODED:-}"
  VOICE="${2:-en_US-lessac-medium}"
fi
|
|
197
|
+
|
|
198
|
+
# Validate required text
|
|
199
|
+
# Text is the only mandatory field.
[[ -n "$TEXT" ]] || {
  printf '%s\n' "Error: No text in payload" >&2
  exit 1
}

# SECURITY: voice names may contain only alphanumerics, hyphens, underscores.
[[ "$VOICE" =~ ^[a-zA-Z0-9_-]+$ ]] || {
  printf '%s\n' "Error: Invalid voice format" >&2
  exit 1
}

# Unsigned decimal: digits, optionally a dot, optionally more digits.
_decimal_re='^[0-9]+\.?[0-9]*$'

# SECURITY: a non-numeric volume falls back to the default.
if [[ -n "$BG_VOLUME" && ! "$BG_VOLUME" =~ $_decimal_re ]]; then
  BG_VOLUME="0.10"
fi

# SECURITY: a non-numeric speed is dropped (it is later interpolated into awk).
if [[ -n "$SPEED" && ! "$SPEED" =~ $_decimal_re ]]; then
  SPEED=""
fi

# SECURITY: only known providers pass; anything else becomes piper.
if [[ "$PROVIDER" != "piper" && "$PROVIDER" != "soprano" \
  && "$PROVIDER" != "macos" && "$PROVIDER" != "windows-sapi" ]]; then
  PROVIDER="piper"
fi

# A pretext, when present, is spoken before the text as its own sentence.
[[ -z "$PRETEXT" ]] || TEXT="${PRETEXT}. ${TEXT}"
|
|
230
|
+
|
|
231
|
+
# ---------------------------------------------------------------------------
|
|
232
|
+
# Structured logging (for receiver tab to display)
|
|
233
|
+
# ---------------------------------------------------------------------------
|
|
234
|
+
|
|
235
|
+
# Short hexadecimal tag shared by every record this invocation writes.
LOG_ID=$(printf '%04x' $((RANDOM % 65536)))

#######################################
# Append one record to the receiver log. Logging is best effort: any failure
# to create the directory or write the file is ignored.
# Record format (one line, pipe-delimited):
#   TIMESTAMP|STATUS|PROJECT|VOICE|TEXT_PREVIEW|DETAIL|IP|LOG_ID
# Newlines, carriage returns and '|' inside the free-text fields are
# replaced by spaces so a record always stays on one line with 8 fields.
# Globals:
#   LOG_FILE, TEXT, VOICE, PROJECT, LOG_ID (read)
#   SSH_CLIENT (read, optional; "IP PORT PORT" as set by sshd)
# Arguments:
#   $1 - status keyword (e.g. RECEIVED, PLAYING, ERROR)
#   $2 - free-form detail (optional)
# Returns:
#   0 always
#######################################
log_message() {
  local status="$1"
  local detail="${2:-}"
  local timestamp log_dir sender_ip preview project

  timestamp=$(date '+%Y-%m-%dT%H:%M:%S')
  log_dir=$(dirname "$LOG_FILE")
  mkdir -p "$log_dir" 2>/dev/null || true

  # SSH_CLIENT is unset for local (non-SSH) runs; default it explicitly so
  # `set -u` does not abort the script.
  sender_ip="${SSH_CLIENT:-}"
  sender_ip="${sender_ip%% *}"
  [[ -n "$sender_ip" ]] || sender_ip="local"

  preview="${TEXT:0:200}"
  project="${PROJECT:-unknown}"
  preview=${preview//[$'\r\n|']/ }
  detail=${detail//[$'\r\n|']/ }
  project=${project//[$'\r\n|']/ }

  printf '%s|%s|%s|%s|%s|%s|%s|%s\n' \
    "$timestamp" "$status" "$project" "$VOICE" "$preview" "$detail" "$sender_ip" "$LOG_ID" \
    >> "$LOG_FILE" 2>/dev/null || true
}

log_message "RECEIVED" "provider=${PROVIDER} effects=${SOX_EFFECTS:-none} music=${BG_FILE:-none}"
|
|
256
|
+
|
|
257
|
+
# ---------------------------------------------------------------------------
|
|
258
|
+
# Temp files with cleanup
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
# Use own runtime dir for temp files (not the desktop user's)
|
|
262
|
+
# Prefer this user's own runtime directory; fall back to /tmp when it is
# missing or not writable.
_TEMP_BASE="/run/user/$(id -u)"
if [[ ! -d "$_TEMP_BASE" || ! -w "$_TEMP_BASE" ]]; then
  _TEMP_BASE="/tmp"
fi

# All intermediate audio lives in one private directory. The template ends
# in the X's because BSD/macOS mktemp only randomises trailing X's: a
# template such as "name-XXXXXX.wav" would produce a fixed file name there
# and make concurrent requests collide.
_TEMP_DIR=$(mktemp -d "$_TEMP_BASE/agentvibes-recv-XXXXXX")
trap 'rm -rf -- "${_TEMP_DIR:?}"' EXIT

RAW_WAV="$_TEMP_DIR/raw.wav"         # TTS output
EFFECTS_WAV="$_TEMP_DIR/effects.wav" # after sox effects
FINAL_WAV="$_TEMP_DIR/final.wav"     # after background-music mix

# Create the files up front, as before, so tools that expect an existing
# path keep working.
: > "$RAW_WAV"
: > "$EFFECTS_WAV"
: > "$FINAL_WAV"
|
|
268
|
+
|
|
269
|
+
# ---------------------------------------------------------------------------
|
|
270
|
+
# Step 1: Generate TTS audio (multi-provider dispatch)
|
|
271
|
+
# ---------------------------------------------------------------------------
|
|
272
|
+
|
|
273
|
+
#######################################
# Synthesize $TEXT into $RAW_WAV with piper.
# When the requested voice model is missing, the first *.onnx model in
# VOICES_DIR (glob order) is used instead and VOICE is updated to match.
# Globals:
#   VOICES_DIR, TEXT, SPEED, RAW_WAV (read); VOICE (read, may be rewritten)
# Returns:
#   0 on success, 1 when no model exists or piper fails.
#######################################
_generate_tts_piper() {
  local model="$VOICES_DIR/${VOICE}.onnx"

  if [[ ! -f "$model" ]]; then
    # Fallback: try any available voice rather than failing. A glob is used
    # instead of `find -type f` because it also works when VOICES_DIR (or a
    # model inside it) is a symlink, and its order is deterministic.
    local candidate fallback=""
    for candidate in "$VOICES_DIR"/*.onnx; do
      if [[ -f "$candidate" ]]; then
        fallback=$candidate
        break
      fi
    done

    if [[ -z "$fallback" ]]; then
      log_message "ERROR" "No voice models found in $VOICES_DIR"
      echo "Error: No voice models found in $VOICES_DIR" >&2
      return 1
    fi

    local fallback_name
    fallback_name=$(basename "$fallback" .onnx)
    log_message "WARN" "Voice $VOICE not found, falling back to $fallback_name"
    echo "Warning: Voice $VOICE not found, using $fallback_name" >&2
    VOICE="$fallback_name"
    model="$fallback"
  fi

  local args=(--model "$model" --output_file "$RAW_WAV")
  if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    # NOTE(review): SPEED is passed straight through as --length_scale; the
    # macOS and SAPI paths treat a larger SPEED as faster speech - confirm
    # the sender already converts the value for piper.
    args+=(--length_scale "$SPEED")
  fi

  # printf, not echo: text such as "-n" or "-e" must reach piper verbatim.
  printf '%s\n' "$TEXT" | piper "${args[@]}" 2>/dev/null || {
    log_message "ERROR" "Piper TTS failed"
    echo "Error: Piper TTS generation failed" >&2
    return 1
  }
}
|
|
304
|
+
|
|
305
|
+
#######################################
# Synthesize $TEXT into $RAW_WAV with Soprano.
# Tries the local HTTP endpoint first (POST /v1/audio/speech on
# 127.0.0.1:$SOPRANO_PORT, default 7860), then the `soprano` CLI.
# Globals:
#   TEXT, RAW_WAV (read); SOPRANO_PORT (read, optional)
# Returns:
#   0 when either mode produced audio, 1 otherwise.
#######################################
_generate_tts_soprano() {
  local soprano_port="${SOPRANO_PORT:-7860}"

  # Encode the text as a JSON string. jq is preferred; without it the
  # request body used to be invalid JSON, so fall back to escaping the
  # characters that can occur in plain text.
  local json_text=""
  if command -v jq &>/dev/null; then
    json_text=$(printf '%s' "$TEXT" | jq -Rs . 2>/dev/null) || json_text=""
  fi
  if [[ -z "$json_text" ]]; then
    local bs='\' dq='"' nl=$'\n' cr=$'\r' tab=$'\t'
    json_text=${TEXT//"$bs"/"$bs$bs"} # backslash first, so later escapes survive
    json_text=${json_text//"$dq"/"$bs$dq"}
    json_text=${json_text//"$nl"/"${bs}n"}
    json_text=${json_text//"$cr"/"${bs}r"}
    json_text=${json_text//"$tab"/"${bs}t"}
    json_text="\"${json_text}\""
  fi

  # Try API mode first
  if curl -sf -X POST "http://127.0.0.1:${soprano_port}/v1/audio/speech" \
    -H "Content-Type: application/json" \
    -d "{\"input\":${json_text}}" \
    --output "$RAW_WAV" 2>/dev/null; then
    return 0
  fi

  # Try CLI mode - options before --, text as final positional arg
  if command -v soprano &>/dev/null; then
    soprano -o "$RAW_WAV" -- "$TEXT" 2>/dev/null && return 0
  fi

  log_message "ERROR" "Soprano TTS failed — is soprano running on port ${soprano_port}?"
  echo "Error: Soprano TTS unavailable (tried API and CLI)" >&2
  return 1
}
|
|
325
|
+
|
|
326
|
+
#######################################
# Synthesize $TEXT into $RAW_WAV with the macOS `say` command.
# say writes AIFF; the result is converted to WAV with ffmpeg when ffmpeg
# is available, otherwise the AIFF data is moved to $RAW_WAV unchanged.
# Globals:
#   TEXT, VOICE, SPEED, RAW_WAV (read)
# Returns:
#   0 on success, 1 when say is missing, say fails, or conversion fails.
#######################################
_generate_tts_macos() {
  if ! command -v say &>/dev/null; then
    log_message "ERROR" "macOS say command not found"
    echo "Error: macOS say command not available" >&2
    return 1
  fi

  local say_args=(-v "$VOICE")
  # Convert speed multiplier to WPM (say uses WPM, default ~200)
  if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    local wpm
    wpm=$(awk -v s="$SPEED" 'BEGIN {printf "%d", 200 * s}')
    say_args+=(-r "$wpm")
  fi

  local aiff_tmp="${RAW_WAV%.wav}.aiff"
  # printf, not echo: text such as "-n" or "-e" must reach say verbatim.
  printf '%s\n' "$TEXT" | say "${say_args[@]}" -o "$aiff_tmp" 2>/dev/null || {
    log_message "ERROR" "macOS say failed"
    rm -f "$aiff_tmp"
    return 1
  }

  if command -v ffmpeg &>/dev/null; then
    # A failed conversion used to be masked by the exit status of the
    # following rm, leaving an empty RAW_WAV to be "played".
    if ! ffmpeg -y -i "$aiff_tmp" "$RAW_WAV" </dev/null 2>/dev/null; then
      log_message "ERROR" "ffmpeg AIFF to WAV conversion failed"
      echo "Error: macOS say audio conversion failed" >&2
      rm -f "$aiff_tmp"
      return 1
    fi
    rm -f "$aiff_tmp"
  else
    # No ffmpeg - rename and hope player handles AIFF
    mv -f "$aiff_tmp" "$RAW_WAV"
  fi
}
|
|
357
|
+
|
|
358
|
+
#######################################
# Synthesize $TEXT into $RAW_WAV with Windows SAPI through PowerShell
# (powershell.exe, e.g. from WSL2, or pwsh).
# Globals:
#   TEXT, SPEED, RAW_WAV (read)
# Returns:
#   0 on success, 1 when PowerShell is missing or synthesis fails.
#######################################
_generate_tts_windows_sapi() {
  local ps_cmd=""
  if command -v powershell.exe &>/dev/null; then
    ps_cmd="powershell.exe"
  elif command -v pwsh &>/dev/null; then
    ps_cmd="pwsh"
  else
    log_message "ERROR" "PowerShell not found for Windows SAPI"
    echo "Error: PowerShell required for Windows SAPI" >&2
    return 1
  fi

  # SECURITY: never splice the text into PowerShell source. Doubling ASCII
  # apostrophes is not enough, because PowerShell also accepts typographic
  # quotes as string delimiters. The text travels as base64 (alphabet
  # A-Z a-z 0-9 + / =) and is decoded as UTF-8 inside PowerShell.
  local text_b64
  text_b64=$(printf '%s' "$TEXT" | base64 | tr -d '\r\n') || {
    log_message "ERROR" "Windows SAPI TTS failed"
    echo "Error: Windows SAPI generation failed" >&2
    return 1
  }

  local rate=0
  if [[ -n "$SPEED" ]] && [[ "$SPEED" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    # SAPI rate: -10 to 10, 0 is normal.
    # Speed 0.5 -> -5, 1.0 -> 0, 1.5 -> 5, 2.0 and above -> 10 (clamped).
    rate=$(awk -v s="$SPEED" \
      'BEGIN {r = (s - 1.0) * 10; if (r > 10) r = 10; if (r < -10) r = -10; printf "%d", r}')
  fi

  # Translate the path for Windows when wslpath exists; otherwise use as is.
  local wav_path
  wav_path=$(wslpath -w "$RAW_WAV" 2>/dev/null) || wav_path=$RAW_WAV
  # Double any apostrophe so the path is safe in a single-quoted literal.
  local sq="'"
  wav_path=${wav_path//"$sq"/"$sq$sq"}

  "$ps_cmd" -NoProfile -Command "
Add-Type -AssemblyName System.Speech
\$text = [System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String('$text_b64'))
\$synth = New-Object System.Speech.Synthesis.SpeechSynthesizer
\$synth.Rate = $rate
\$synth.SetOutputToWaveFile('$wav_path')
\$synth.Speak(\$text)
\$synth.Dispose()
" 2>/dev/null || {
    log_message "ERROR" "Windows SAPI TTS failed"
    echo "Error: Windows SAPI generation failed" >&2
    return 1
  }
}
|
|
394
|
+
|
|
395
|
+
# Dispatch to the appropriate TTS provider
|
|
396
|
+
# Run the generator that belongs to the selected provider. Generator
# functions are named _generate_tts_<provider>, with the hyphen of
# "windows-sapi" written as an underscore.
case "$PROVIDER" in
  piper | soprano | macos | windows-sapi)
    "_generate_tts_${PROVIDER//-/_}" || exit 1
    ;;
  *)
    log_message "ERROR" "Unknown provider: $PROVIDER"
    echo "Error: Unknown TTS provider: $PROVIDER" >&2
    exit 1
    ;;
esac

# The raw TTS output is what gets played unless a later step replaces it.
PLAY_FILE="$RAW_WAV"
|
|
417
|
+
|
|
418
|
+
# ---------------------------------------------------------------------------
|
|
419
|
+
# Step 2: Apply sox effects (reverb, EQ, etc.)
|
|
420
|
+
# ---------------------------------------------------------------------------
|
|
421
|
+
|
|
422
|
+
# Effects are optional and need sox; on any failure the unprocessed audio
# stays selected for playback.
if [[ -n "$SOX_EFFECTS" ]] && command -v sox &>/dev/null; then
  # SECURITY: only alphanumerics, spaces, dots, hyphens and underscores may
  # appear in the effects string.
  if [[ ! "$SOX_EFFECTS" =~ ^[a-zA-Z0-9\ ._-]+$ ]]; then
    log_message "WARN" "Rejected unsafe sox effects: ${SOX_EFFECTS:0:50}"
  else
    # Split on whitespace into separate sox arguments.
    read -r -a _sox_fx <<< "$SOX_EFFECTS"
    if sox "$RAW_WAV" "$EFFECTS_WAV" ${_sox_fx[@]+"${_sox_fx[@]}"} 2>/dev/null; then
      PLAY_FILE="$EFFECTS_WAV"
    fi
  fi
fi
|
|
430
|
+
|
|
431
|
+
# ---------------------------------------------------------------------------
|
|
432
|
+
# Step 3: Mix background music (if configured)
|
|
433
|
+
# ---------------------------------------------------------------------------
|
|
434
|
+
|
|
435
|
+
#######################################
# Check that a track name from the payload stays inside TRACKS_DIR.
# The check is lexical (no path component may be ".."), so it keeps working
# when TRACKS_DIR itself is a symlink.
# Arguments:
#   $1 - track name, relative to TRACKS_DIR (subdirectories allowed)
# Returns:
#   0 when the name is acceptable, 1 when empty or containing "..".
#######################################
_track_name_is_safe() {
  local name=$1
  [[ -n "$name" ]] || return 1
  case "/${name}/" in
    */../*) return 1 ;;
  esac
  return 0
}

# Mixing is optional and needs ffmpeg; on any failure the voice-only audio
# stays selected for playback.
if [[ -n "$BG_FILE" ]] && command -v ffmpeg &>/dev/null; then
  if ! _track_name_is_safe "$BG_FILE"; then
    # SECURITY: the music field comes from the payload; refuse traversal.
    log_message "WARN" "Rejected unsafe music path: ${BG_FILE:0:50}"
  else
    BG_PATH="$TRACKS_DIR/$BG_FILE"
    if [[ -f "$BG_PATH" ]]; then
      DURATION=$(ffprobe -v error -show_entries format=duration \
        -of default=noprint_wrappers=1:nokey=1 "$PLAY_FILE" 2>/dev/null || echo "")
      # Only a plain decimal is usable; a value such as "N/A" would make awk
      # fail and abort the whole script under `set -e`.
      if [[ "$DURATION" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
        # The voice is delayed by 2 s, so the mix lasts DURATION + 2 and the
        # music starts its 2 s fade-out at DURATION.
        TOTAL_DUR=$(awk -v d="$DURATION" 'BEGIN {printf "%.2f", d + 2}')
        FADE_OUT=$(awk -v d="$DURATION" 'BEGIN {printf "%.2f", d}')

        # Bound the mix to 20 s where `timeout` exists (absent on stock macOS).
        _mix_runner=()
        if command -v timeout &>/dev/null; then
          _mix_runner=(timeout 20)
        fi

        if ${_mix_runner[@]+"${_mix_runner[@]}"} ffmpeg -y -i "$PLAY_FILE" -stream_loop -1 -i "$BG_PATH" \
          -filter_complex "[1:a]volume=${BG_VOLUME},afade=t=in:st=0:d=0.3,afade=t=out:st=${FADE_OUT}:d=2[bg];[0:a]adelay=2000|2000[v];[v][bg]amix=inputs=2:duration=longest[out]" \
          -map "[out]" -t "$TOTAL_DUR" "$FINAL_WAV" </dev/null 2>/dev/null; then
          PLAY_FILE="$FINAL_WAV"
        fi
      fi
    fi
  fi
fi
|
|
449
|
+
|
|
450
|
+
# ---------------------------------------------------------------------------
|
|
451
|
+
# Step 4: Play audio in foreground (required for SSH — no backgrounding)
|
|
452
|
+
# ---------------------------------------------------------------------------
|
|
453
|
+
|
|
454
|
+
if [[ -z "$AUDIO_PLAYER" ]]; then
  log_message "ERROR" "No audio player found (pw-play, paplay, aplay)"
  echo "Error: No audio player available" >&2
  exit 1
fi

# Save master volume before playback - flat-volumes in PipeWire/PulseAudio
# can change master volume when a new stream connects from another user.
# The lookup is best effort: under `set -e`/pipefail a failing pactl (no
# server reachable) or an empty grep would otherwise abort before playback.
_saved_vol=""
if command -v pactl &>/dev/null; then
  _saved_vol=$(pactl get-sink-volume @DEFAULT_SINK@ 2>/dev/null | grep -o '[0-9]*%' | head -n 1) || true
fi

log_message "PLAYING" "player=$AUDIO_PLAYER sink=${_default_sink:-unknown} vol=${_saved_vol:-?} pulse=${PULSE_SERVER:-unset}"

# Play in the foreground, capturing the player's output for diagnostics.
# The ${arr[@]+...} form keeps an empty argument array from tripping
# `set -u` on bash older than 4.4.
_play_status=0
_play_err=$("$AUDIO_PLAYER" ${AUDIO_PLAYER_ARGS[@]+"${AUDIO_PLAYER_ARGS[@]}"} "$PLAY_FILE" 2>&1) || _play_status=$?

# Restore master volume to what it was before playback - also when playback
# failed, since the stream may already have connected.
# NOTE(review): volume is saved/restored on @DEFAULT_SINK@ even when a
# specific sink was configured for playback - confirm that is intended.
if [[ -n "$_saved_vol" ]] && command -v pactl &>/dev/null; then
  pactl set-sink-volume @DEFAULT_SINK@ "$_saved_vol" 2>/dev/null || true
fi

if ((_play_status != 0)); then
  log_message "ERROR" "Playback failed with $AUDIO_PLAYER: $_play_err"
  echo "Error: Audio playback failed" >&2
  echo "Detail: $_play_err" >&2
  exit 1
fi

log_message "DONE" ""
exit 0
|