agentvibes 5.2.1 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/config/audio-effects.cfg +1 -1
- package/.claude/hooks/play-tts-ssh-remote.sh +104 -10
- package/.claude/hooks/play-tts.sh +17 -6
- package/.claude/hooks-windows/bmad-party-speak.ps1 +5 -1
- package/.claude/hooks-windows/play-tts.ps1 +89 -57
- package/README.md +12 -2
- package/RELEASE_NOTES.md +92 -0
- package/package.json +1 -1
- package/src/console/tabs/setup-tab.js +9 -6
- package/src/console/tabs/voices-tab.js +9 -3
- package/src/services/llm-provider-service.js +13 -0
|
@@ -54,6 +54,6 @@ default||agent_vibes_chillwave_v2_loop.mp3|0.15
|
|
|
54
54
|
# Claude Code LLM default - pretext + music for remote receivers without per-LLM config|||
|
|
55
55
|
llm:default|reverb 20 50 50|agent_vibes_chillwave_v2_loop.mp3|0.15||AgentVibes here
|
|
56
56
|
analyst|reverb 70 50 100|agentvibes_soft_flamenco_loop.mp3|0.30
|
|
57
|
-
llm:claude-code|light|
|
|
57
|
+
llm:claude-code|light|agent_vibes_celtic_harp_v1_loop.mp3|0.15|en_US-libritts-high::Leo-7|Agent Vibes Local Here|piper
|
|
58
58
|
llm:copilot|light|agent_vibes_bossa_nova_v2_loop.mp3|0.15|en_US-libritts-high::Anna-11|Copilot here|piper
|
|
59
59
|
llm:codex|light|agent_vibes_chillwave_v2_loop.mp3|0.15|en_US-lessac-high|Codex here|piper
|
|
@@ -14,13 +14,63 @@
|
|
|
14
14
|
|
|
15
15
|
set -euo pipefail
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
# ---------------------------------------------------------------------------
|
|
18
|
+
# Argument parsing — supports both positional (backward compat) and flags.
|
|
19
|
+
#
|
|
20
|
+
# Positional (legacy):
|
|
21
|
+
# play-tts-ssh-remote.sh "text" [voice] [agent_name]
|
|
22
|
+
#
|
|
23
|
+
# Flag-based (full per-call control):
|
|
24
|
+
# play-tts-ssh-remote.sh --text "hello" --voice "en_US-ryan-high" \
|
|
25
|
+
# --pretext "Winston here" --music "track.mp3" \
|
|
26
|
+
# --volume 0.25 --effects "reverb 40 60 80" \
|
|
27
|
+
# --speed 1.1 --provider piper --agent "winston"
|
|
28
|
+
#
|
|
29
|
+
# Flags override config-file values. Any flag omitted → falls back to config.
|
|
30
|
+
# --pretext "" (empty string) explicitly suppresses pretext (no fallback).
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
TEXT=""
|
|
34
|
+
VOICE=""
|
|
35
|
+
AGENT_NAME=""
|
|
36
|
+
PRETEXT_OVERRIDE=""
|
|
37
|
+
PRETEXT_SET=0 # whether --pretext was explicitly provided
|
|
38
|
+
MUSIC_OVERRIDE=""
|
|
39
|
+
VOLUME_OVERRIDE=""
|
|
40
|
+
EFFECTS_OVERRIDE=""
|
|
41
|
+
SPEED_OVERRIDE=""
|
|
42
|
+
PROVIDER_OVERRIDE=""
|
|
43
|
+
|
|
44
|
+
# Detect flag-based vs positional usage: if first arg starts with --, use flags.
|
|
45
|
+
if [[ "${1:-}" == --* ]]; then
|
|
46
|
+
while [[ $# -gt 0 ]]; do
|
|
47
|
+
case "$1" in
|
|
48
|
+
--text) TEXT="${2:-}"; shift 2 ;;
|
|
49
|
+
--voice) VOICE="${2:-}"; shift 2 ;;
|
|
50
|
+
--agent) AGENT_NAME="${2:-}"; shift 2 ;;
|
|
51
|
+
--pretext) PRETEXT_OVERRIDE="${2:-}"; PRETEXT_SET=1; shift 2 ;;
|
|
52
|
+
--music) MUSIC_OVERRIDE="${2:-}"; shift 2 ;;
|
|
53
|
+
--volume) VOLUME_OVERRIDE="${2:-}"; shift 2 ;;
|
|
54
|
+
--effects) EFFECTS_OVERRIDE="${2:-}"; shift 2 ;;
|
|
55
|
+
--speed) SPEED_OVERRIDE="${2:-}"; shift 2 ;;
|
|
56
|
+
--provider) PROVIDER_OVERRIDE="${2:-}"; shift 2 ;;
|
|
57
|
+
*) echo "Unknown flag: $1" >&2; exit 1 ;;
|
|
58
|
+
esac
|
|
59
|
+
done
|
|
60
|
+
else
|
|
61
|
+
TEXT="${1:-}"
|
|
62
|
+
VOICE="${2:-}"
|
|
63
|
+
AGENT_NAME="${3:-}"
|
|
64
|
+
fi
|
|
65
|
+
|
|
66
|
+
# Defaults for still-empty values
|
|
67
|
+
VOICE="${VOICE:-en_US-lessac-medium}"
|
|
68
|
+
AGENT_NAME="${AGENT_NAME:-default}"
|
|
20
69
|
|
|
21
70
|
# Validate required input
|
|
22
71
|
if [[ -z "$TEXT" ]]; then
|
|
23
|
-
echo "Usage: $0 <text> [voice] [agent_name]" >&2
|
|
72
|
+
echo "Usage (positional): $0 <text> [voice] [agent_name]" >&2
|
|
73
|
+
echo "Usage (flags): $0 --text <text> [--voice X --pretext Y --music Z --volume N --effects E --speed S --provider P --agent A]" >&2
|
|
24
74
|
exit 1
|
|
25
75
|
fi
|
|
26
76
|
|
|
@@ -115,6 +165,49 @@ case "${PROVIDER:-}" in
|
|
|
115
165
|
*) PROVIDER="piper" ;;
|
|
116
166
|
esac
|
|
117
167
|
|
|
168
|
+
# ---------------------------------------------------------------------------
|
|
169
|
+
# Apply CLI flag overrides (flags win over config files)
|
|
170
|
+
# ---------------------------------------------------------------------------
|
|
171
|
+
# Validate effects (prevent injection — only alphanumeric, spaces, dots, underscores, hyphens)
|
|
172
|
+
if [[ -n "$EFFECTS_OVERRIDE" ]]; then
|
|
173
|
+
if [[ "$EFFECTS_OVERRIDE" =~ ^[a-zA-Z0-9\ ._-]+$ ]]; then
|
|
174
|
+
SOX_EFFECTS="$EFFECTS_OVERRIDE"
|
|
175
|
+
else
|
|
176
|
+
echo "Invalid effects format: $EFFECTS_OVERRIDE (alphanumeric/space/.-_ only)" >&2
|
|
177
|
+
exit 1
|
|
178
|
+
fi
|
|
179
|
+
fi
|
|
180
|
+
[[ -n "$MUSIC_OVERRIDE" ]] && BG_FILE="$MUSIC_OVERRIDE"
|
|
181
|
+
[[ -n "$VOLUME_OVERRIDE" ]] && BG_VOLUME="$VOLUME_OVERRIDE"
|
|
182
|
+
[[ -n "$SPEED_OVERRIDE" ]] && SPEED="$SPEED_OVERRIDE"
|
|
183
|
+
[[ -n "$PROVIDER_OVERRIDE" ]] && PROVIDER="$PROVIDER_OVERRIDE"
|
|
184
|
+
# Pretext: explicit --pretext wins even when empty string (suppresses pretext)
|
|
185
|
+
if [[ "$PRETEXT_SET" == "1" ]]; then
|
|
186
|
+
PRETEXT="$PRETEXT_OVERRIDE"
|
|
187
|
+
fi
|
|
188
|
+
|
|
189
|
+
# Re-validate provider after override (in case user passed bad value)
|
|
190
|
+
case "${PROVIDER:-}" in
|
|
191
|
+
piper|soprano|macos|windows-sapi) ;;
|
|
192
|
+
*) PROVIDER="piper" ;;
|
|
193
|
+
esac
|
|
194
|
+
# Validate music filename (prevent path injection through JSON → receiver)
|
|
195
|
+
# Allows spaces in track names like "Late Night Hip Hop Groove.mp3"
|
|
196
|
+
if [[ -n "$BG_FILE" && ! "$BG_FILE" =~ ^[a-zA-Z0-9_\.\ -]+$ ]]; then
|
|
197
|
+
echo "Invalid music filename format: $BG_FILE (alphanumeric/space/.-_ only)" >&2
|
|
198
|
+
exit 1
|
|
199
|
+
fi
|
|
200
|
+
# Validate volume
|
|
201
|
+
if [[ -n "$BG_VOLUME" && ! "$BG_VOLUME" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
|
|
202
|
+
echo "Invalid volume: $BG_VOLUME (numeric only)" >&2
|
|
203
|
+
exit 1
|
|
204
|
+
fi
|
|
205
|
+
# Validate speed
|
|
206
|
+
if [[ -n "$SPEED" && ! "$SPEED" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
|
|
207
|
+
echo "Invalid speed: $SPEED (numeric only)" >&2
|
|
208
|
+
exit 1
|
|
209
|
+
fi
|
|
210
|
+
|
|
118
211
|
# ---------------------------------------------------------------------------
|
|
119
212
|
# Build JSON payload
|
|
120
213
|
# ---------------------------------------------------------------------------
|
|
@@ -134,13 +227,14 @@ build_json_payload() {
|
|
|
134
227
|
--arg provider "$PROVIDER" \
|
|
135
228
|
'{text: $text, voice: $voice, effects: $effects, music: $music, volume: $volume, project: $project, pretext: $pretext, speed: $speed, provider: $provider}'
|
|
136
229
|
else
|
|
137
|
-
# Manual JSON — escape
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
230
|
+
# Manual JSON — escape all interpolated fields (backslash, double-quote, tab); newlines are flattened to spaces
|
|
231
|
+
# _esc VALUE — print VALUE made safe for embedding inside a JSON string literal.
#   backslash -> \\   double-quote -> \"   tab -> \t
#   newline / carriage return -> single space (payload stays on one line)
# Uses bash parameter expansion instead of sed: `\t` inside a sed regex is a
# GNU extension (POSIX leaves it undefined), so on other seds it may match the
# letter "t" and corrupt the text. Backslashes are escaped first so the
# backslashes introduced by later steps are not doubled.
_esc() {
local s="$1"
s=${s//\\/\\\\}
s=${s//\"/\\\"}
s=${s//$'\t'/\\t}
s=${s//$'\n'/ }
s=${s//$'\r'/ }
printf '%s' "$s"
}
|
|
232
|
+
local e_text e_voice e_effects e_music e_vol e_proj e_pre e_spd e_prov
|
|
233
|
+
e_text=$(_esc "$TEXT"); e_voice=$(_esc "$VOICE"); e_effects=$(_esc "$SOX_EFFECTS")
|
|
234
|
+
e_music=$(_esc "$BG_FILE"); e_vol=$(_esc "$BG_VOLUME"); e_proj=$(_esc "$PROJECT_NAME")
|
|
235
|
+
e_pre=$(_esc "$PRETEXT"); e_spd=$(_esc "$SPEED"); e_prov=$(_esc "$PROVIDER")
|
|
142
236
|
printf '{"text":"%s","voice":"%s","effects":"%s","music":"%s","volume":"%s","project":"%s","pretext":"%s","speed":"%s","provider":"%s"}' \
|
|
143
|
-
"$
|
|
237
|
+
"$e_text" "$e_voice" "$e_effects" "$e_music" "$e_vol" "$e_proj" "$e_pre" "$e_spd" "$e_prov"
|
|
144
238
|
fi
|
|
145
239
|
}
|
|
146
240
|
|
|
@@ -200,12 +200,23 @@ fi
|
|
|
200
200
|
# Source provider manager to get active provider
|
|
201
201
|
source "$SCRIPT_DIR/provider-manager.sh"
|
|
202
202
|
|
|
203
|
-
# Get active provider
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
203
|
+
# Get active provider.
|
|
204
|
+
# Per-LLM engine (from audio-effects.cfg `llm:<key>` row column 7) overrides
|
|
205
|
+
# the global tts-provider.txt — UNLESS the global is a transport provider
|
|
206
|
+
# (ssh-remote, agentvibes-receiver, termux-ssh). Transport providers
|
|
207
|
+
# forward TTS to a remote receiver which picks its OWN engine; overriding
|
|
208
|
+
# them with a local engine like piper would synthesize on the wrong host.
|
|
209
|
+
ACTIVE_PROVIDER=$(get_active_provider)
|
|
210
|
+
case "$ACTIVE_PROVIDER" in
|
|
211
|
+
ssh-remote|agentvibes-receiver|termux-ssh)
|
|
212
|
+
# Transport — keep it. The receiver's audio-effects.cfg picks the engine.
|
|
213
|
+
;;
|
|
214
|
+
*)
|
|
215
|
+
if [[ -n "$_LLM_ENGINE" ]]; then
|
|
216
|
+
ACTIVE_PROVIDER="$_LLM_ENGINE"
|
|
217
|
+
fi
|
|
218
|
+
;;
|
|
219
|
+
esac
|
|
209
220
|
|
|
210
221
|
# Show GitHub star reminder (once per day)
|
|
211
222
|
bash "$SCRIPT_DIR/github-star-reminder.sh" 2>/dev/null || true
|
|
@@ -242,7 +242,11 @@ try {
|
|
|
242
242
|
try {
|
|
243
243
|
# WaitOne throws AbandonedMutexException if prior process crashed while holding it.
|
|
244
244
|
# That exception means we DID acquire the mutex — treat it as success (fixes M2).
|
|
245
|
-
|
|
245
|
+
# 600s timeout covers worst-case party-mode queue depth (9 agents x
|
|
246
|
+
# ~60s of speech each). Shorter timeouts silently dropped the last
|
|
247
|
+
# agents to speak. play-tts.ps1 bounds true stuck processes via its
|
|
248
|
+
# playback watchdog, so waiting longer here is safe.
|
|
249
|
+
$acquired = $mutex.WaitOne(600000)
|
|
246
250
|
} catch [System.Threading.AbandonedMutexException] {
|
|
247
251
|
$acquired = $true # abandoned = we now own it
|
|
248
252
|
}
|
|
@@ -16,6 +16,20 @@ param(
|
|
|
16
16
|
[string]$llm = ""
|
|
17
17
|
)
|
|
18
18
|
|
|
19
|
+
# Text-file handoff: Windows command-line arg passing mangles text with
|
|
20
|
+
# quotes, newlines, or non-ASCII characters. The SSH receiver watcher
|
|
21
|
+
# (setup-ssh-receiver.ps1) writes long/special-char text to a UTF-8 temp
|
|
22
|
+
# file and passes the sentinel "__from_file__" + AGENTVIBES_TEXT_FILE env
|
|
23
|
+
# var. Load the real text here before any validation or synthesis.
|
|
24
|
+
if ($Text -eq "__from_file__" -and $env:AGENTVIBES_TEXT_FILE) {
|
|
25
|
+
if (Test-Path $env:AGENTVIBES_TEXT_FILE) {
|
|
26
|
+
$Text = [System.IO.File]::ReadAllText($env:AGENTVIBES_TEXT_FILE, [System.Text.UTF8Encoding]::new($false))
|
|
27
|
+
} else {
|
|
28
|
+
Write-Error "AGENTVIBES_TEXT_FILE set to missing path: $($env:AGENTVIBES_TEXT_FILE)"
|
|
29
|
+
exit 1
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
19
33
|
# Security: Validate LLM provider name (alphanumeric, hyphens, underscores
|
|
20
34
|
# only) -- mirrors play-tts.sh line 92. This prevents weird values from
|
|
21
35
|
# poisoning the audio-effects.cfg lookup or the AGENTVIBES_LLM_KEY env var
|
|
@@ -53,63 +67,42 @@ if (-not $llm) {
|
|
|
53
67
|
# process's playback.
|
|
54
68
|
$_PlaybackMutex = New-Object System.Threading.Mutex($false, "AgentVibesPlaybackLock")
|
|
55
69
|
|
|
56
|
-
# ---
|
|
57
|
-
# If
|
|
58
|
-
#
|
|
59
|
-
# and force-kills this process. Without this, a stuck
|
|
60
|
-
# the playback mutex forever and silently blocks every
|
|
61
|
-
# call across all LLMs.
|
|
70
|
+
# --- Playback watchdog ---
|
|
71
|
+
# If playback itself hangs (SoundPlayer deadlock, audio device locked,
|
|
72
|
+
# etc.), a sibling PowerShell job waits 120 seconds from the moment
|
|
73
|
+
# playback STARTS and force-kills this process. Without this, a stuck
|
|
74
|
+
# play-tts.ps1 holds the playback mutex forever and silently blocks every
|
|
75
|
+
# subsequent TTS call across all LLMs.
|
|
62
76
|
#
|
|
63
|
-
#
|
|
64
|
-
#
|
|
65
|
-
#
|
|
66
|
-
#
|
|
67
|
-
#
|
|
68
|
-
#
|
|
69
|
-
# kills playback mid-sentence, which is acceptable degradation vs. a
|
|
70
|
-
# deadlocked queue.
|
|
71
|
-
$_WatchdogJob = $null
|
|
72
|
-
try {
|
|
73
|
-
$_WatchdogJob = Start-Job -ArgumentList $PID -ScriptBlock {
|
|
74
|
-
param($parentPid)
|
|
75
|
-
Start-Sleep -Seconds 25
|
|
76
|
-
try {
|
|
77
|
-
# Only kill if still alive -- harmless if already exited
|
|
78
|
-
$p = Get-Process -Id $parentPid -ErrorAction SilentlyContinue
|
|
79
|
-
if ($p) {
|
|
80
|
-
[Console]::Error.WriteLine("[AgentVibes] play-tts.ps1 watchdog fired -- force-killing pid $parentPid after 25s")
|
|
81
|
-
Stop-Process -Id $parentPid -Force -ErrorAction SilentlyContinue
|
|
82
|
-
}
|
|
83
|
-
} catch { }
|
|
84
|
-
}
|
|
85
|
-
} catch {
|
|
86
|
-
# If Start-Job fails (rare), just continue without the watchdog -- no
|
|
87
|
-
# regression from pre-watchdog behavior.
|
|
88
|
-
$_WatchdogJob = $null
|
|
89
|
-
}
|
|
77
|
+
# IMPORTANT: the watchdog is started AFTER mutex acquisition (inside
|
|
78
|
+
# Invoke-SerializedPlay), not at script entry. Starting it at script
|
|
79
|
+
# entry caused round-robin / party-mode cut-offs: when 9 agents fire
|
|
80
|
+
# text_to_speech in quick succession, later calls spend most of their
|
|
81
|
+
# 120s budget waiting for the mutex, then get killed mid-playback.
|
|
82
|
+
# The mutex WaitOne() bounds queue waiting separately.
|
|
90
83
|
|
|
91
84
|
function Invoke-SerializedPlay {
|
|
92
85
|
param([Parameter(Mandatory)][string]$WavPath)
|
|
93
86
|
$acquired = $false
|
|
87
|
+
$watchdogJob = $null
|
|
94
88
|
try {
|
|
95
89
|
try {
|
|
96
|
-
#
|
|
97
|
-
#
|
|
98
|
-
#
|
|
99
|
-
#
|
|
100
|
-
$acquired = $_PlaybackMutex.WaitOne(
|
|
90
|
+
# 600s timeout to acquire the playback mutex. Covers worst-case
|
|
91
|
+
# queue depth (round-robin with 9 agents x ~60s of playback each).
|
|
92
|
+
# AbandonedMutexException means the holder's process actually
|
|
93
|
+
# died -- we inherit ownership.
|
|
94
|
+
$acquired = $_PlaybackMutex.WaitOne(600000)
|
|
101
95
|
} catch [System.Threading.AbandonedMutexException] {
|
|
102
96
|
$acquired = $true
|
|
103
97
|
}
|
|
104
98
|
if (-not $acquired) {
|
|
105
99
|
# Self-heal: kill any stuck play-tts.ps1 processes (other than
|
|
106
|
-
# ourselves) that have been alive longer than
|
|
107
|
-
#
|
|
108
|
-
#
|
|
109
|
-
# this call's audio is lost, but the queue recovers immediately.
|
|
100
|
+
# ourselves) that have been alive longer than 10 minutes. Past
|
|
101
|
+
# any legitimate playback window, so only truly stuck processes
|
|
102
|
+
# get killed.
|
|
110
103
|
try {
|
|
111
104
|
$myPid = $PID
|
|
112
|
-
$cutoff = (Get-Date).AddSeconds(-
|
|
105
|
+
$cutoff = (Get-Date).AddSeconds(-600)
|
|
113
106
|
$stuck = Get-CimInstance Win32_Process -ErrorAction SilentlyContinue |
|
|
114
107
|
Where-Object {
|
|
115
108
|
$_.Name -eq 'powershell.exe' -and
|
|
@@ -122,9 +115,28 @@ function Invoke-SerializedPlay {
|
|
|
122
115
|
Stop-Process -Id $p.ProcessId -Force -ErrorAction SilentlyContinue
|
|
123
116
|
}
|
|
124
117
|
} catch { }
|
|
125
|
-
[Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within
|
|
118
|
+
[Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within 600s. A prior play-tts.ps1 process was stuck holding it and has been killed; the next TTS call should succeed.")
|
|
126
119
|
exit 2
|
|
127
120
|
}
|
|
121
|
+
|
|
122
|
+
# Start the watchdog NOW (after mutex acquisition) so its 120s
|
|
123
|
+
# budget covers only the playback itself, not time spent queued.
|
|
124
|
+
try {
|
|
125
|
+
$watchdogJob = Start-Job -ArgumentList $PID -ScriptBlock {
|
|
126
|
+
param($parentPid)
|
|
127
|
+
Start-Sleep -Seconds 120
|
|
128
|
+
try {
|
|
129
|
+
$p = Get-Process -Id $parentPid -ErrorAction SilentlyContinue
|
|
130
|
+
if ($p) {
|
|
131
|
+
[Console]::Error.WriteLine("[AgentVibes] play-tts.ps1 playback watchdog fired -- force-killing pid $parentPid after 120s of playback")
|
|
132
|
+
Stop-Process -Id $parentPid -Force -ErrorAction SilentlyContinue
|
|
133
|
+
}
|
|
134
|
+
} catch { }
|
|
135
|
+
}
|
|
136
|
+
} catch {
|
|
137
|
+
$watchdogJob = $null
|
|
138
|
+
}
|
|
139
|
+
|
|
128
140
|
$player = $null
|
|
129
141
|
try {
|
|
130
142
|
$player = New-Object System.Media.SoundPlayer $WavPath
|
|
@@ -133,23 +145,18 @@ function Invoke-SerializedPlay {
|
|
|
133
145
|
if ($player) { $player.Dispose() }
|
|
134
146
|
}
|
|
135
147
|
} finally {
|
|
148
|
+
if ($watchdogJob) {
|
|
149
|
+
try {
|
|
150
|
+
Stop-Job -Job $watchdogJob -ErrorAction SilentlyContinue
|
|
151
|
+
Remove-Job -Job $watchdogJob -Force -ErrorAction SilentlyContinue
|
|
152
|
+
} catch { }
|
|
153
|
+
}
|
|
136
154
|
if ($acquired) {
|
|
137
155
|
try { $_PlaybackMutex.ReleaseMutex() } catch { }
|
|
138
156
|
}
|
|
139
157
|
}
|
|
140
158
|
}
|
|
141
159
|
|
|
142
|
-
# Register an exit handler that stops the watchdog job on normal exit so
|
|
143
|
-
# it doesn't fire on successful short runs.
|
|
144
|
-
Register-EngineEvent -SourceIdentifier PowerShell.Exiting -Action {
|
|
145
|
-
try {
|
|
146
|
-
if ($_WatchdogJob) {
|
|
147
|
-
Stop-Job -Job $_WatchdogJob -ErrorAction SilentlyContinue
|
|
148
|
-
Remove-Job -Job $_WatchdogJob -Force -ErrorAction SilentlyContinue
|
|
149
|
-
}
|
|
150
|
-
} catch { }
|
|
151
|
-
} | Out-Null
|
|
152
|
-
|
|
153
160
|
# Configuration paths
|
|
154
161
|
# Priority: CLAUDE_PROJECT_DIR env var -> script's parent project -> user profile
|
|
155
162
|
# Local project settings ALWAYS override global (~/.claude)
|
|
@@ -249,11 +256,26 @@ if ($llm) {
|
|
|
249
256
|
$env:AGENTVIBES_LLM_KEY = "llm:$llm"
|
|
250
257
|
}
|
|
251
258
|
|
|
259
|
+
# ---------------------------------------------------------------------------
|
|
260
|
+
# Per-call env-var overrides (set by the SSH watcher from queue JSON).
|
|
261
|
+
# These win over audio-effects.cfg lookup results for this call only.
|
|
262
|
+
# ---------------------------------------------------------------------------
|
|
263
|
+
if ($env:AGENTVIBES_OVERRIDE_MUSIC) { $LlmBgTrack = $env:AGENTVIBES_OVERRIDE_MUSIC }
|
|
264
|
+
if ($env:AGENTVIBES_OVERRIDE_VOLUME) { $LlmBgVolume = $env:AGENTVIBES_OVERRIDE_VOLUME }
|
|
265
|
+
if ($env:AGENTVIBES_OVERRIDE_EFFECTS) { $LlmReverb = $env:AGENTVIBES_OVERRIDE_EFFECTS }
|
|
266
|
+
|
|
252
267
|
# Prepend pretext if configured
|
|
253
268
|
# Priority: LLM-specific pretext -> project .agentvibes/config.json -> project .claude/config/tts-pretext.txt
|
|
254
269
|
# -> global ~/.agentvibes/config.json -> global ~/.claude/config/tts-pretext.txt
|
|
255
|
-
|
|
256
|
-
|
|
270
|
+
#
|
|
271
|
+
# Honor AGENTVIBES_NO_PRETEXT=1 for callers that already prepended a pretext
|
|
272
|
+
# (e.g., the SSH receiver watcher — server already added its own pretext
|
|
273
|
+
# before sending; double-prepending here would say "AgentVibes here, server-pretext, message").
|
|
274
|
+
$Pretext = ""
|
|
275
|
+
if ($env:AGENTVIBES_NO_PRETEXT -ne "1") {
|
|
276
|
+
$Pretext = $LlmPretext
|
|
277
|
+
}
|
|
278
|
+
if (-not $Pretext -and $env:AGENTVIBES_NO_PRETEXT -ne "1") {
|
|
257
279
|
$PretextSources = @(
|
|
258
280
|
(Join-Path $ProjectRoot ".agentvibes\config.json"),
|
|
259
281
|
"$ClaudeDir\config\tts-pretext.txt",
|
|
@@ -569,6 +591,16 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
|
569
591
|
$DefaultTrack = "agent_vibes_celtic_harp_v1_loop.mp3"
|
|
570
592
|
}
|
|
571
593
|
|
|
594
|
+
# Per-call env-var overrides (set by SSH watcher from queue JSON).
|
|
595
|
+
# Win over audio-effects.cfg lookup above. Validate filename to
|
|
596
|
+
# prevent path traversal before accepting.
|
|
597
|
+
if ($env:AGENTVIBES_OVERRIDE_MUSIC -and $env:AGENTVIBES_OVERRIDE_MUSIC -match '^[a-zA-Z0-9_\-\. ]+$') {
|
|
598
|
+
$DefaultTrack = $env:AGENTVIBES_OVERRIDE_MUSIC
|
|
599
|
+
}
|
|
600
|
+
if ($env:AGENTVIBES_OVERRIDE_VOLUME -and $env:AGENTVIBES_OVERRIDE_VOLUME -match '^\d+\.?\d*$') {
|
|
601
|
+
$BgVolume = $env:AGENTVIBES_OVERRIDE_VOLUME
|
|
602
|
+
}
|
|
603
|
+
|
|
572
604
|
$BgTrackPath = Join-Path $TracksDir $DefaultTrack
|
|
573
605
|
# Path containment: verify resolved path stays within tracks directory
|
|
574
606
|
$ResolvedBgTrack = [System.IO.Path]::GetFullPath($BgTrackPath)
|
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
[](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml)
|
|
12
12
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
13
13
|
|
|
14
|
-
**Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v5.2.1
|
|
14
|
+
**Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v5.3.0
|
|
15
15
|
|
|
16
16
|
---
|
|
17
17
|
|
|
@@ -43,7 +43,17 @@ Whether you're using Claude Code, GitHub Copilot, OpenAI Codex, Claude Desktop,
|
|
|
43
43
|
|
|
44
44
|
---
|
|
45
45
|
|
|
46
|
-
## 🎯 NEW IN v5.2.1 — Multi-LLM Identity & Install Polish
|
|
46
|
+
## 🎯 NEW IN v5.3.0 — Take Control of Remote Voices
|
|
47
|
+
|
|
48
|
+
- **Customize every remote announcement individually** — pass `--voice`, `--pretext`, `--music`, `--volume`, `--effects`, `--speed`, `--provider` on the command line for just that one message. No more editing config files and changing them back.
|
|
49
|
+
- **Skip the intro phrase on demand** — `--pretext ""` suppresses the pretext for a single message.
|
|
50
|
+
- **Long messages and special characters work correctly on Windows** — text with quotes, apostrophes, emoji, or multi-line content no longer gets truncated on its way to the voice engine.
|
|
51
|
+
- **Voice playback works on Windows servers with no monitor** — a background helper runs in your user session and picks up announcements from a queue, so audio plays even when SSH'ing in headless.
|
|
52
|
+
- **Voice preview on remote servers streams to the right device** — TUI preview no longer falls back to local audio on machines without speakers.
|
|
53
|
+
- **No more double intro phrases** when both sender and receiver have pretext configured.
|
|
54
|
+
- **55 new tests** for BMAD party mode voice assignment and agent isolation.
|
|
55
|
+
|
|
56
|
+
## 🎯 v5.2.1 — Multi-LLM Identity & Install Polish
|
|
47
57
|
|
|
48
58
|
- **Copilot gets its own voice + pretext + music** — "Copilot here" with bossa nova, fully distinct from Claude Code and Codex.
|
|
49
59
|
- **Per-tool MCP configs with explicit identity** — `.vscode/mcp.json`, `.codex/config.toml`, `~/.copilot/mcp-config.json` each set their own `AGENTVIBES_LLM`.
|
package/RELEASE_NOTES.md
CHANGED
|
@@ -1,5 +1,97 @@
|
|
|
1
1
|
# AgentVibes Release Notes
|
|
2
2
|
|
|
3
|
+
## 🎯 v5.3.0 — Take Control of Remote Voices
|
|
4
|
+
|
|
5
|
+
**Release Date:** April 2026
|
|
6
|
+
|
|
7
|
+
If you're using AgentVibes to send voice announcements from a server to
|
|
8
|
+
your phone, laptop, or another machine, this release puts you in the
|
|
9
|
+
driver's seat. Every call can now pick its own voice, background music,
|
|
10
|
+
intro phrase, reverb, volume, and speed — right from the command line,
|
|
11
|
+
for just that one message.
|
|
12
|
+
|
|
13
|
+
### ✨ What's New
|
|
14
|
+
|
|
15
|
+
#### You can now customize every announcement individually
|
|
16
|
+
|
|
17
|
+
Before, if you wanted a different voice or music for one specific
|
|
18
|
+
message, you had to change a config file (and remember to change it
|
|
19
|
+
back). Now you just add a flag to the command.
|
|
20
|
+
|
|
21
|
+
Want Winston to speak in his British accent with jazz playing for this
|
|
22
|
+
one deploy notification? Easy:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
bash .claude/hooks/play-tts-ssh-remote.sh \
|
|
26
|
+
--text "Deploy complete" \
|
|
27
|
+
--voice "en_US-ryan-high" \
|
|
28
|
+
--pretext "Winston here" \
|
|
29
|
+
--music "Late Night Hip Hop Groove.mp3" \
|
|
30
|
+
--volume 0.25
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Anything you don't specify falls back to your normal settings. Want to
|
|
34
|
+
skip the intro phrase just this once? Pass `--pretext ""` and it stays
|
|
35
|
+
silent before the message.
|
|
36
|
+
|
|
37
|
+
**Available flags:**
|
|
38
|
+
- `--voice` — which Piper voice to use
|
|
39
|
+
- `--pretext` — the intro phrase before the message (pass `""` to skip it)
|
|
40
|
+
- `--music` — background music track (filenames with spaces now work!)
|
|
41
|
+
- `--volume` — how loud the background music is (0.0 to 1.0)
|
|
42
|
+
- `--effects` — sound effects chain like reverb
|
|
43
|
+
- `--speed` — how fast the voice speaks
|
|
44
|
+
- `--provider` — which TTS engine to use
|
|
45
|
+
- `--agent` — which agent personality to use
|
|
46
|
+
|
|
47
|
+
The old way of calling the script still works, so nothing you've already
|
|
48
|
+
set up will break.
|
|
49
|
+
|
|
50
|
+
### 🛠 Reliability Fixes
|
|
51
|
+
|
|
52
|
+
- **Long messages and special characters no longer get cut off.** On
|
|
53
|
+
Windows, long announcements or text with quotes, apostrophes, or
|
|
54
|
+
emoji were getting mangled before they reached the voice engine.
|
|
55
|
+
Fixed — your message now arrives exactly as you sent it, no matter
|
|
56
|
+
how long or weird.
|
|
57
|
+
|
|
58
|
+
- **Voice announcements now work on Windows servers with no monitor.**
|
|
59
|
+
Windows refuses to play audio in the "service" session that SSH
|
|
60
|
+
normally uses. A small background helper now runs in your regular
|
|
61
|
+
user session and picks up announcements from a queue, so audio plays
|
|
62
|
+
correctly even on headless servers.
|
|
63
|
+
|
|
64
|
+
- **Voice preview in the TUI works on remote servers.** Before, if you
|
|
65
|
+
previewed a voice from a server with no speakers, it would try to
|
|
66
|
+
play locally (and fail). Now it correctly streams to whatever remote
|
|
67
|
+
device you've configured.
|
|
68
|
+
|
|
69
|
+
- **No more double intro phrases.** If you set a pretext on both the
|
|
70
|
+
sending server and the receiving machine, you used to hear it twice.
|
|
71
|
+
The sender's version wins now — the receiver won't add its own on top.
|
|
72
|
+
|
|
73
|
+
- **Remote streaming settings now actually stick.** A recent change
|
|
74
|
+
accidentally caused remote-streaming setups (`ssh-remote`,
|
|
75
|
+
`agentvibes-receiver`) to get overridden and fall back to local
|
|
76
|
+
playback. Fixed.
|
|
77
|
+
|
|
78
|
+
- **Long announcements don't get killed mid-sentence.** The safety
|
|
79
|
+
timeout that stops stuck audio was too aggressive for long messages.
|
|
80
|
+
It's now generous enough to handle paragraph-length announcements.
|
|
81
|
+
|
|
82
|
+
- **Cleaner installer state** — when you install AgentVibes for Claude
|
|
83
|
+
Code, it now writes its TTS provider file explicitly instead of
|
|
84
|
+
relying on implicit state.
|
|
85
|
+
|
|
86
|
+
### 🧪 Testing
|
|
87
|
+
|
|
88
|
+
55 new tests make sure BMAD party mode keeps working: each agent gets
|
|
89
|
+
their unique voice and music, agents don't accidentally share the same
|
|
90
|
+
Piper speaker ID, and the installer always points party mode at the
|
|
91
|
+
cross-platform entry point.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
3
95
|
## 🎯 v5.2.1 — Multi-LLM Identity & Install Polish
|
|
4
96
|
|
|
5
97
|
**Release Date:** April 2026
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://json.schemastore.org/package.json",
|
|
3
3
|
"name": "agentvibes",
|
|
4
|
-
"version": "5.
|
|
4
|
+
"version": "5.3.0",
|
|
5
5
|
"description": "Now your AI Agents can finally talk back! Professional TTS voice for Claude Code, Claude Desktop (via MCP), and Clawdbot with multi-provider support.",
|
|
6
6
|
"homepage": "https://agentvibes.org",
|
|
7
7
|
"keywords": [
|
|
@@ -1278,29 +1278,32 @@ export function createSetupTab(screen, services) {
|
|
|
1278
1278
|
const phrase = SAMPLE_PHRASES[Math.floor(Math.random() * SAMPLE_PHRASES.length)];
|
|
1279
1279
|
|
|
1280
1280
|
// Route through remote provider if active
|
|
1281
|
+
// Search order: CLAUDE_PROJECT_DIR → cwd → package root → home
|
|
1281
1282
|
const _remoteProviders = ['ssh-remote', 'agentvibes-receiver'];
|
|
1282
1283
|
let _activeProvider = '';
|
|
1283
1284
|
try {
|
|
1284
|
-
const
|
|
1285
|
+
const _pkgRoot = path.resolve(__dirname, '..', '..');
|
|
1285
1286
|
const _provPaths = [
|
|
1286
|
-
path.join(
|
|
1287
|
+
process.env.CLAUDE_PROJECT_DIR && path.join(process.env.CLAUDE_PROJECT_DIR, '.claude', 'tts-provider.txt'),
|
|
1288
|
+
path.join(process.cwd(), '.claude', 'tts-provider.txt'),
|
|
1289
|
+
path.join(_pkgRoot, '.claude', 'tts-provider.txt'),
|
|
1287
1290
|
path.join(os.homedir(), '.claude', 'tts-provider.txt'),
|
|
1288
|
-
];
|
|
1291
|
+
].filter(Boolean);
|
|
1289
1292
|
for (const p of _provPaths) {
|
|
1290
1293
|
if (fs.existsSync(p)) { _activeProvider = fs.readFileSync(p, 'utf8').trim(); break; }
|
|
1291
1294
|
}
|
|
1292
1295
|
} catch {}
|
|
1293
1296
|
|
|
1294
1297
|
if (_remoteProviders.includes(_activeProvider)) {
|
|
1295
|
-
const
|
|
1298
|
+
const _hooksBase = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
1296
1299
|
let rProc;
|
|
1297
1300
|
if (_isWin) {
|
|
1298
|
-
const _playTts = path.join(
|
|
1301
|
+
const _playTts = path.join(_hooksBase, '.claude', 'hooks-windows', 'play-tts.ps1');
|
|
1299
1302
|
rProc = spawn('powershell', ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', _playTts, phrase, voiceId], {
|
|
1300
1303
|
stdio: 'ignore', detached: false, windowsHide: true, env: _spawnEnv,
|
|
1301
1304
|
});
|
|
1302
1305
|
} else {
|
|
1303
|
-
const _playTts = path.join(
|
|
1306
|
+
const _playTts = path.join(_hooksBase, '.claude', 'hooks', 'play-tts.sh');
|
|
1304
1307
|
rProc = spawn('bash', [_playTts, phrase, voiceId], {
|
|
1305
1308
|
stdio: 'ignore', detached: true, env: _spawnEnv,
|
|
1306
1309
|
});
|
|
@@ -868,14 +868,17 @@ export function createVoicesTab(screen, services) {
|
|
|
868
868
|
_playingVoiceId = null;
|
|
869
869
|
|
|
870
870
|
// Check if we should route through remote provider (ssh-remote / agentvibes-receiver)
|
|
871
|
+
// Search order: CLAUDE_PROJECT_DIR (actual project) → cwd → package root → home
|
|
871
872
|
const projectRoot = path.resolve(__dirname, '..', '..');
|
|
872
873
|
const remoteProviders = ['ssh-remote', 'agentvibes-receiver'];
|
|
873
874
|
let activeProvider = '';
|
|
874
875
|
try {
|
|
875
876
|
const providerPaths = [
|
|
877
|
+
process.env.CLAUDE_PROJECT_DIR && path.join(process.env.CLAUDE_PROJECT_DIR, '.claude', 'tts-provider.txt'),
|
|
878
|
+
path.join(process.cwd(), '.claude', 'tts-provider.txt'),
|
|
876
879
|
path.join(projectRoot, '.claude', 'tts-provider.txt'),
|
|
877
880
|
path.join(os.homedir(), '.claude', 'tts-provider.txt'),
|
|
878
|
-
];
|
|
881
|
+
].filter(Boolean);
|
|
879
882
|
for (const p of providerPaths) {
|
|
880
883
|
if (fs.existsSync(p)) { activeProvider = fs.readFileSync(p, 'utf8').trim(); break; }
|
|
881
884
|
}
|
|
@@ -884,14 +887,17 @@ export function createVoicesTab(screen, services) {
|
|
|
884
887
|
if (remoteProviders.includes(activeProvider)) {
|
|
885
888
|
const isWindows = process.platform === 'win32' && !process.env.WSL_DISTRO_NAME;
|
|
886
889
|
const phrase = SAMPLE_PHRASES[Math.floor(Math.random() * SAMPLE_PHRASES.length)];
|
|
890
|
+
// Resolve play-tts from the actual project (CLAUDE_PROJECT_DIR / cwd),
|
|
891
|
+
// not the npm package root — hooks live in the user's project dir.
|
|
892
|
+
const hooksBase = process.env.CLAUDE_PROJECT_DIR || process.cwd();
|
|
887
893
|
let proc;
|
|
888
894
|
if (isWindows) {
|
|
889
|
-
const playTts = path.join(
|
|
895
|
+
const playTts = path.join(hooksBase, '.claude', 'hooks-windows', 'play-tts.ps1');
|
|
890
896
|
proc = spawn('powershell', ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', playTts, phrase, voiceId], {
|
|
891
897
|
stdio: 'ignore', detached: false, windowsHide: true, env: _spawnEnv,
|
|
892
898
|
});
|
|
893
899
|
} else {
|
|
894
|
-
const playTts = path.join(
|
|
900
|
+
const playTts = path.join(hooksBase, '.claude', 'hooks', 'play-tts.sh');
|
|
895
901
|
proc = spawn('bash', [playTts, phrase, voiceId], {
|
|
896
902
|
stdio: 'ignore', detached: true, env: _spawnEnv,
|
|
897
903
|
});
|
|
@@ -209,6 +209,19 @@ export async function installClaudeMcp(targetDir) {
|
|
|
209
209
|
await installer.copyBackgroundMusicFiles(targetDir, silentSpinner);
|
|
210
210
|
ensureDefaultLlmConfigSync('claude-code', targetDir);
|
|
211
211
|
|
|
212
|
+
// Explicitly write tts-provider.txt so `get_active_provider()` in
|
|
213
|
+
// provider-manager.sh doesn't silently fall back to "piper". Without
|
|
214
|
+
// this, headless servers with no audio device hit a confusing failure
|
|
215
|
+
// mode where TTS tries to synth locally and fails silently. Users
|
|
216
|
+
// can still change the provider via the Setup TUI or slash command.
|
|
217
|
+
const ttsProviderPath = path.join(targetDir, '.claude', 'tts-provider.txt');
|
|
218
|
+
try {
|
|
219
|
+
await fs.access(ttsProviderPath);
|
|
220
|
+
// Already exists — user has explicitly set a provider, don't clobber
|
|
221
|
+
} catch {
|
|
222
|
+
await fs.writeFile(ttsProviderPath, 'piper\n');
|
|
223
|
+
}
|
|
224
|
+
|
|
212
225
|
return { success: true, mcpCreated, mcpError };
|
|
213
226
|
} catch (err) {
|
|
214
227
|
return { success: false, error: err.message, mcpError };
|