agentvibes 5.2.1 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,6 +54,6 @@ default||agent_vibes_chillwave_v2_loop.mp3|0.15
54
54
  # Claude Code LLM default - pretext + music for remote receivers without per-LLM config|||
55
55
  llm:default|reverb 20 50 50|agent_vibes_chillwave_v2_loop.mp3|0.15||AgentVibes here
56
56
  analyst|reverb 70 50 100|agentvibes_soft_flamenco_loop.mp3|0.30
57
- llm:claude-code|light|agent_vibes_chillwave_v2_loop.mp3|0.15|en_US-libritts-high::Leo-7|Claude Code here|piper
57
+ llm:claude-code|light|agent_vibes_celtic_harp_v1_loop.mp3|0.15|en_US-libritts-high::Leo-7|Agent Vibes Local Here|piper
58
58
  llm:copilot|light|agent_vibes_bossa_nova_v2_loop.mp3|0.15|en_US-libritts-high::Anna-11|Copilot here|piper
59
59
  llm:codex|light|agent_vibes_chillwave_v2_loop.mp3|0.15|en_US-lessac-high|Codex here|piper
@@ -14,13 +14,63 @@
14
14
 
15
15
  set -euo pipefail
16
16
 
17
- TEXT="${1:-}"
18
- VOICE="${2:-en_US-lessac-medium}"
19
- AGENT_NAME="${3:-default}"
17
+ # ---------------------------------------------------------------------------
18
+ # Argument parsing — supports both positional (backward compat) and flags.
19
+ #
20
+ # Positional (legacy):
21
+ # play-tts-ssh-remote.sh "text" [voice] [agent_name]
22
+ #
23
+ # Flag-based (full per-call control):
24
+ # play-tts-ssh-remote.sh --text "hello" --voice "en_US-ryan-high" \
25
+ # --pretext "Winston here" --music "track.mp3" \
26
+ # --volume 0.25 --effects "reverb 40 60 80" \
27
+ # --speed 1.1 --provider piper --agent "winston"
28
+ #
29
+ # Flags override config-file values. Any flag omitted → falls back to config.
30
+ # --pretext "" (empty string) explicitly suppresses pretext (no fallback).
31
+ # ---------------------------------------------------------------------------
32
+
33
+ TEXT=""
34
+ VOICE=""
35
+ AGENT_NAME=""
36
+ PRETEXT_OVERRIDE=""
37
+ PRETEXT_SET=0 # whether --pretext was explicitly provided
38
+ MUSIC_OVERRIDE=""
39
+ VOLUME_OVERRIDE=""
40
+ EFFECTS_OVERRIDE=""
41
+ SPEED_OVERRIDE=""
42
+ PROVIDER_OVERRIDE=""
43
+
44
+ # Detect flag-based vs positional usage: if first arg starts with --, use flags.
45
+ if [[ "${1:-}" == --* ]]; then
46
+ while [[ $# -gt 0 ]]; do
47
+ case "$1" in
48
+ --text) TEXT="${2:-}"; shift 2 ;;
49
+ --voice) VOICE="${2:-}"; shift 2 ;;
50
+ --agent) AGENT_NAME="${2:-}"; shift 2 ;;
51
+ --pretext) PRETEXT_OVERRIDE="${2:-}"; PRETEXT_SET=1; shift 2 ;;
52
+ --music) MUSIC_OVERRIDE="${2:-}"; shift 2 ;;
53
+ --volume) VOLUME_OVERRIDE="${2:-}"; shift 2 ;;
54
+ --effects) EFFECTS_OVERRIDE="${2:-}"; shift 2 ;;
55
+ --speed) SPEED_OVERRIDE="${2:-}"; shift 2 ;;
56
+ --provider) PROVIDER_OVERRIDE="${2:-}"; shift 2 ;;
57
+ *) echo "Unknown flag: $1" >&2; exit 1 ;;
58
+ esac
59
+ done
60
+ else
61
+ TEXT="${1:-}"
62
+ VOICE="${2:-}"
63
+ AGENT_NAME="${3:-}"
64
+ fi
65
+
66
+ # Defaults for still-empty values
67
+ VOICE="${VOICE:-en_US-lessac-medium}"
68
+ AGENT_NAME="${AGENT_NAME:-default}"
20
69
 
21
70
  # Validate required input
22
71
  if [[ -z "$TEXT" ]]; then
23
- echo "Usage: $0 <text> [voice] [agent_name]" >&2
72
+ echo "Usage (positional): $0 <text> [voice] [agent_name]" >&2
73
+ echo "Usage (flags): $0 --text <text> [--voice X --pretext Y --music Z --volume N --effects E --speed S --provider P --agent A]" >&2
24
74
  exit 1
25
75
  fi
26
76
 
@@ -115,6 +165,49 @@ case "${PROVIDER:-}" in
115
165
  *) PROVIDER="piper" ;;
116
166
  esac
117
167
 
168
+ # ---------------------------------------------------------------------------
169
+ # Apply CLI flag overrides (flags win over config files)
170
+ # ---------------------------------------------------------------------------
171
+ # Validate effects (prevent injection — only alphanumeric, spaces, dots, underscores, hyphens)
172
+ if [[ -n "$EFFECTS_OVERRIDE" ]]; then
173
+ if [[ "$EFFECTS_OVERRIDE" =~ ^[a-zA-Z0-9\ ._-]+$ ]]; then
174
+ SOX_EFFECTS="$EFFECTS_OVERRIDE"
175
+ else
176
+ echo "Invalid effects format: $EFFECTS_OVERRIDE (alphanumeric/space/.-_ only)" >&2
177
+ exit 1
178
+ fi
179
+ fi
180
+ [[ -n "$MUSIC_OVERRIDE" ]] && BG_FILE="$MUSIC_OVERRIDE"
181
+ [[ -n "$VOLUME_OVERRIDE" ]] && BG_VOLUME="$VOLUME_OVERRIDE"
182
+ [[ -n "$SPEED_OVERRIDE" ]] && SPEED="$SPEED_OVERRIDE"
183
+ [[ -n "$PROVIDER_OVERRIDE" ]] && PROVIDER="$PROVIDER_OVERRIDE"
184
+ # Pretext: explicit --pretext wins even when empty string (suppresses pretext)
185
+ if [[ "$PRETEXT_SET" == "1" ]]; then
186
+ PRETEXT="$PRETEXT_OVERRIDE"
187
+ fi
188
+
189
+ # Re-validate provider after override (in case user passed bad value)
190
+ case "${PROVIDER:-}" in
191
+ piper|soprano|macos|windows-sapi) ;;
192
+ *) PROVIDER="piper" ;;
193
+ esac
194
+ # Validate music filename (prevent path injection through JSON → receiver)
195
+ # Allows spaces in track names like "Late Night Hip Hop Groove.mp3"
196
+ if [[ -n "$BG_FILE" && ! "$BG_FILE" =~ ^[a-zA-Z0-9_\.\ -]+$ ]]; then
197
+ echo "Invalid music filename format: $BG_FILE (alphanumeric/space/.-_ only)" >&2
198
+ exit 1
199
+ fi
200
+ # Validate volume
201
+ if [[ -n "$BG_VOLUME" && ! "$BG_VOLUME" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
202
+ echo "Invalid volume: $BG_VOLUME (numeric only)" >&2
203
+ exit 1
204
+ fi
205
+ # Validate speed
206
+ if [[ -n "$SPEED" && ! "$SPEED" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
207
+ echo "Invalid speed: $SPEED (numeric only)" >&2
208
+ exit 1
209
+ fi
210
+
118
211
  # ---------------------------------------------------------------------------
119
212
  # Build JSON payload
120
213
  # ---------------------------------------------------------------------------
@@ -134,13 +227,14 @@ build_json_payload() {
134
227
  --arg provider "$PROVIDER" \
135
228
  '{text: $text, voice: $voice, effects: $effects, music: $music, volume: $volume, project: $project, pretext: $pretext, speed: $speed, provider: $provider}'
136
229
  else
137
- # Manual JSON — escape double quotes and backslashes in text
138
- local escaped_text
139
- escaped_text=$(printf '%s' "$TEXT" | sed 's/\\/\\\\/g; s/"/\\"/g; s/\t/\\t/g')
140
- local escaped_pretext
141
- escaped_pretext=$(printf '%s' "$PRETEXT" | sed 's/\\/\\\\/g; s/"/\\"/g')
230
+ # Manual JSON — escape all interpolated fields (backslash, double-quote, tab); newlines are replaced with spaces
231
+ _esc() { printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g; s/\t/\\t/g' | tr '\n' ' '; }
232
+ local e_text e_voice e_effects e_music e_vol e_proj e_pre e_spd e_prov
233
+ e_text=$(_esc "$TEXT"); e_voice=$(_esc "$VOICE"); e_effects=$(_esc "$SOX_EFFECTS")
234
+ e_music=$(_esc "$BG_FILE"); e_vol=$(_esc "$BG_VOLUME"); e_proj=$(_esc "$PROJECT_NAME")
235
+ e_pre=$(_esc "$PRETEXT"); e_spd=$(_esc "$SPEED"); e_prov=$(_esc "$PROVIDER")
142
236
  printf '{"text":"%s","voice":"%s","effects":"%s","music":"%s","volume":"%s","project":"%s","pretext":"%s","speed":"%s","provider":"%s"}' \
143
- "$escaped_text" "$VOICE" "$SOX_EFFECTS" "$BG_FILE" "$BG_VOLUME" "$PROJECT_NAME" "$escaped_pretext" "$SPEED" "$PROVIDER"
237
+ "$e_text" "$e_voice" "$e_effects" "$e_music" "$e_vol" "$e_proj" "$e_pre" "$e_spd" "$e_prov"
144
238
  fi
145
239
  }
146
240
 
@@ -200,12 +200,23 @@ fi
200
200
  # Source provider manager to get active provider
201
201
  source "$SCRIPT_DIR/provider-manager.sh"
202
202
 
203
- # Get active provider (LLM-specific engine overrides global)
204
- if [[ -n "$_LLM_ENGINE" ]]; then
205
- ACTIVE_PROVIDER="$_LLM_ENGINE"
206
- else
207
- ACTIVE_PROVIDER=$(get_active_provider)
208
- fi
203
+ # Get active provider.
204
+ # Per-LLM engine (from audio-effects.cfg `llm:<key>` row column 7) overrides
205
+ # the global tts-provider.txt — UNLESS the global is a transport provider
206
+ # (ssh-remote, agentvibes-receiver, termux-ssh). Transport providers
207
+ # forward TTS to a remote receiver which picks its OWN engine; overriding
208
+ # them with a local engine like piper would synthesize on the wrong host.
209
+ ACTIVE_PROVIDER=$(get_active_provider)
210
+ case "$ACTIVE_PROVIDER" in
211
+ ssh-remote|agentvibes-receiver|termux-ssh)
212
+ # Transport — keep it. The receiver's audio-effects.cfg picks the engine.
213
+ ;;
214
+ *)
215
+ if [[ -n "$_LLM_ENGINE" ]]; then
216
+ ACTIVE_PROVIDER="$_LLM_ENGINE"
217
+ fi
218
+ ;;
219
+ esac
209
220
 
210
221
  # Show GitHub star reminder (once per day)
211
222
  bash "$SCRIPT_DIR/github-star-reminder.sh" 2>/dev/null || true
@@ -242,7 +242,11 @@ try {
242
242
  try {
243
243
  # WaitOne throws AbandonedMutexException if prior process crashed while holding it.
244
244
  # That exception means we DID acquire the mutex — treat it as success (fixes M2).
245
- $acquired = $mutex.WaitOne(60000)
245
+ # 600s timeout covers worst-case party-mode queue depth (9 agents x
246
+ # ~60s of speech each). Shorter timeouts silently dropped the last
247
+ # agents to speak. play-tts.ps1 bounds true stuck processes via its
248
+ # playback watchdog, so waiting longer here is safe.
249
+ $acquired = $mutex.WaitOne(600000)
246
250
  } catch [System.Threading.AbandonedMutexException] {
247
251
  $acquired = $true # abandoned = we now own it
248
252
  }
@@ -16,6 +16,20 @@ param(
16
16
  [string]$llm = ""
17
17
  )
18
18
 
19
+ # Text-file handoff: Windows command-line arg passing mangles text with
20
+ # quotes, newlines, or non-ASCII characters. The SSH receiver watcher
21
+ # (setup-ssh-receiver.ps1) writes long/special-char text to a UTF-8 temp
22
+ # file and passes the sentinel "__from_file__" + AGENTVIBES_TEXT_FILE env
23
+ # var. Load the real text here before any validation or synthesis.
24
+ if ($Text -eq "__from_file__" -and $env:AGENTVIBES_TEXT_FILE) {
25
+ if (Test-Path $env:AGENTVIBES_TEXT_FILE) {
26
+ $Text = [System.IO.File]::ReadAllText($env:AGENTVIBES_TEXT_FILE, [System.Text.UTF8Encoding]::new($false))
27
+ } else {
28
+ Write-Error "AGENTVIBES_TEXT_FILE set to missing path: $($env:AGENTVIBES_TEXT_FILE)"
29
+ exit 1
30
+ }
31
+ }
32
+
19
33
  # Security: Validate LLM provider name (alphanumeric, hyphens, underscores
20
34
  # only) -- mirrors play-tts.sh line 92. This prevents weird values from
21
35
  # poisoning the audio-effects.cfg lookup or the AGENTVIBES_LLM_KEY env var
@@ -53,63 +67,42 @@ if (-not $llm) {
53
67
  # process's playback.
54
68
  $_PlaybackMutex = New-Object System.Threading.Mutex($false, "AgentVibesPlaybackLock")
55
69
 
56
- # --- Script-level watchdog ---
57
- # If anything in this script hangs (SoundPlayer deadlock, audio device
58
- # locked, ffmpeg stuck, etc.), a sibling PowerShell job waits 25 seconds
59
- # and force-kills this process. Without this, a stuck play-tts.ps1 holds
60
- # the playback mutex forever and silently blocks every subsequent TTS
61
- # call across all LLMs. The watchdog guarantees forward progress.
70
+ # --- Playback watchdog ---
71
+ # If playback itself hangs (SoundPlayer deadlock, audio device locked,
72
+ # etc.), a sibling PowerShell job waits 120 seconds from the moment
73
+ # playback STARTS and force-kills this process. Without this, a stuck
74
+ # play-tts.ps1 holds the playback mutex forever and silently blocks every
75
+ # subsequent TTS call across all LLMs.
62
76
  #
63
- # 25s is chosen to be LONGER than the mutex timeout (15s) but SHORT
64
- # enough that a stuck process clears before the user's next turn. If
65
- # you fire two calls per turn and the first is stuck, the watchdog kills
66
- # it before the second turn arrives so the audio subsystem recovers
67
- # without manual intervention. Long legitimate messages (>25s of speech)
68
- # are rare at default verbosity levels; when they do occur the watchdog
69
- # kills playback mid-sentence, which is acceptable degradation vs. a
70
- # deadlocked queue.
71
- $_WatchdogJob = $null
72
- try {
73
- $_WatchdogJob = Start-Job -ArgumentList $PID -ScriptBlock {
74
- param($parentPid)
75
- Start-Sleep -Seconds 25
76
- try {
77
- # Only kill if still alive -- harmless if already exited
78
- $p = Get-Process -Id $parentPid -ErrorAction SilentlyContinue
79
- if ($p) {
80
- [Console]::Error.WriteLine("[AgentVibes] play-tts.ps1 watchdog fired -- force-killing pid $parentPid after 25s")
81
- Stop-Process -Id $parentPid -Force -ErrorAction SilentlyContinue
82
- }
83
- } catch { }
84
- }
85
- } catch {
86
- # If Start-Job fails (rare), just continue without the watchdog -- no
87
- # regression from pre-watchdog behavior.
88
- $_WatchdogJob = $null
89
- }
77
+ # IMPORTANT: the watchdog is started AFTER mutex acquisition (inside
78
+ # Invoke-SerializedPlay), not at script entry. Starting it at script
79
+ # entry caused round-robin / party-mode cut-offs: when 9 agents fire
80
+ # text_to_speech in quick succession, later calls spend most of their
81
+ # 120s budget waiting for the mutex, then get killed mid-playback.
82
+ # The mutex WaitOne() bounds queue waiting separately.
90
83
 
91
84
  function Invoke-SerializedPlay {
92
85
  param([Parameter(Mandatory)][string]$WavPath)
93
86
  $acquired = $false
87
+ $watchdogJob = $null
94
88
  try {
95
89
  try {
96
- # 15s timeout to acquire the playback mutex. If we can't get
97
- # it in 15s, the holder is almost certainly a stuck/crashed
98
- # prior run. AbandonedMutexException means the holder's
99
- # process actually died -- we inherit ownership.
100
- $acquired = $_PlaybackMutex.WaitOne(15000)
90
+ # 600s timeout to acquire the playback mutex. Covers worst-case
91
+ # queue depth (round-robin with 9 agents x ~60s of playback each).
92
+ # AbandonedMutexException means the holder's process actually
93
+ # died -- we inherit ownership.
94
+ $acquired = $_PlaybackMutex.WaitOne(600000)
101
95
  } catch [System.Threading.AbandonedMutexException] {
102
96
  $acquired = $true
103
97
  }
104
98
  if (-not $acquired) {
105
99
  # Self-heal: kill any stuck play-tts.ps1 processes (other than
106
- # ourselves) that have been alive longer than 20 seconds. This
107
- # frees the mutex so the NEXT call can succeed without the user
108
- # running taskkill manually. We still exit with code 2 because
109
- # this call's audio is lost, but the queue recovers immediately.
100
+ # ourselves) that have been alive longer than 10 minutes. Past
101
+ # any legitimate playback window, so only truly stuck processes
102
+ # get killed.
110
103
  try {
111
104
  $myPid = $PID
112
- $cutoff = (Get-Date).AddSeconds(-20)
105
+ $cutoff = (Get-Date).AddSeconds(-600)
113
106
  $stuck = Get-CimInstance Win32_Process -ErrorAction SilentlyContinue |
114
107
  Where-Object {
115
108
  $_.Name -eq 'powershell.exe' -and
@@ -122,9 +115,28 @@ function Invoke-SerializedPlay {
122
115
  Stop-Process -Id $p.ProcessId -Force -ErrorAction SilentlyContinue
123
116
  }
124
117
  } catch { }
125
- [Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within 15s. A prior play-tts.ps1 process was stuck holding it and has been killed; the next TTS call should succeed.")
118
+ [Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within 600s. A prior play-tts.ps1 process was stuck holding it and has been killed; the next TTS call should succeed.")
126
119
  exit 2
127
120
  }
121
+
122
+ # Start the watchdog NOW (after mutex acquisition) so its 120s
123
+ # budget covers only the playback itself, not time spent queued.
124
+ try {
125
+ $watchdogJob = Start-Job -ArgumentList $PID -ScriptBlock {
126
+ param($parentPid)
127
+ Start-Sleep -Seconds 120
128
+ try {
129
+ $p = Get-Process -Id $parentPid -ErrorAction SilentlyContinue
130
+ if ($p) {
131
+ [Console]::Error.WriteLine("[AgentVibes] play-tts.ps1 playback watchdog fired -- force-killing pid $parentPid after 120s of playback")
132
+ Stop-Process -Id $parentPid -Force -ErrorAction SilentlyContinue
133
+ }
134
+ } catch { }
135
+ }
136
+ } catch {
137
+ $watchdogJob = $null
138
+ }
139
+
128
140
  $player = $null
129
141
  try {
130
142
  $player = New-Object System.Media.SoundPlayer $WavPath
@@ -133,23 +145,18 @@ function Invoke-SerializedPlay {
133
145
  if ($player) { $player.Dispose() }
134
146
  }
135
147
  } finally {
148
+ if ($watchdogJob) {
149
+ try {
150
+ Stop-Job -Job $watchdogJob -ErrorAction SilentlyContinue
151
+ Remove-Job -Job $watchdogJob -Force -ErrorAction SilentlyContinue
152
+ } catch { }
153
+ }
136
154
  if ($acquired) {
137
155
  try { $_PlaybackMutex.ReleaseMutex() } catch { }
138
156
  }
139
157
  }
140
158
  }
141
159
 
142
- # Register an exit handler that stops the watchdog job on normal exit so
143
- # it doesn't fire on successful short runs.
144
- Register-EngineEvent -SourceIdentifier PowerShell.Exiting -Action {
145
- try {
146
- if ($_WatchdogJob) {
147
- Stop-Job -Job $_WatchdogJob -ErrorAction SilentlyContinue
148
- Remove-Job -Job $_WatchdogJob -Force -ErrorAction SilentlyContinue
149
- }
150
- } catch { }
151
- } | Out-Null
152
-
153
160
  # Configuration paths
154
161
  # Priority: CLAUDE_PROJECT_DIR env var -> script's parent project -> user profile
155
162
  # Local project settings ALWAYS override global (~/.claude)
@@ -249,11 +256,26 @@ if ($llm) {
249
256
  $env:AGENTVIBES_LLM_KEY = "llm:$llm"
250
257
  }
251
258
 
259
+ # ---------------------------------------------------------------------------
260
+ # Per-call env-var overrides (set by the SSH watcher from queue JSON).
261
+ # These win over audio-effects.cfg lookup results for this call only.
262
+ # ---------------------------------------------------------------------------
263
+ if ($env:AGENTVIBES_OVERRIDE_MUSIC) { $LlmBgTrack = $env:AGENTVIBES_OVERRIDE_MUSIC }
264
+ if ($env:AGENTVIBES_OVERRIDE_VOLUME) { $LlmBgVolume = $env:AGENTVIBES_OVERRIDE_VOLUME }
265
+ if ($env:AGENTVIBES_OVERRIDE_EFFECTS) { $LlmReverb = $env:AGENTVIBES_OVERRIDE_EFFECTS }
266
+
252
267
  # Prepend pretext if configured
253
268
  # Priority: LLM-specific pretext -> project .agentvibes/config.json -> project .claude/config/tts-pretext.txt
254
269
  # -> global ~/.agentvibes/config.json -> global ~/.claude/config/tts-pretext.txt
255
- $Pretext = $LlmPretext
256
- if (-not $Pretext) {
270
+ #
271
+ # Honor AGENTVIBES_NO_PRETEXT=1 for callers that already prepended a pretext
272
+ # (e.g., the SSH receiver watcher — server already added its own pretext
273
+ # before sending; double-prepending here would say "AgentVibes here, server-pretext, message").
274
+ $Pretext = ""
275
+ if ($env:AGENTVIBES_NO_PRETEXT -ne "1") {
276
+ $Pretext = $LlmPretext
277
+ }
278
+ if (-not $Pretext -and $env:AGENTVIBES_NO_PRETEXT -ne "1") {
257
279
  $PretextSources = @(
258
280
  (Join-Path $ProjectRoot ".agentvibes\config.json"),
259
281
  "$ClaudeDir\config\tts-pretext.txt",
@@ -569,6 +591,16 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
569
591
  $DefaultTrack = "agent_vibes_celtic_harp_v1_loop.mp3"
570
592
  }
571
593
 
594
+ # Per-call env-var overrides (set by SSH watcher from queue JSON).
595
+ # Win over audio-effects.cfg lookup above. Validate filename to
596
+ # prevent path traversal before accepting.
597
+ if ($env:AGENTVIBES_OVERRIDE_MUSIC -and $env:AGENTVIBES_OVERRIDE_MUSIC -match '^[a-zA-Z0-9_\-\. ]+$') {
598
+ $DefaultTrack = $env:AGENTVIBES_OVERRIDE_MUSIC
599
+ }
600
+ if ($env:AGENTVIBES_OVERRIDE_VOLUME -and $env:AGENTVIBES_OVERRIDE_VOLUME -match '^\d+\.?\d*$') {
601
+ $BgVolume = $env:AGENTVIBES_OVERRIDE_VOLUME
602
+ }
603
+
572
604
  $BgTrackPath = Join-Path $TracksDir $DefaultTrack
573
605
  # Path containment: verify resolved path stays within tracks directory
574
606
  $ResolvedBgTrack = [System.IO.Path]::GetFullPath($BgTrackPath)
package/README.md CHANGED
@@ -11,7 +11,7 @@
11
11
  [![Publish](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml/badge.svg)](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml)
12
12
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
13
13
 
14
- **Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v5.2.1
14
+ **Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v5.3.0
15
15
 
16
16
  ---
17
17
 
@@ -43,7 +43,17 @@ Whether you're using Claude Code, GitHub Copilot, OpenAI Codex, Claude Desktop,
43
43
 
44
44
  ---
45
45
 
46
- ## 🎯 NEW IN v5.2.1Multi-LLM Identity & Install Polish
46
+ ## 🎯 NEW IN v5.3.0 — Take Control of Remote Voices
47
+
48
+ - **Customize every remote announcement individually** — pass `--voice`, `--pretext`, `--music`, `--volume`, `--effects`, `--speed`, `--provider` on the command line for just that one message. No more editing config files and changing them back.
49
+ - **Skip the intro phrase on demand** — `--pretext ""` suppresses the pretext for a single message.
50
+ - **Long messages and special characters work correctly on Windows** — text with quotes, apostrophes, emoji, or multi-line content no longer gets truncated on its way to the voice engine.
51
+ - **Voice playback works on Windows servers with no monitor** — a background helper runs in your user session and picks up announcements from a queue, so audio plays even when SSH'ing in headless.
52
+ - **Voice preview on remote servers streams to the right device** — TUI preview no longer falls back to local audio on machines without speakers.
53
+ - **No more double intro phrases** when both sender and receiver have pretext configured.
54
+ - **55 new tests** for BMAD party mode voice assignment and agent isolation.
55
+
56
+ ## 🎯 v5.2.1 — Multi-LLM Identity & Install Polish
47
57
 
48
58
  - **Copilot gets its own voice + pretext + music** — "Copilot here" with bossa nova, fully distinct from Claude Code and Codex.
49
59
  - **Per-tool MCP configs with explicit identity** — `.vscode/mcp.json`, `.codex/config.toml`, `~/.copilot/mcp-config.json` each set their own `AGENTVIBES_LLM`.
package/RELEASE_NOTES.md CHANGED
@@ -1,5 +1,97 @@
1
1
  # AgentVibes Release Notes
2
2
 
3
+ ## 🎯 v5.3.0 — Take Control of Remote Voices
4
+
5
+ **Release Date:** April 2026
6
+
7
+ If you're using AgentVibes to send voice announcements from a server to
8
+ your phone, laptop, or another machine, this release puts you in the
9
+ driver's seat. Every call can now pick its own voice, background music,
10
+ intro phrase, reverb, volume, and speed — right from the command line,
11
+ for just that one message.
12
+
13
+ ### ✨ What's New
14
+
15
+ #### You can now customize every announcement individually
16
+
17
+ Before, if you wanted a different voice or music for one specific
18
+ message, you had to change a config file (and remember to change it
19
+ back). Now you just add a flag to the command.
20
+
21
+ Want Winston to speak in his British accent with jazz playing for this
22
+ one deploy notification? Easy:
23
+
24
+ ```bash
25
+ bash .claude/hooks/play-tts-ssh-remote.sh \
26
+ --text "Deploy complete" \
27
+ --voice "en_US-ryan-high" \
28
+ --pretext "Winston here" \
29
+ --music "Late Night Hip Hop Groove.mp3" \
30
+ --volume 0.25
31
+ ```
32
+
33
+ Anything you don't specify falls back to your normal settings. Want to
34
+ skip the intro phrase just this once? Pass `--pretext ""` and it stays
35
+ silent before the message.
36
+
37
+ **Available flags:**
38
+ - `--voice` — which Piper voice to use
39
+ - `--pretext` — the intro phrase before the message (pass `""` to skip it)
40
+ - `--music` — background music track (filenames with spaces now work!)
41
+ - `--volume` — how loud the background music is (0.0 to 1.0)
42
+ - `--effects` — sound effects chain like reverb
43
+ - `--speed` — how fast the voice speaks
44
+ - `--provider` — which TTS engine to use
45
+ - `--agent` — which agent personality to use
46
+
47
+ The old way of calling the script still works, so nothing you've already
48
+ set up will break.
49
+
50
+ ### 🛠 Reliability Fixes
51
+
52
+ - **Long messages and special characters no longer get cut off.** On
53
+ Windows, long announcements or text with quotes, apostrophes, or
54
+ emoji were getting mangled before they reached the voice engine.
55
+ Fixed — your message now arrives exactly as you sent it, no matter
56
+ how long or weird.
57
+
58
+ - **Voice announcements now work on Windows servers with no monitor.**
59
+ Windows refuses to play audio in the "service" session that SSH
60
+ normally uses. A small background helper now runs in your regular
61
+ user session and picks up announcements from a queue, so audio plays
62
+ correctly even on headless servers.
63
+
64
+ - **Voice preview in the TUI works on remote servers.** Before, if you
65
+ previewed a voice from a server with no speakers, it would try to
66
+ play locally (and fail). Now it correctly streams to whatever remote
67
+ device you've configured.
68
+
69
+ - **No more double intro phrases.** If you set a pretext on both the
70
+ sending server and the receiving machine, you used to hear it twice.
71
+ The sender's version wins now — the receiver won't add its own on top.
72
+
73
+ - **Remote streaming settings now actually stick.** A recent change
74
+ accidentally caused remote-streaming setups (`ssh-remote`,
75
+ `agentvibes-receiver`) to get overridden and fall back to local
76
+ playback. Fixed.
77
+
78
+ - **Long announcements don't get killed mid-sentence.** The safety
79
+ timeout that stops stuck audio was too aggressive for long messages.
80
+ It's now generous enough to handle paragraph-length announcements.
81
+
82
+ - **Cleaner installer state** — when you install AgentVibes for Claude
83
+ Code, it now writes its TTS provider file explicitly instead of
84
+ relying on implicit state.
85
+
86
+ ### 🧪 Testing
87
+
88
+ 55 new tests make sure BMAD party mode keeps working: each agent gets
89
+ their unique voice and music, agents don't accidentally share the same
90
+ Piper speaker ID, and the installer always points party mode at the
91
+ cross-platform entry point.
92
+
93
+ ---
94
+
3
95
  ## 🎯 v5.2.1 — Multi-LLM Identity & Install Polish
4
96
 
5
97
  **Release Date:** April 2026
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "$schema": "https://json.schemastore.org/package.json",
3
3
  "name": "agentvibes",
4
- "version": "5.2.1",
4
+ "version": "5.3.0",
5
5
  "description": "Now your AI Agents can finally talk back! Professional TTS voice for Claude Code, Claude Desktop (via MCP), and Clawdbot with multi-provider support.",
6
6
  "homepage": "https://agentvibes.org",
7
7
  "keywords": [
@@ -1278,29 +1278,32 @@ export function createSetupTab(screen, services) {
1278
1278
  const phrase = SAMPLE_PHRASES[Math.floor(Math.random() * SAMPLE_PHRASES.length)];
1279
1279
 
1280
1280
  // Route through remote provider if active
1281
+ // Search order: CLAUDE_PROJECT_DIR → cwd → package root → home
1281
1282
  const _remoteProviders = ['ssh-remote', 'agentvibes-receiver'];
1282
1283
  let _activeProvider = '';
1283
1284
  try {
1284
- const _projectRoot = path.resolve(__dirname, '..', '..');
1285
+ const _pkgRoot = path.resolve(__dirname, '..', '..');
1285
1286
  const _provPaths = [
1286
- path.join(_projectRoot, '.claude', 'tts-provider.txt'),
1287
+ process.env.CLAUDE_PROJECT_DIR && path.join(process.env.CLAUDE_PROJECT_DIR, '.claude', 'tts-provider.txt'),
1288
+ path.join(process.cwd(), '.claude', 'tts-provider.txt'),
1289
+ path.join(_pkgRoot, '.claude', 'tts-provider.txt'),
1287
1290
  path.join(os.homedir(), '.claude', 'tts-provider.txt'),
1288
- ];
1291
+ ].filter(Boolean);
1289
1292
  for (const p of _provPaths) {
1290
1293
  if (fs.existsSync(p)) { _activeProvider = fs.readFileSync(p, 'utf8').trim(); break; }
1291
1294
  }
1292
1295
  } catch {}
1293
1296
 
1294
1297
  if (_remoteProviders.includes(_activeProvider)) {
1295
- const _projectRoot = path.resolve(__dirname, '..', '..');
1298
+ const _hooksBase = process.env.CLAUDE_PROJECT_DIR || process.cwd();
1296
1299
  let rProc;
1297
1300
  if (_isWin) {
1298
- const _playTts = path.join(_projectRoot, '.claude', 'hooks-windows', 'play-tts.ps1');
1301
+ const _playTts = path.join(_hooksBase, '.claude', 'hooks-windows', 'play-tts.ps1');
1299
1302
  rProc = spawn('powershell', ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', _playTts, phrase, voiceId], {
1300
1303
  stdio: 'ignore', detached: false, windowsHide: true, env: _spawnEnv,
1301
1304
  });
1302
1305
  } else {
1303
- const _playTts = path.join(_projectRoot, '.claude', 'hooks', 'play-tts.sh');
1306
+ const _playTts = path.join(_hooksBase, '.claude', 'hooks', 'play-tts.sh');
1304
1307
  rProc = spawn('bash', [_playTts, phrase, voiceId], {
1305
1308
  stdio: 'ignore', detached: true, env: _spawnEnv,
1306
1309
  });
@@ -868,14 +868,17 @@ export function createVoicesTab(screen, services) {
868
868
  _playingVoiceId = null;
869
869
 
870
870
  // Check if we should route through remote provider (ssh-remote / agentvibes-receiver)
871
+ // Search order: CLAUDE_PROJECT_DIR (actual project) → cwd → package root → home
871
872
  const projectRoot = path.resolve(__dirname, '..', '..');
872
873
  const remoteProviders = ['ssh-remote', 'agentvibes-receiver'];
873
874
  let activeProvider = '';
874
875
  try {
875
876
  const providerPaths = [
877
+ process.env.CLAUDE_PROJECT_DIR && path.join(process.env.CLAUDE_PROJECT_DIR, '.claude', 'tts-provider.txt'),
878
+ path.join(process.cwd(), '.claude', 'tts-provider.txt'),
876
879
  path.join(projectRoot, '.claude', 'tts-provider.txt'),
877
880
  path.join(os.homedir(), '.claude', 'tts-provider.txt'),
878
- ];
881
+ ].filter(Boolean);
879
882
  for (const p of providerPaths) {
880
883
  if (fs.existsSync(p)) { activeProvider = fs.readFileSync(p, 'utf8').trim(); break; }
881
884
  }
@@ -884,14 +887,17 @@ export function createVoicesTab(screen, services) {
884
887
  if (remoteProviders.includes(activeProvider)) {
885
888
  const isWindows = process.platform === 'win32' && !process.env.WSL_DISTRO_NAME;
886
889
  const phrase = SAMPLE_PHRASES[Math.floor(Math.random() * SAMPLE_PHRASES.length)];
890
+ // Resolve play-tts from the actual project (CLAUDE_PROJECT_DIR / cwd),
891
+ // not the npm package root — hooks live in the user's project dir.
892
+ const hooksBase = process.env.CLAUDE_PROJECT_DIR || process.cwd();
887
893
  let proc;
888
894
  if (isWindows) {
889
- const playTts = path.join(projectRoot, '.claude', 'hooks-windows', 'play-tts.ps1');
895
+ const playTts = path.join(hooksBase, '.claude', 'hooks-windows', 'play-tts.ps1');
890
896
  proc = spawn('powershell', ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', playTts, phrase, voiceId], {
891
897
  stdio: 'ignore', detached: false, windowsHide: true, env: _spawnEnv,
892
898
  });
893
899
  } else {
894
- const playTts = path.join(projectRoot, '.claude', 'hooks', 'play-tts.sh');
900
+ const playTts = path.join(hooksBase, '.claude', 'hooks', 'play-tts.sh');
895
901
  proc = spawn('bash', [playTts, phrase, voiceId], {
896
902
  stdio: 'ignore', detached: true, env: _spawnEnv,
897
903
  });
@@ -209,6 +209,19 @@ export async function installClaudeMcp(targetDir) {
209
209
  await installer.copyBackgroundMusicFiles(targetDir, silentSpinner);
210
210
  ensureDefaultLlmConfigSync('claude-code', targetDir);
211
211
 
212
+ // Explicitly write tts-provider.txt so `get_active_provider()` in
213
+ // provider-manager.sh doesn't silently fall back to "piper". Without
214
+ // this, headless servers with no audio device hit a confusing failure
215
+ // mode where TTS tries to synth locally and fails silently. Users
216
+ // can still change the provider via the Setup TUI or slash command.
217
+ const ttsProviderPath = path.join(targetDir, '.claude', 'tts-provider.txt');
218
+ try {
219
+ await fs.access(ttsProviderPath);
220
+ // Already exists — user has explicitly set a provider, don't clobber
221
+ } catch {
222
+ await fs.writeFile(ttsProviderPath, 'piper\n');
223
+ }
224
+
212
225
  return { success: true, mcpCreated, mcpError };
213
226
  } catch (err) {
214
227
  return { success: false, error: err.message, mcpError };