npm - agentvibes - Versions diffs - 5.2.1 → 5.3.0 - Mend

agentvibes 5.2.1 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/.claude/config/audio-effects.cfg +1 -1
package/.claude/hooks/play-tts-ssh-remote.sh +104 -10
package/.claude/hooks/play-tts.sh +17 -6
package/.claude/hooks-windows/bmad-party-speak.ps1 +5 -1
package/.claude/hooks-windows/play-tts.ps1 +89 -57
package/README.md +12 -2
package/RELEASE_NOTES.md +92 -0
package/package.json +1 -1
package/src/console/tabs/setup-tab.js +9 -6
package/src/console/tabs/voices-tab.js +9 -3
package/src/services/llm-provider-service.js +13 -0

package/.claude/config/audio-effects.cfg CHANGED Viewed

@@ -54,6 +54,6 @@ default||agent_vibes_chillwave_v2_loop.mp3|0.15
 # Claude Code LLM default - pretext + music for remote receivers without per-LLM config|||
 llm:default|reverb 20 50 50|agent_vibes_chillwave_v2_loop.mp3|0.15||AgentVibes here
 analyst|reverb 70 50 100|agentvibes_soft_flamenco_loop.mp3|0.30
-llm:claude-code|light|agent_vibes_chillwave_v2_loop.mp3|0.15|en_US-libritts-high::Leo-7|Claude Code here|piper
+llm:claude-code|light|agent_vibes_celtic_harp_v1_loop.mp3|0.15|en_US-libritts-high::Leo-7|Agent Vibes Local Here|piper
 llm:copilot|light|agent_vibes_bossa_nova_v2_loop.mp3|0.15|en_US-libritts-high::Anna-11|Copilot here|piper
 llm:codex|light|agent_vibes_chillwave_v2_loop.mp3|0.15|en_US-lessac-high|Codex here|piper

package/.claude/hooks/play-tts-ssh-remote.sh CHANGED Viewed

@@ -14,13 +14,63 @@
 set -euo pipefail
-TEXT="${1:-}"
-VOICE="${2:-en_US-lessac-medium}"
-AGENT_NAME="${3:-default}"
+# ---------------------------------------------------------------------------
+# Argument parsing — supports both positional (backward compat) and flags.
+#
+# Positional (legacy):
+#   play-tts-ssh-remote.sh "text" [voice] [agent_name]
+#
+# Flag-based (full per-call control):
+#   play-tts-ssh-remote.sh --text "hello" --voice "en_US-ryan-high" \
+#                          --pretext "Winston here" --music "track.mp3" \
+#                          --volume 0.25 --effects "reverb 40 60 80" \
+#                          --speed 1.1 --provider piper --agent "winston"
+#
+# Flags override config-file values.  Any flag omitted → falls back to config.
+# --pretext "" (empty string) explicitly suppresses pretext (no fallback).
+# ---------------------------------------------------------------------------
+TEXT=""
+VOICE=""
+AGENT_NAME=""
+PRETEXT_OVERRIDE=""
+PRETEXT_SET=0           # whether --pretext was explicitly provided
+MUSIC_OVERRIDE=""
+VOLUME_OVERRIDE=""
+EFFECTS_OVERRIDE=""
+SPEED_OVERRIDE=""
+PROVIDER_OVERRIDE=""
+# Detect flag-based vs positional usage: if first arg starts with --, use flags.
+if [[ "${1:-}" == --* ]]; then
+  while [[ $# -gt 0 ]]; do
+    case "$1" in
+      --text)     TEXT="${2:-}"; shift 2 ;;
+      --voice)    VOICE="${2:-}"; shift 2 ;;
+      --agent)    AGENT_NAME="${2:-}"; shift 2 ;;
+      --pretext)  PRETEXT_OVERRIDE="${2:-}"; PRETEXT_SET=1; shift 2 ;;
+      --music)    MUSIC_OVERRIDE="${2:-}"; shift 2 ;;
+      --volume)   VOLUME_OVERRIDE="${2:-}"; shift 2 ;;
+      --effects)  EFFECTS_OVERRIDE="${2:-}"; shift 2 ;;
+      --speed)    SPEED_OVERRIDE="${2:-}"; shift 2 ;;
+      --provider) PROVIDER_OVERRIDE="${2:-}"; shift 2 ;;
+      *)          echo "Unknown flag: $1" >&2; exit 1 ;;
+    esac
+  done
+else
+  TEXT="${1:-}"
+  VOICE="${2:-}"
+  AGENT_NAME="${3:-}"
+fi
+# Defaults for still-empty values
+VOICE="${VOICE:-en_US-lessac-medium}"
+AGENT_NAME="${AGENT_NAME:-default}"
 # Validate required input
 if [[ -z "$TEXT" ]]; then
-  echo "Usage: $0 <text> [voice] [agent_name]" >&2
+  echo "Usage (positional): $0 <text> [voice] [agent_name]" >&2
+  echo "Usage (flags):      $0 --text <text> [--voice X --pretext Y --music Z --volume N --effects E --speed S --provider P --agent A]" >&2
   exit 1
 fi
@@ -115,6 +165,49 @@ case "${PROVIDER:-}" in
   *) PROVIDER="piper" ;;
 esac
+# ---------------------------------------------------------------------------
+# Apply CLI flag overrides (flags win over config files)
+# ---------------------------------------------------------------------------
+# Validate effects (prevent injection — only alphanumeric, spaces, dots, underscores, hyphens)
+if [[ -n "$EFFECTS_OVERRIDE" ]]; then
+  if [[ "$EFFECTS_OVERRIDE" =~ ^[a-zA-Z0-9\ ._-]+$ ]]; then
+    SOX_EFFECTS="$EFFECTS_OVERRIDE"
+  else
+    echo "Invalid effects format: $EFFECTS_OVERRIDE (alphanumeric/space/.-_ only)" >&2
+    exit 1
+  fi
+fi
+[[ -n "$MUSIC_OVERRIDE"    ]] && BG_FILE="$MUSIC_OVERRIDE"
+[[ -n "$VOLUME_OVERRIDE"   ]] && BG_VOLUME="$VOLUME_OVERRIDE"
+[[ -n "$SPEED_OVERRIDE"    ]] && SPEED="$SPEED_OVERRIDE"
+[[ -n "$PROVIDER_OVERRIDE" ]] && PROVIDER="$PROVIDER_OVERRIDE"
+# Pretext: explicit --pretext wins even when empty string (suppresses pretext)
+if [[ "$PRETEXT_SET" == "1" ]]; then
+  PRETEXT="$PRETEXT_OVERRIDE"
+fi
+# Re-validate provider after override (in case user passed bad value)
+case "${PROVIDER:-}" in
+  piper|soprano|macos|windows-sapi) ;;
+  *) PROVIDER="piper" ;;
+esac
+# Validate music filename (prevent path injection through JSON → receiver)
+# Allows spaces in track names like "Late Night Hip Hop Groove.mp3"
+if [[ -n "$BG_FILE" && ! "$BG_FILE" =~ ^[a-zA-Z0-9_\.\ -]+$ ]]; then
+  echo "Invalid music filename format: $BG_FILE (alphanumeric/space/.-_ only)" >&2
+  exit 1
+fi
+# Validate volume
+if [[ -n "$BG_VOLUME" && ! "$BG_VOLUME" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+  echo "Invalid volume: $BG_VOLUME (numeric only)" >&2
+  exit 1
+fi
+# Validate speed
+if [[ -n "$SPEED" && ! "$SPEED" =~ ^[0-9]+(\.[0-9]+)?$ ]]; then
+  echo "Invalid speed: $SPEED (numeric only)" >&2
+  exit 1
+fi
 # ---------------------------------------------------------------------------
 # Build JSON payload
 # ---------------------------------------------------------------------------
@@ -134,13 +227,14 @@ build_json_payload() {
       --arg provider "$PROVIDER" \
       '{text: $text, voice: $voice, effects: $effects, music: $music, volume: $volume, project: $project, pretext: $pretext, speed: $speed, provider: $provider}'
   else
-    # Manual JSON — escape double quotes and backslashes in text
-    local escaped_text
-    escaped_text=$(printf '%s' "$TEXT" | sed 's/\\/\\\\/g; s/"/\\"/g; s/\t/\\t/g')
-    local escaped_pretext
-    escaped_pretext=$(printf '%s' "$PRETEXT" | sed 's/\\/\\\\/g; s/"/\\"/g')
+    # Manual JSON — escape every interpolated field (backslash, double-quote, tab); newlines are flattened to spaces
+    _esc() { printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g; s/\t/\\t/g' | tr '\n' ' '; }
+    local e_text e_voice e_effects e_music e_vol e_proj e_pre e_spd e_prov
+    e_text=$(_esc "$TEXT"); e_voice=$(_esc "$VOICE"); e_effects=$(_esc "$SOX_EFFECTS")
+    e_music=$(_esc "$BG_FILE"); e_vol=$(_esc "$BG_VOLUME"); e_proj=$(_esc "$PROJECT_NAME")
+    e_pre=$(_esc "$PRETEXT"); e_spd=$(_esc "$SPEED"); e_prov=$(_esc "$PROVIDER")
     printf '{"text":"%s","voice":"%s","effects":"%s","music":"%s","volume":"%s","project":"%s","pretext":"%s","speed":"%s","provider":"%s"}' \
-      "$escaped_text" "$VOICE" "$SOX_EFFECTS" "$BG_FILE" "$BG_VOLUME" "$PROJECT_NAME" "$escaped_pretext" "$SPEED" "$PROVIDER"
+      "$e_text" "$e_voice" "$e_effects" "$e_music" "$e_vol" "$e_proj" "$e_pre" "$e_spd" "$e_prov"
   fi
 }

package/.claude/hooks/play-tts.sh CHANGED Viewed

@@ -200,12 +200,23 @@ fi
 # Source provider manager to get active provider
 source "$SCRIPT_DIR/provider-manager.sh"
-# Get active provider (LLM-specific engine overrides global)
-if [[ -n "$_LLM_ENGINE" ]]; then
-  ACTIVE_PROVIDER="$_LLM_ENGINE"
-else
-  ACTIVE_PROVIDER=$(get_active_provider)
-fi
+# Get active provider.
+# Per-LLM engine (from audio-effects.cfg `llm:<key>` row column 7) overrides
+# the global tts-provider.txt — UNLESS the global is a transport provider
+# (ssh-remote, agentvibes-receiver, termux-ssh).  Transport providers
+# forward TTS to a remote receiver which picks its OWN engine; overriding
+# them with a local engine like piper would synthesize on the wrong host.
+ACTIVE_PROVIDER=$(get_active_provider)
+case "$ACTIVE_PROVIDER" in
+  ssh-remote|agentvibes-receiver|termux-ssh)
+    # Transport — keep it.  The receiver's audio-effects.cfg picks the engine.
+    ;;
+  *)
+    if [[ -n "$_LLM_ENGINE" ]]; then
+      ACTIVE_PROVIDER="$_LLM_ENGINE"
+    fi
+    ;;
+esac
 # Show GitHub star reminder (once per day)
 bash "$SCRIPT_DIR/github-star-reminder.sh" 2>/dev/null || true

package/.claude/hooks-windows/bmad-party-speak.ps1 CHANGED Viewed

@@ -242,7 +242,11 @@ try {
         try {
             # WaitOne throws AbandonedMutexException if prior process crashed while holding it.
             # That exception means we DID acquire the mutex — treat it as success (fixes M2).
-            $acquired = $mutex.WaitOne(60000)
+            # 600s timeout covers worst-case party-mode queue depth (9 agents x
+            # ~60s of speech each).  Shorter timeouts silently dropped the last
+            # agents to speak.  play-tts.ps1 bounds true stuck processes via its
+            # playback watchdog, so waiting longer here is safe.
+            $acquired = $mutex.WaitOne(600000)
         } catch [System.Threading.AbandonedMutexException] {
             $acquired = $true  # abandoned = we now own it
         }

package/.claude/hooks-windows/play-tts.ps1 CHANGED Viewed

@@ -16,6 +16,20 @@ param(
     [string]$llm = ""
 )
+# Text-file handoff: Windows command-line arg passing mangles text with
+# quotes, newlines, or non-ASCII characters. The SSH receiver watcher
+# (setup-ssh-receiver.ps1) writes long/special-char text to a UTF-8 temp
+# file and passes the sentinel "__from_file__" + AGENTVIBES_TEXT_FILE env
+# var. Load the real text here before any validation or synthesis.
+if ($Text -eq "__from_file__" -and $env:AGENTVIBES_TEXT_FILE) {
+    if (Test-Path $env:AGENTVIBES_TEXT_FILE) {
+        $Text = [System.IO.File]::ReadAllText($env:AGENTVIBES_TEXT_FILE, [System.Text.UTF8Encoding]::new($false))
+    } else {
+        Write-Error "AGENTVIBES_TEXT_FILE set to missing path: $($env:AGENTVIBES_TEXT_FILE)"
+        exit 1
+    }
+}
 # Security: Validate LLM provider name (alphanumeric, hyphens, underscores
 # only) -- mirrors play-tts.sh line 92.  This prevents weird values from
 # poisoning the audio-effects.cfg lookup or the AGENTVIBES_LLM_KEY env var
@@ -53,63 +67,42 @@ if (-not $llm) {
 # process's playback.
 $_PlaybackMutex = New-Object System.Threading.Mutex($false, "AgentVibesPlaybackLock")
-# --- Script-level watchdog ---
-# If anything in this script hangs (SoundPlayer deadlock, audio device
-# locked, ffmpeg stuck, etc.), a sibling PowerShell job waits 25 seconds
-# and force-kills this process.  Without this, a stuck play-tts.ps1 holds
-# the playback mutex forever and silently blocks every subsequent TTS
-# call across all LLMs.  The watchdog guarantees forward progress.
+# --- Playback watchdog ---
+# If playback itself hangs (SoundPlayer deadlock, audio device locked,
+# etc.), a sibling PowerShell job waits 120 seconds from the moment
+# playback STARTS and force-kills this process.  Without this, a stuck
+# play-tts.ps1 holds the playback mutex forever and silently blocks every
+# subsequent TTS call across all LLMs.
 #
-# 25s is chosen to be LONGER than the mutex timeout (15s) but SHORT
-# enough that a stuck process clears before the user's next turn.  If
-# you fire two calls per turn and the first is stuck, the watchdog kills
-# it before the second turn arrives so the audio subsystem recovers
-# without manual intervention.  Long legitimate messages (>25s of speech)
-# are rare at default verbosity levels; when they do occur the watchdog
-# kills playback mid-sentence, which is acceptable degradation vs. a
-# deadlocked queue.
-$_WatchdogJob = $null
-try {
-    $_WatchdogJob = Start-Job -ArgumentList $PID -ScriptBlock {
-        param($parentPid)
-        Start-Sleep -Seconds 25
-        try {
-            # Only kill if still alive -- harmless if already exited
-            $p = Get-Process -Id $parentPid -ErrorAction SilentlyContinue
-            if ($p) {
-                [Console]::Error.WriteLine("[AgentVibes] play-tts.ps1 watchdog fired -- force-killing pid $parentPid after 25s")
-                Stop-Process -Id $parentPid -Force -ErrorAction SilentlyContinue
-            }
-        } catch { }
-    }
-} catch {
-    # If Start-Job fails (rare), just continue without the watchdog -- no
-    # regression from pre-watchdog behavior.
-    $_WatchdogJob = $null
-}
+# IMPORTANT: the watchdog is started AFTER mutex acquisition (inside
+# Invoke-SerializedPlay), not at script entry.  Starting it at script
+# entry caused round-robin / party-mode cut-offs: when 9 agents fire
+# text_to_speech in quick succession, later calls spent most of their
+# watchdog budget waiting for the mutex, then got killed mid-playback.
+# The mutex WaitOne() bounds queue waiting separately.
 function Invoke-SerializedPlay {
     param([Parameter(Mandatory)][string]$WavPath)
     $acquired = $false
+    $watchdogJob = $null
     try {
         try {
-            # 15s timeout to acquire the playback mutex.  If we can't get
-            # it in 15s, the holder is almost certainly a stuck/crashed
-            # prior run.  AbandonedMutexException means the holder's
-            # process actually died -- we inherit ownership.
-            $acquired = $_PlaybackMutex.WaitOne(15000)
+            # 600s timeout to acquire the playback mutex.  Covers worst-case
+            # queue depth (round-robin with 9 agents x ~60s of playback each).
+            # AbandonedMutexException means the holder's process actually
+            # died -- we inherit ownership.
+            $acquired = $_PlaybackMutex.WaitOne(600000)
         } catch [System.Threading.AbandonedMutexException] {
             $acquired = $true
         }
         if (-not $acquired) {
             # Self-heal: kill any stuck play-tts.ps1 processes (other than
-            # ourselves) that have been alive longer than 20 seconds.  This
-            # frees the mutex so the NEXT call can succeed without the user
-            # running taskkill manually.  We still exit with code 2 because
-            # this call's audio is lost, but the queue recovers immediately.
+            # ourselves) that have been alive longer than 10 minutes.  That
+            # is past any legitimate playback window, so only truly stuck
+            # processes get killed.
             try {
                 $myPid = $PID
-                $cutoff = (Get-Date).AddSeconds(-20)
+                $cutoff = (Get-Date).AddSeconds(-600)
                 $stuck = Get-CimInstance Win32_Process -ErrorAction SilentlyContinue |
                     Where-Object {
                         $_.Name -eq 'powershell.exe' -and
@@ -122,9 +115,28 @@ function Invoke-SerializedPlay {
                     Stop-Process -Id $p.ProcessId -Force -ErrorAction SilentlyContinue
                 }
             } catch { }
-            [Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within 15s. A prior play-tts.ps1 process was stuck holding it and has been killed; the next TTS call should succeed.")
+            [Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within 600s. A prior play-tts.ps1 process was stuck holding it and has been killed; the next TTS call should succeed.")
             exit 2
         }
+        # Start the watchdog NOW (after mutex acquisition) so its 120s
+        # budget covers only the playback itself, not time spent queued.
+        try {
+            $watchdogJob = Start-Job -ArgumentList $PID -ScriptBlock {
+                param($parentPid)
+                Start-Sleep -Seconds 120
+                try {
+                    $p = Get-Process -Id $parentPid -ErrorAction SilentlyContinue
+                    if ($p) {
+                        [Console]::Error.WriteLine("[AgentVibes] play-tts.ps1 playback watchdog fired -- force-killing pid $parentPid after 120s of playback")
+                        Stop-Process -Id $parentPid -Force -ErrorAction SilentlyContinue
+                    }
+                } catch { }
+            }
+        } catch {
+            $watchdogJob = $null
+        }
         $player = $null
         try {
             $player = New-Object System.Media.SoundPlayer $WavPath
@@ -133,23 +145,18 @@ function Invoke-SerializedPlay {
             if ($player) { $player.Dispose() }
         }
     } finally {
+        if ($watchdogJob) {
+            try {
+                Stop-Job -Job $watchdogJob -ErrorAction SilentlyContinue
+                Remove-Job -Job $watchdogJob -Force -ErrorAction SilentlyContinue
+            } catch { }
+        }
         if ($acquired) {
             try { $_PlaybackMutex.ReleaseMutex() } catch { }
         }
     }
 }
-# Register an exit handler that stops the watchdog job on normal exit so
-# it doesn't fire on successful short runs.
-Register-EngineEvent -SourceIdentifier PowerShell.Exiting -Action {
-    try {
-        if ($_WatchdogJob) {
-            Stop-Job -Job $_WatchdogJob -ErrorAction SilentlyContinue
-            Remove-Job -Job $_WatchdogJob -Force -ErrorAction SilentlyContinue
-        }
-    } catch { }
-} | Out-Null
 # Configuration paths
 # Priority: CLAUDE_PROJECT_DIR env var -> script's parent project -> user profile
 # Local project settings ALWAYS override global (~/.claude)
@@ -249,11 +256,26 @@ if ($llm) {
     $env:AGENTVIBES_LLM_KEY = "llm:$llm"
 }
+# ---------------------------------------------------------------------------
+# Per-call env-var overrides (set by the SSH watcher from queue JSON).
+# These win over audio-effects.cfg lookup results for this call only.
+# ---------------------------------------------------------------------------
+if ($env:AGENTVIBES_OVERRIDE_MUSIC)   { $LlmBgTrack  = $env:AGENTVIBES_OVERRIDE_MUSIC }
+if ($env:AGENTVIBES_OVERRIDE_VOLUME)  { $LlmBgVolume = $env:AGENTVIBES_OVERRIDE_VOLUME }
+if ($env:AGENTVIBES_OVERRIDE_EFFECTS) { $LlmReverb   = $env:AGENTVIBES_OVERRIDE_EFFECTS }
 # Prepend pretext if configured
 # Priority: LLM-specific pretext -> project .agentvibes/config.json -> project .claude/config/tts-pretext.txt
 #           -> global ~/.agentvibes/config.json -> global ~/.claude/config/tts-pretext.txt
-$Pretext = $LlmPretext
-if (-not $Pretext) {
+#
+# Honor AGENTVIBES_NO_PRETEXT=1 for callers that already prepended a pretext
+# (e.g., the SSH receiver watcher — server already added its own pretext
+# before sending; double-prepending here would say "AgentVibes here, server-pretext, message").
+$Pretext = ""
+if ($env:AGENTVIBES_NO_PRETEXT -ne "1") {
+    $Pretext = $LlmPretext
+}
+if (-not $Pretext -and $env:AGENTVIBES_NO_PRETEXT -ne "1") {
     $PretextSources = @(
         (Join-Path $ProjectRoot ".agentvibes\config.json"),
         "$ClaudeDir\config\tts-pretext.txt",
@@ -569,6 +591,16 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
                 $DefaultTrack = "agent_vibes_celtic_harp_v1_loop.mp3"
             }
+            # Per-call env-var overrides (set by SSH watcher from queue JSON).
+            # Win over audio-effects.cfg lookup above.  Validate filename to
+            # prevent path traversal before accepting.
+            if ($env:AGENTVIBES_OVERRIDE_MUSIC -and $env:AGENTVIBES_OVERRIDE_MUSIC -match '^[a-zA-Z0-9_\-\. ]+$') {
+                $DefaultTrack = $env:AGENTVIBES_OVERRIDE_MUSIC
+            }
+            if ($env:AGENTVIBES_OVERRIDE_VOLUME -and $env:AGENTVIBES_OVERRIDE_VOLUME -match '^\d+\.?\d*$') {
+                $BgVolume = $env:AGENTVIBES_OVERRIDE_VOLUME
+            }
             $BgTrackPath = Join-Path $TracksDir $DefaultTrack
             # Path containment: verify resolved path stays within tracks directory
             $ResolvedBgTrack = [System.IO.Path]::GetFullPath($BgTrackPath)

package/README.md CHANGED Viewed

@@ -11,7 +11,7 @@
 [![Publish](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml/badge.svg)](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml)
 [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
-**Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v5.2.1
+**Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v5.3.0
 ---
@@ -43,7 +43,17 @@ Whether you're using Claude Code, GitHub Copilot, OpenAI Codex, Claude Desktop,
 ---
-## 🎯 NEW IN v5.2.1 — Multi-LLM Identity & Install Polish
+## 🎯 NEW IN v5.3.0 — Take Control of Remote Voices
+- **Customize every remote announcement individually** — pass `--voice`, `--pretext`, `--music`, `--volume`, `--effects`, `--speed`, `--provider` on the command line for just that one message. No more editing config files and changing them back.
+- **Skip the intro phrase on demand** — `--pretext ""` suppresses the pretext for a single message.
+- **Long messages and special characters work correctly on Windows** — text with quotes, apostrophes, emoji, or multi-line content no longer gets truncated on its way to the voice engine.
+- **Voice playback works on Windows servers with no monitor** — a background helper runs in your user session and picks up announcements from a queue, so audio plays even when SSH'ing in headless.
+- **Voice preview on remote servers streams to the right device** — TUI preview no longer falls back to local audio on machines without speakers.
+- **No more double intro phrases** when both sender and receiver have pretext configured.
+- **55 new tests** for BMAD party mode voice assignment and agent isolation.
+## 🎯 v5.2.1 — Multi-LLM Identity & Install Polish
 - **Copilot gets its own voice + pretext + music** — "Copilot here" with bossa nova, fully distinct from Claude Code and Codex.
 - **Per-tool MCP configs with explicit identity** — `.vscode/mcp.json`, `.codex/config.toml`, `~/.copilot/mcp-config.json` each set their own `AGENTVIBES_LLM`.

package/RELEASE_NOTES.md CHANGED Viewed

@@ -1,5 +1,97 @@
 # AgentVibes Release Notes
+## 🎯 v5.3.0 — Take Control of Remote Voices
+**Release Date:** April 2026
+If you're using AgentVibes to send voice announcements from a server to
+your phone, laptop, or another machine, this release puts you in the
+driver's seat. Every call can now pick its own voice, background music,
+intro phrase, reverb, volume, and speed — right from the command line,
+for just that one message.
+### ✨ What's New
+#### You can now customize every announcement individually
+Before, if you wanted a different voice or music for one specific
+message, you had to change a config file (and remember to change it
+back). Now you just add a flag to the command.
+Want Winston to use his own voice, intro phrase, and background track for
+this one deploy notification? Easy:
+```bash
+bash .claude/hooks/play-tts-ssh-remote.sh \
+  --text "Deploy complete" \
+  --voice "en_US-ryan-high" \
+  --pretext "Winston here" \
+  --music "Late Night Hip Hop Groove.mp3" \
+  --volume 0.25
+```
+Anything you don't specify falls back to your normal settings. Want to
+skip the intro phrase just this once? Pass `--pretext ""` and it stays
+silent before the message.
+**Available flags:**
+- `--voice` — which Piper voice to use
+- `--pretext` — the intro phrase before the message (pass `""` to skip it)
+- `--music` — background music track (filenames with spaces now work!)
+- `--volume` — how loud the background music is (0.0 to 1.0)
+- `--effects` — sound effects chain like reverb
+- `--speed` — how fast the voice speaks
+- `--provider` — which TTS engine to use
+- `--agent` — which agent personality to use
+The old way of calling the script still works, so nothing you've already
+set up will break.
+### 🛠 Reliability Fixes
+- **Long messages and special characters no longer get cut off.** On
+  Windows, long announcements or text with quotes, apostrophes, or
+  emoji were getting mangled before they reached the voice engine.
+  Fixed — your message now arrives exactly as you sent it, no matter
+  how long or weird.
+- **Voice announcements now work on Windows servers with no monitor.**
+  Windows refuses to play audio in the "service" session that SSH
+  normally uses. A small background helper now runs in your regular
+  user session and picks up announcements from a queue, so audio plays
+  correctly even on headless servers.
+- **Voice preview in the TUI works on remote servers.** Before, if you
+  previewed a voice from a server with no speakers, it would try to
+  play locally (and fail). Now it correctly streams to whatever remote
+  device you've configured.
+- **No more double intro phrases.** If you set a pretext on both the
+  sending server and the receiving machine, you used to hear it twice.
+  The sender's version wins now — the receiver won't add its own on top.
+- **Remote streaming settings now actually stick.** A recent change
+  accidentally caused remote-streaming setups (`ssh-remote`,
+  `agentvibes-receiver`) to get overridden and fall back to local
+  playback. Fixed.
+- **Long announcements don't get killed mid-sentence.** The safety
+  timeout that stops stuck audio was too aggressive for long messages.
+  It's now generous enough to handle paragraph-length announcements.
+- **Cleaner installer state** — when you install AgentVibes for Claude
+  Code, it now writes its TTS provider file explicitly instead of
+  relying on implicit state.
+### 🧪 Testing
+55 new tests make sure BMAD party mode keeps working: each agent gets
+their unique voice and music, agents don't accidentally share the same
+Piper speaker ID, and the installer always points party mode at the
+cross-platform entry point.
+---
 ## 🎯 v5.2.1 — Multi-LLM Identity & Install Polish
 **Release Date:** April 2026

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "$schema": "https://json.schemastore.org/package.json",
   "name": "agentvibes",
-  "version": "5.2.1",
+  "version": "5.3.0",
   "description": "Now your AI Agents can finally talk back! Professional TTS voice for Claude Code, Claude Desktop (via MCP), and Clawdbot with multi-provider support.",
   "homepage": "https://agentvibes.org",
   "keywords": [

package/src/console/tabs/setup-tab.js CHANGED Viewed

@@ -1278,29 +1278,32 @@ export function createSetupTab(screen, services) {
       const phrase = SAMPLE_PHRASES[Math.floor(Math.random() * SAMPLE_PHRASES.length)];
       // Route through remote provider if active
+      // Search order: CLAUDE_PROJECT_DIR → cwd → package root → home
       const _remoteProviders = ['ssh-remote', 'agentvibes-receiver'];
       let _activeProvider = '';
       try {
-        const _projectRoot = path.resolve(__dirname, '..', '..');
+        const _pkgRoot = path.resolve(__dirname, '..', '..');
         const _provPaths = [
-          path.join(_projectRoot, '.claude', 'tts-provider.txt'),
+          process.env.CLAUDE_PROJECT_DIR && path.join(process.env.CLAUDE_PROJECT_DIR, '.claude', 'tts-provider.txt'),
+          path.join(process.cwd(), '.claude', 'tts-provider.txt'),
+          path.join(_pkgRoot, '.claude', 'tts-provider.txt'),
           path.join(os.homedir(), '.claude', 'tts-provider.txt'),
-        ];
+        ].filter(Boolean);
         for (const p of _provPaths) {
           if (fs.existsSync(p)) { _activeProvider = fs.readFileSync(p, 'utf8').trim(); break; }
         }
       } catch {}
       if (_remoteProviders.includes(_activeProvider)) {
-        const _projectRoot = path.resolve(__dirname, '..', '..');
+        const _hooksBase = process.env.CLAUDE_PROJECT_DIR || process.cwd();
         let rProc;
         if (_isWin) {
-          const _playTts = path.join(_projectRoot, '.claude', 'hooks-windows', 'play-tts.ps1');
+          const _playTts = path.join(_hooksBase, '.claude', 'hooks-windows', 'play-tts.ps1');
           rProc = spawn('powershell', ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', _playTts, phrase, voiceId], {
             stdio: 'ignore', detached: false, windowsHide: true, env: _spawnEnv,
           });
         } else {
-          const _playTts = path.join(_projectRoot, '.claude', 'hooks', 'play-tts.sh');
+          const _playTts = path.join(_hooksBase, '.claude', 'hooks', 'play-tts.sh');
           rProc = spawn('bash', [_playTts, phrase, voiceId], {
             stdio: 'ignore', detached: true, env: _spawnEnv,
           });

package/src/console/tabs/voices-tab.js CHANGED Viewed

@@ -868,14 +868,17 @@ export function createVoicesTab(screen, services) {
     _playingVoiceId = null;
     // Check if we should route through remote provider (ssh-remote / agentvibes-receiver)
+    // Search order: CLAUDE_PROJECT_DIR (actual project) → cwd → package root → home
     const projectRoot = path.resolve(__dirname, '..', '..');
     const remoteProviders = ['ssh-remote', 'agentvibes-receiver'];
     let activeProvider = '';
     try {
       const providerPaths = [
+        process.env.CLAUDE_PROJECT_DIR && path.join(process.env.CLAUDE_PROJECT_DIR, '.claude', 'tts-provider.txt'),
+        path.join(process.cwd(), '.claude', 'tts-provider.txt'),
         path.join(projectRoot, '.claude', 'tts-provider.txt'),
         path.join(os.homedir(), '.claude', 'tts-provider.txt'),
-      ];
+      ].filter(Boolean);
       for (const p of providerPaths) {
         if (fs.existsSync(p)) { activeProvider = fs.readFileSync(p, 'utf8').trim(); break; }
       }
@@ -884,14 +887,17 @@ export function createVoicesTab(screen, services) {
     if (remoteProviders.includes(activeProvider)) {
       const isWindows = process.platform === 'win32' && !process.env.WSL_DISTRO_NAME;
       const phrase = SAMPLE_PHRASES[Math.floor(Math.random() * SAMPLE_PHRASES.length)];
+      // Resolve play-tts from the actual project (CLAUDE_PROJECT_DIR / cwd),
+      // not the npm package root — hooks live in the user's project dir.
+      const hooksBase = process.env.CLAUDE_PROJECT_DIR || process.cwd();
       let proc;
       if (isWindows) {
-        const playTts = path.join(projectRoot, '.claude', 'hooks-windows', 'play-tts.ps1');
+        const playTts = path.join(hooksBase, '.claude', 'hooks-windows', 'play-tts.ps1');
         proc = spawn('powershell', ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', playTts, phrase, voiceId], {
           stdio: 'ignore', detached: false, windowsHide: true, env: _spawnEnv,
         });
       } else {
-        const playTts = path.join(projectRoot, '.claude', 'hooks', 'play-tts.sh');
+        const playTts = path.join(hooksBase, '.claude', 'hooks', 'play-tts.sh');
         proc = spawn('bash', [playTts, phrase, voiceId], {
           stdio: 'ignore', detached: true, env: _spawnEnv,
         });

package/src/services/llm-provider-service.js CHANGED Viewed

@@ -209,6 +209,19 @@ export async function installClaudeMcp(targetDir) {
     await installer.copyBackgroundMusicFiles(targetDir, silentSpinner);
     ensureDefaultLlmConfigSync('claude-code', targetDir);
+    // Explicitly write tts-provider.txt so `get_active_provider()` in
+    // provider-manager.sh doesn't silently fall back to "piper".  Without
+    // this, headless servers with no audio device hit a confusing failure
+    // mode where TTS tries to synth locally and fails silently.  Users
+    // can still change the provider via the Setup TUI or slash command.
+    const ttsProviderPath = path.join(targetDir, '.claude', 'tts-provider.txt');
+    try {
+      await fs.access(ttsProviderPath);
+      // Already exists — user has explicitly set a provider, don't clobber
+    } catch {
+      await fs.writeFile(ttsProviderPath, 'piper\n');
+    }
     return { success: true, mcpCreated, mcpError };
   } catch (err) {
     return { success: false, error: err.message, mcpError };