agentvibes 4.0.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/.claude/config/audio-effects.cfg +3 -2
  2. package/.claude/config/background-music-position.txt +1 -1
  3. package/.claude/hooks/audio-processor.sh +87 -43
  4. package/.claude/hooks/bmad-speak.sh +184 -27
  5. package/.claude/hooks/play-tts-enhanced.sh +40 -5
  6. package/.claude/hooks/play-tts-macos.sh +29 -6
  7. package/.claude/hooks/play-tts-piper.sh +174 -67
  8. package/.claude/hooks/play-tts-soprano.sh +42 -6
  9. package/.claude/hooks/play-tts-ssh-remote.sh +117 -38
  10. package/.claude/hooks/play-tts.sh +12 -9
  11. package/.claude/hooks/session-start-tts.sh +10 -0
  12. package/.claude/hooks/stop-tts.sh +84 -0
  13. package/.claude/hooks/tts-queue-worker.sh +51 -20
  14. package/.claude/hooks/tts-queue.sh +37 -8
  15. package/.claude/hooks/voice-manager.sh +5 -1
  16. package/CLAUDE.md +0 -11
  17. package/README.md +176 -78
  18. package/RELEASE_NOTES.md +1197 -60
  19. package/bin/agentvibes-voice-browser.js +35 -21
  20. package/mcp-server/server.py +36 -0
  21. package/package.json +1 -3
  22. package/src/console/app.js +23 -5
  23. package/src/console/constants/personalities.js +44 -0
  24. package/src/console/footer-config.js +8 -0
  25. package/src/console/navigation.js +3 -1
  26. package/src/console/tabs/agents-tab.js +1219 -72
  27. package/src/console/tabs/install-tab.js +2 -1
  28. package/src/console/tabs/placeholder-tab.js +9 -1
  29. package/src/console/tabs/receiver-tab.js +1212 -0
  30. package/src/console/tabs/settings-tab.js +33 -323
  31. package/src/console/widgets/destroy-list.js +25 -0
  32. package/src/console/widgets/format-utils.js +89 -0
  33. package/src/console/widgets/notice.js +55 -0
  34. package/src/console/widgets/personality-picker.js +185 -0
  35. package/src/console/widgets/reverb-picker.js +94 -0
  36. package/src/console/widgets/track-picker.js +285 -0
  37. package/src/installer.js +54 -2
  38. package/src/services/agent-voice-store.js +282 -22
  39. package/src/services/config-service.js +24 -0
  40. package/src/services/navigation-service.js +1 -1
  41. package/src/utils/music-file-validator.js +41 -31
  42. package/templates/agentvibes-receiver.sh +431 -111
@@ -78,7 +78,8 @@ elif [[ -f "$GLOBAL_MUTE_FILE" ]]; then
78
78
  fi
79
79
 
80
80
  TEXT="${1:-}"
81
- VOICE_OVERRIDE="${2:-}" # Optional: voice name or ID
81
+ VOICE_OVERRIDE="${2:-}" # Optional: voice name or ID
82
+ AGENT_PROFILE_FILE="${3:-}" # Optional: path to per-agent profile JSON (from bmad-speak.sh)
82
83
 
83
84
  # Security: Validate inputs
84
85
  if [[ -z "$TEXT" ]]; then
@@ -92,14 +93,13 @@ if [[ -n "$VOICE_OVERRIDE" ]] && [[ "$VOICE_OVERRIDE" =~ [';|&$`<>(){}'] ]]; the
92
93
  exit 1
93
94
  fi
94
95
 
95
- # Remove backslash escaping that Claude might add for special chars
96
- # In single quotes these don't need escaping, but Claude sometimes adds backslashes
96
+ # Remove backslash escaping that Claude might add for SAFE special chars only
97
+ # SECURITY: Only unescape punctuation chars that cannot form shell commands (#127)
98
+ # Never unescape $, `, \, or other shell metacharacters
97
99
  TEXT="${TEXT//\\!/!}" # Remove \!
98
- TEXT="${TEXT//\\\$/\$}" # Remove \$
99
100
  TEXT="${TEXT//\\?/?}" # Remove \?
100
101
  TEXT="${TEXT//\\,/,}" # Remove \,
101
102
  TEXT="${TEXT//\\./.}" # Remove \. (keep the period)
102
- TEXT="${TEXT//\\\\/\\}" # Remove \\ (escaped backslash)
103
103
 
104
104
  # Prepend intro text (pretext) if configured
105
105
  # Check project-local first, then global
@@ -154,10 +154,11 @@ speak_text() {
154
154
  local text="$1"
155
155
  local voice="${2:-}"
156
156
  local provider="${3:-$ACTIVE_PROVIDER}"
157
+ local profile_file="${4:-$AGENT_PROFILE_FILE}"
157
158
 
158
159
  case "$provider" in
159
160
  piper)
160
- "$SCRIPT_DIR/play-tts-piper.sh" "$text" "$voice"
161
+ "$SCRIPT_DIR/play-tts-piper.sh" "$text" "$voice" "$profile_file"
161
162
  ;;
162
163
  soprano)
163
164
  "$SCRIPT_DIR/play-tts-soprano.sh" "$text" "$voice"
@@ -206,7 +207,8 @@ handle_learning_mode() {
206
207
 
207
208
  # 2. Auto-translate to target language
208
209
  local translated
209
- translated=$(python3 "$SCRIPT_DIR/translator.py" "$TEXT" "$target_lang" 2>/dev/null) || translated="$TEXT"
210
+ # SECURITY: Add timeout to prevent hanging (#134)
211
+ translated=$(timeout 5 python3 "$SCRIPT_DIR/translator.py" "$TEXT" "$target_lang" 2>/dev/null) || translated="$TEXT"
210
212
 
211
213
  # Small pause between languages
212
214
  sleep 0.5
@@ -241,7 +243,8 @@ handle_translation_mode() {
241
243
 
242
244
  # Translate text
243
245
  local translated
244
- translated=$(python3 "$SCRIPT_DIR/translator.py" "$TEXT" "$translate_to" 2>/dev/null) || translated="$TEXT"
246
+ # SECURITY: Add timeout to prevent hanging (#134)
247
+ translated=$(timeout 5 python3 "$SCRIPT_DIR/translator.py" "$TEXT" "$translate_to" 2>/dev/null) || translated="$TEXT"
245
248
 
246
249
  # Get voice for target language if no override specified
247
250
  local voice_to_use="$VOICE_OVERRIDE"
@@ -279,7 +282,7 @@ fi
279
282
  # Normal single-language mode - route to appropriate provider implementation
280
283
  case "$ACTIVE_PROVIDER" in
281
284
  piper)
282
- exec "$SCRIPT_DIR/play-tts-piper.sh" "$TEXT" "$VOICE_OVERRIDE"
285
+ exec "$SCRIPT_DIR/play-tts-piper.sh" "$TEXT" "$VOICE_OVERRIDE" "$AGENT_PROFILE_FILE"
283
286
  ;;
284
287
  soprano)
285
288
  exec "$SCRIPT_DIR/play-tts-soprano.sh" "$TEXT" "$VOICE_OVERRIDE"
@@ -64,6 +64,16 @@ CONTEXT+="4. Keep under 150 chars\n"
64
64
  CONTEXT+="5. Always include errors\n\n"
65
65
  CONTEXT+="Quick Ref: low=action+result | medium=+key decisions | high=+full reasoning"
66
66
 
67
+ # Add BMAD agent voice routing instructions
68
+ CONTEXT+="\n\n## BMAD Agent Voice Routing\n"
69
+ CONTEXT+="If \`.bmad-agent-context\` exists, check its content:\n\n"
70
+ CONTEXT+="**Single agent mode** (content is an agent ID like \"analyst\"):\n"
71
+ CONTEXT+="Use \`bmad-speak.sh\` instead of \`play-tts.sh\`:\n\n\n"
72
+ CONTEXT+="**Party mode** (content is \"party-mode\"):\n"
73
+ CONTEXT+="Each agent speaks via \`bmad-speak.sh\` with their display name. The queue system ensures sequential playback — agents speak one at a time, not overlapping:\n\n"
74
+ CONTEXT+="IMPORTANT: In party mode, do NOT use \`play-tts.sh\`. Always use \`bmad-speak.sh\` with the agent's display name. Do NOT call multiple bmad-speak.sh in parallel — call them sequentially so the queue processes them in order.\n\n"
75
+ CONTEXT+="If \`.bmad-agent-context\` does NOT exist, use \`play-tts.sh\` as normal."
76
+
67
77
  # Escape for JSON (handle newlines, quotes, backslashes)
68
78
  ESCAPED=$(printf '%s' "$CONTEXT" | sed 's/\\/\\\\/g; s/"/\\"/g; s/\t/\\t/g')
69
79
 
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env bash
2
+ set -eo pipefail
3
+ #
4
+ # File: .claude/hooks/stop-tts.sh
5
+ #
6
+ # AgentVibes Stop Hook — Auto-speak Claude's response via TTS
7
+ # Reads last_assistant_message from stdin JSON and speaks it.
8
+ #
9
+
10
+ # Fix locale warnings
11
+ export LC_ALL=C
12
+
13
+ # Get script directory
14
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
15
+
16
+ # Check if AgentVibes play-tts.sh exists
17
+ if [[ ! -f "$SCRIPT_DIR/play-tts.sh" ]]; then
18
+ exit 0
19
+ fi
20
+
21
+ # Check if muted
22
+ if [[ -f "${CLAUDE_PROJECT_DIR:-.}/.claude/tts-muted.txt" ]] || [[ -f "$HOME/.claude/tts-muted.txt" ]]; then
23
+ MUTED=$(cat "${CLAUDE_PROJECT_DIR:-.}/.claude/tts-muted.txt" 2>/dev/null || cat "$HOME/.claude/tts-muted.txt" 2>/dev/null || echo "")
24
+ if [[ "$MUTED" == "true" ]]; then
25
+ exit 0
26
+ fi
27
+ fi
28
+
29
+ # Read JSON from stdin
30
+ INPUT=$(cat)
31
+
32
+ # Extract last_assistant_message using node (available in Claude Code env)
33
+ MESSAGE=$(echo "$INPUT" | node -e "
34
+ let d = '';
35
+ process.stdin.on('data', c => d += c);
36
+ process.stdin.on('end', () => {
37
+ try {
38
+ const j = JSON.parse(d);
39
+ const msg = j.last_assistant_message || '';
40
+ // Strip markdown before TTS — prevent "asterisk asterisk" being spoken literally
41
+ const stripped = msg
42
+ .replace(/\*\*/g, '').replace(/\*/g, '')
43
+ .replace(/`[^`]*`/g, '').replace(/`/g, '')
44
+ .replace(/#+\s*/g, '')
45
+ .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // [text](url) → text
46
+ // Truncate to 150 chars for TTS
47
+ const trimmed = stripped.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim();
48
+ process.stdout.write(trimmed.length > 150 ? trimmed.slice(0, 147) + '...' : trimmed);
49
+ } catch(e) {
50
+ process.exit(0);
51
+ }
52
+ });
53
+ " 2>/dev/null) || exit 0
54
+
55
+ # Skip if empty or too short
56
+ if [[ -z "$MESSAGE" ]] || [[ ${#MESSAGE} -lt 2 ]]; then
57
+ exit 0
58
+ fi
59
+
60
+ # Check if a BMAD agent is active — route through bmad-speak.sh for per-agent voice
61
+ PROJECT_DIR="${CLAUDE_PROJECT_DIR:-.}"
62
+ BMAD_CONTEXT="$PROJECT_DIR/.bmad-agent-context"
63
+ BMAD_SPEAK="$PROJECT_DIR/.claude/hooks/bmad-speak.sh"
64
+
65
+ if [[ -f "$BMAD_CONTEXT" ]] && [[ -f "$BMAD_SPEAK" ]]; then
66
+ AGENT_ID=$(head -1 "$BMAD_CONTEXT" 2>/dev/null | tr -d '[:space:]')
67
+
68
+ # Party mode: context file contains "party-mode" — skip stop hook TTS entirely.
69
+ # Party mode handles its own TTS inline via bmad-speak.sh per agent.
70
+ if [[ "$AGENT_ID" == "party-mode" ]]; then
71
+ exit 0
72
+ fi
73
+
74
+ if [[ -n "$AGENT_ID" ]] && [[ "$AGENT_ID" =~ ^[a-zA-Z0-9_-]+$ ]]; then
75
+ # Single agent mode: use bmad-speak for per-agent voice/pretext
76
+ bash "$BMAD_SPEAK" "$AGENT_ID" "$MESSAGE" &
77
+ exit 0
78
+ fi
79
+ fi
80
+
81
+ # Default: speak with global voice (run in background so we don't block Claude)
82
+ "$SCRIPT_DIR/play-tts.sh" "$MESSAGE" &
83
+
84
+ exit 0
@@ -13,7 +13,7 @@ if [[ -n "${XDG_RUNTIME_DIR:-}" ]] && [[ -d "$XDG_RUNTIME_DIR" ]]; then
13
13
  QUEUE_DIR="$XDG_RUNTIME_DIR/agentvibes-tts-queue"
14
14
  else
15
15
  # Fallback to user-specific temp directory
16
- QUEUE_DIR="/tmp/agentvibes-tts-queue-$USER"
16
+ QUEUE_DIR="/tmp/agentvibes-tts-queue-$(id -u)"
17
17
  fi
18
18
 
19
19
  # Security: Validate queue directory exists and has correct ownership
@@ -71,42 +71,73 @@ process_queue() {
71
71
  exit 0
72
72
  fi
73
73
 
74
- # Wait 1 second and check again
75
- sleep 1
74
+ # Wait for a new queue item — use inotifywait if available to avoid polling
75
+ # Use a 1-second timeout (-t 1) so the idle counter still advances correctly
76
+ if command -v inotifywait &>/dev/null; then
77
+ inotifywait -q -e create -t 1 "$QUEUE_DIR" 2>/dev/null || true
78
+ else
79
+ sleep 1
80
+ fi
76
81
  continue
77
82
  fi
78
83
 
79
84
  # Reset idle counter - we have work
80
85
  idle_count=0
81
86
 
82
- # Load TTS request
83
- source "$queue_item"
87
+ # Load queue item — explicit key=value parsing (SECURITY: never source untrusted files)
88
+ TEXT_FILE=""
89
+ VOICE=""
90
+ AGENT=""
91
+ PROFILE_PATH=""
92
+ PLAY_WAV=""
93
+ while IFS='=' read -r _key _val; do
94
+ case "$_key" in
95
+ TEXT_FILE) TEXT_FILE="$_val" ;;
96
+ VOICE) VOICE="$_val" ;;
97
+ AGENT) AGENT="$_val" ;;
98
+ PROFILE_PATH) PROFILE_PATH="$_val" ;;
99
+ PLAY_WAV) PLAY_WAV="$_val" ;;
100
+ esac
101
+ done < "$queue_item"
102
+
103
+ # Check if this is a pre-generated WAV playback item
104
+ if [[ -n "${PLAY_WAV:-}" ]] && [[ -f "$PLAY_WAV" ]]; then
105
+ # Play the pre-generated WAV directly (synthesis already done by bmad-speak)
106
+ if command -v paplay &>/dev/null; then
107
+ paplay "$PLAY_WAV" 2>/dev/null || true
108
+ elif command -v aplay &>/dev/null; then
109
+ aplay -q "$PLAY_WAV" 2>/dev/null || true
110
+ elif command -v ffplay &>/dev/null; then
111
+ ffplay -nodisp -autoexit -loglevel quiet "$PLAY_WAV" 2>/dev/null || true
112
+ fi
113
+ else
114
+ # Full TTS request — read text from companion file, use voice/agent directly
115
+ TEXT=""
116
+ if [[ -n "${TEXT_FILE:-}" ]] && [[ -f "$TEXT_FILE" ]]; then
117
+ TEXT=$(cat "$TEXT_FILE")
118
+ rm -f "$TEXT_FILE"
119
+ fi
120
+ AGENT_PROFILE="${PROFILE_PATH:-}"
84
121
 
85
- # Decode base64 values
86
- TEXT=$(echo -n "$TEXT_B64" | base64 -d)
87
- VOICE=$(echo -n "$VOICE_B64" | base64 -d)
88
- AGENT=$(echo -n "${AGENT_B64:-}" | base64 -d 2>/dev/null || echo "default")
122
+ export AGENTVIBES_AGENT_PROFILE="$AGENT_PROFILE"
89
123
 
90
- # Use enhanced TTS with agent-specific background music if agent is specified
91
- # and background music is enabled
92
- if [[ -f "$SCRIPT_DIR/play-tts-enhanced.sh" ]] && [[ "$AGENT" != "default" ]] && [[ -n "$AGENT" ]]; then
93
- # Party mode: each agent gets their unique background music from audio-effects.cfg
94
- bash "$SCRIPT_DIR/play-tts-enhanced.sh" "$TEXT" "$AGENT" "$VOICE" || true
95
- else
96
- # Standard TTS without background music
97
- # Display output to show file location (GitHub Issue #39)
98
124
  if [[ -n "${VOICE:-}" ]]; then
99
- bash "$SCRIPT_DIR/play-tts.sh" "$TEXT" "$VOICE" || true
125
+ bash "$SCRIPT_DIR/play-tts.sh" "$TEXT" "${VOICE}" || true
100
126
  else
101
127
  bash "$SCRIPT_DIR/play-tts.sh" "$TEXT" || true
102
128
  fi
129
+
130
+ if [[ -n "$AGENT_PROFILE" ]] && [[ -f "$AGENT_PROFILE" ]]; then
131
+ rm -f "$AGENT_PROFILE"
132
+ fi
133
+ unset AGENTVIBES_AGENT_PROFILE
103
134
  fi
104
135
 
105
136
  # Add configurable pause between speakers for natural conversation flow
106
137
  sleep $SPEAKER_DELAY
107
138
 
108
- # Remove processed item
109
- rm -f "$queue_item"
139
+ # Remove processed item and any companion text file
140
+ rm -f "$queue_item" "${queue_item%.queue}.txt"
110
141
  done
111
142
  }
112
143
 
@@ -14,7 +14,7 @@ if [[ -n "${XDG_RUNTIME_DIR:-}" ]] && [[ -d "$XDG_RUNTIME_DIR" ]]; then
14
14
  QUEUE_DIR="$XDG_RUNTIME_DIR/agentvibes-tts-queue"
15
15
  else
16
16
  # Fallback to user-specific temp directory
17
- QUEUE_DIR="/tmp/agentvibes-tts-queue-$USER"
17
+ QUEUE_DIR="/tmp/agentvibes-tts-queue-$(id -u)"
18
18
  fi
19
19
 
20
20
  QUEUE_LOCK="$QUEUE_DIR/queue.lock"
@@ -34,20 +34,26 @@ fi
34
34
  # @param $1 dialogue text
35
35
  # @param $2 voice name (optional)
36
36
  # @param $3 agent name (optional, for background music in party mode)
37
+ # @param $4 agent profile path (optional, PID-scoped temp JSON with reverb/personality/music overrides)
37
38
  add_to_queue() {
38
39
  local text="$1"
39
40
  local voice="${2:-}"
40
41
  local agent="${3:-default}"
42
+ local profile_path="${4:-}"
41
43
 
42
44
  # Create unique queue item with timestamp
43
45
  local timestamp=$(date +%s%N)
44
46
  local queue_file="$QUEUE_DIR/$timestamp.queue"
45
47
 
46
- # Write request to queue file (base64 encoded to handle all special chars)
48
+ # Write request to queue file using direct storage
49
+ # Text is stored in a separate .txt file (handles newlines and special chars safely)
50
+ # Voice and agent are simple identifiers with no special chars
51
+ printf '%s' "$text" > "${queue_file%.queue}.txt"
47
52
  cat > "$queue_file" <<EOF
48
- TEXT_B64=$(echo -n "$text" | base64 -w0)
49
- VOICE_B64=$(echo -n "$voice" | base64 -w0)
50
- AGENT_B64=$(echo -n "$agent" | base64 -w0)
53
+ TEXT_FILE=${queue_file%.queue}.txt
54
+ VOICE=$voice
55
+ AGENT=$agent
56
+ PROFILE_PATH=$profile_path
51
57
  EOF
52
58
 
53
59
  # Start queue worker if not already running
@@ -113,10 +119,32 @@ show_queue() {
113
119
  fi
114
120
  }
115
121
 
122
+ # @function play_wav
123
+ # @intent Queue a pre-generated WAV file for sequential playback
124
+ # @param $1 path to WAV file
125
+ play_wav() {
126
+ local wav_file="$1"
127
+ [[ -z "$wav_file" ]] && return 1
128
+ [[ ! -f "$wav_file" ]] && return 1
129
+
130
+ local timestamp=$(date +%s%N)
131
+ local queue_file="$QUEUE_DIR/$timestamp.queue"
132
+
133
+ # Write a playback-only queue item (no synthesis needed)
134
+ cat > "$queue_file" <<EOF
135
+ PLAY_WAV=$wav_file
136
+ EOF
137
+
138
+ start_worker_if_needed
139
+ }
140
+
116
141
  # Main command dispatcher
117
142
  case "${1:-help}" in
118
143
  add)
119
- add_to_queue "${2:-}" "${3:-}" "${4:-default}"
144
+ add_to_queue "${2:-}" "${3:-}" "${4:-default}" "${5:-}"
145
+ ;;
146
+ play)
147
+ play_wav "${2:-}"
120
148
  ;;
121
149
  clear)
122
150
  clear_queue
@@ -125,10 +153,11 @@ case "${1:-help}" in
125
153
  show_queue
126
154
  ;;
127
155
  *)
128
- echo "Usage: tts-queue.sh {add|clear|status}"
156
+ echo "Usage: tts-queue.sh {add|play|clear|status}"
129
157
  echo ""
130
158
  echo "Commands:"
131
- echo " add <text> [voice] [agent] Add TTS request to queue with optional agent for background music"
159
+ echo " add <text> [voice] [agent] Add TTS request to queue"
160
+ echo " play <wav_file> Queue a pre-generated WAV for playback"
132
161
  echo " clear Clear all pending requests"
133
162
  echo " status Show queue status"
134
163
  exit 1
@@ -49,7 +49,11 @@ to_lower() {
49
49
  # 2. Script location (for direct slash command usage)
50
50
  # 3. Global ~/.claude (fallback)
51
51
 
52
- if [[ -n "$CLAUDE_PROJECT_DIR" ]] && [[ -d "$CLAUDE_PROJECT_DIR/.claude" ]]; then
52
+ # SECURITY: Canonicalize path to prevent traversal (#128)
53
+ if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]]; then
54
+ CLAUDE_PROJECT_DIR=$(cd "${CLAUDE_PROJECT_DIR}" 2>/dev/null && pwd -P) || CLAUDE_PROJECT_DIR=""
55
+ fi
56
+ if [[ -n "${CLAUDE_PROJECT_DIR:-}" ]] && [[ -d "$CLAUDE_PROJECT_DIR/.claude" ]]; then
53
57
  # MCP context: Use the project directory where MCP was invoked
54
58
  CLAUDE_DIR="$CLAUDE_PROJECT_DIR/.claude"
55
59
  else
package/CLAUDE.md CHANGED
@@ -4,17 +4,6 @@
4
4
  **Updated:** 2026-02-15
5
5
  **Status:** Active (Using BMAD Methodology)
6
6
 
7
- ## TTS Protocol (MANDATORY)
8
-
9
- **ALWAYS call TTS inline (never `run_in_background: true`):**
10
- ```bash
11
- PULSE_SERVER=unix:/mnt/wslg/PulseServer bash ".claude/hooks/play-tts.sh" "text to speak"
12
- ```
13
- - Provider: `piper` (WSL bash, NOT Windows PowerShell)
14
- - Mode: `full` (`.agentvibes/config/mode.txt`) — Claude calls TTS directly, no stop-hook Audio Summary
15
- - Pretext: configured in `.agentvibes/config/agentvibes.json` (prepended automatically by play-tts.sh)
16
- - Call at: task acknowledgment + task completion (high verbosity = also reasoning/findings)
17
-
18
7
  ## Overview
19
8
 
20
9
  AgentVibes is a Text-to-Speech system for AI assistants with personality support.