agentvibes 4.6.0 → 4.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/config/audio-effects.cfg +1 -1
- package/.claude/hooks/audio-processor.sh +1 -1
- package/.claude/hooks-windows/bmad-party-speak.ps1 +81 -0
- package/.claude/hooks-windows/bmad-speak.ps1 +32 -7
- package/.claude/hooks-windows/play-tts-piper.ps1 +43 -6
- package/.claude/hooks-windows/play-tts.ps1 +57 -30
- package/.mcp.json +7 -0
- package/README.md +64 -2
- package/RELEASE_NOTES.md +41 -0
- package/package.json +110 -110
- package/src/console/tabs/agents-tab.js +240 -34
- package/src/console/tabs/voices-tab.js +38 -5
- package/src/console/widgets/track-picker.js +50 -18
- package/templates/agentvibes-receiver.sh +1 -1
|
@@ -17,7 +17,7 @@ bmad-agent-tech-writer||agent_vibes_arabic_v2_loop.mp3|0.70
|
|
|
17
17
|
# BMAD Agents - each with unique audio personality|||
|
|
18
18
|
|||
|
|
19
19
|
# PM John - upbeat, driving energy|||
|
|
20
|
-
John|gain -1 equalizer 3000 1q +2|agentvibes_soft_flamenco_loop.mp3|0.
|
|
20
|
+
John|gain -1 equalizer 3000 1q +2|agentvibes_soft_flamenco_loop.mp3|0.20
|
|
21
21
|
|||
|
|
22
22
|
# Architect Winston - deep, authoritative|||
|
|
23
23
|
Winston|reverb 40 50 90 gain -2|agentvibes_soft_flamenco_loop.mp3|0.25
|
|
@@ -310,7 +310,7 @@ mix_background() {
|
|
|
310
310
|
fi
|
|
311
311
|
|
|
312
312
|
ffmpeg -y -i "$voice" -ss "$start_pos" -stream_loop -1 -i "$background" \
|
|
313
|
-
-filter_complex "[1:a]volume=${volume},afade=t=in:st=0:d=0.3,afade=t=out:st=${bg_fade_out_adjusted}:d=2[bg];[0:a]adelay=${voice_delay_ms}|${voice_delay_ms}[v];[v][bg]amix=inputs=2:duration=longest[out]" \
|
|
313
|
+
-filter_complex "[1:a]volume=${volume},afade=t=in:st=0:d=0.3,afade=t=out:st=${bg_fade_out_adjusted}:d=2[bg];[0:a]adelay=${voice_delay_ms}|${voice_delay_ms},volume=1.5[v];[v][bg]amix=inputs=2:duration=longest:normalize=0[out]" \
|
|
314
314
|
-map "[out]" $audio_settings -t "$total_duration" "$output" 2>/dev/null || {
|
|
315
315
|
echo "Warning: Background mixing failed, using voice only" >&2
|
|
316
316
|
cp "$voice" "$output"
|
|
@@ -108,6 +108,81 @@ try {
|
|
|
108
108
|
# "high" = full text
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
# --- Pre-synthesize WAV before acquiring mutex so synthesis overlaps with previous agent's playback ---
|
|
112
|
+
$PreSynthWav = $null
|
|
113
|
+
try {
|
|
114
|
+
# Resolve agent voice from voice map
|
|
115
|
+
$VoiceMapLocal = if ($ProjectRoot) { Join-Path $ProjectRoot ".agentvibes\bmad-voice-map.json" } else { $null }
|
|
116
|
+
$VoiceMapGlobal = Join-Path $env:USERPROFILE ".agentvibes\bmad-voice-map.json"
|
|
117
|
+
$VoiceMapFile = if ($VoiceMapLocal -and (Test-Path $VoiceMapLocal)) { $VoiceMapLocal }
|
|
118
|
+
elseif (Test-Path $VoiceMapGlobal) { $VoiceMapGlobal }
|
|
119
|
+
else { $null }
|
|
120
|
+
|
|
121
|
+
$AgentVoiceName = $null
|
|
122
|
+
$SpeakerId = $null
|
|
123
|
+
if ($VoiceMapFile) {
|
|
124
|
+
$vm = Get-Content $VoiceMapFile -Raw | ConvertFrom-Json
|
|
125
|
+
$profile = $vm.agents.$AgentId
|
|
126
|
+
if ($profile -and $profile.voice) {
|
|
127
|
+
$raw = $profile.voice
|
|
128
|
+
if ($raw -match '::') {
|
|
129
|
+
$parts = $raw -split '::'
|
|
130
|
+
$AgentVoiceName = $parts[0]
|
|
131
|
+
$SpeakerName = if ($parts.Length -ge 2) { $parts[1] } else { "" }
|
|
132
|
+
# NOTE: The suffix number (e.g. "14" in "Yara-14") is a display disambiguator,
|
|
133
|
+
# NOT the piper speaker index. Must look up real index from speaker_id_map.
|
|
134
|
+
if ($SpeakerName) {
|
|
135
|
+
$VoicesDir = "$env:USERPROFILE\.claude\piper-voices"
|
|
136
|
+
$OnnxJsonPath = "$VoicesDir\$AgentVoiceName.onnx.json"
|
|
137
|
+
if (Test-Path $OnnxJsonPath) {
|
|
138
|
+
try {
|
|
139
|
+
$onnxData = Get-Content $OnnxJsonPath -Raw -Encoding UTF8 | ConvertFrom-Json
|
|
140
|
+
$speakerIdMap = $onnxData.speaker_id_map
|
|
141
|
+
if ($speakerIdMap -and $speakerIdMap.PSObject.Properties[$SpeakerName]) {
|
|
142
|
+
$SpeakerId = [string]$speakerIdMap.PSObject.Properties[$SpeakerName].Value
|
|
143
|
+
}
|
|
144
|
+
} catch { }
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
} else {
|
|
148
|
+
$AgentVoiceName = $raw
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
# Locate piper
|
|
154
|
+
$PiperExe = "$env:LOCALAPPDATA\Programs\Piper\piper.exe"
|
|
155
|
+
if (-not (Test-Path $PiperExe)) {
|
|
156
|
+
$found = Get-Command piper.exe -ErrorAction SilentlyContinue
|
|
157
|
+
if ($found) { $PiperExe = $found.Source }
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
if (Test-Path $PiperExe) {
|
|
161
|
+
$VoicesDir = "$env:USERPROFILE\.claude\piper-voices"
|
|
162
|
+
# Fall back to first available voice if agent voice not found
|
|
163
|
+
if (-not $AgentVoiceName) {
|
|
164
|
+
$first = Get-ChildItem $VoicesDir -Filter "*.onnx" -ErrorAction SilentlyContinue | Select-Object -First 1
|
|
165
|
+
if ($first) { $AgentVoiceName = $first.BaseName }
|
|
166
|
+
}
|
|
167
|
+
if ($AgentVoiceName -and ($AgentVoiceName -match '^[a-zA-Z0-9_\-\.]+$')) {
|
|
168
|
+
$VoiceModel = Join-Path $VoicesDir "$AgentVoiceName.onnx"
|
|
169
|
+
if (Test-Path $VoiceModel) {
|
|
170
|
+
$AudioDir = "$env:USERPROFILE\.claude\audio"
|
|
171
|
+
if (-not (Test-Path $AudioDir)) { New-Item -ItemType Directory -Path $AudioDir -Force | Out-Null }
|
|
172
|
+
$PreSynthWav = Join-Path $AudioDir "tts-presynth-$([System.IO.Path]::GetRandomFileName() -replace '\..*').wav"
|
|
173
|
+
$piperArgs = @("--model", $VoiceModel, "--output-file", $PreSynthWav)
|
|
174
|
+
if ($SpeakerId) { $piperArgs += @("--speaker", $SpeakerId) }
|
|
175
|
+
$ResponseText | & $PiperExe @piperArgs 2>$null
|
|
176
|
+
if (-not (Test-Path $PreSynthWav) -or (Get-Item $PreSynthWav).Length -eq 0) {
|
|
177
|
+
$PreSynthWav = $null
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
} catch {
|
|
183
|
+
$PreSynthWav = $null # degrade gracefully β will synthesize inside mutex instead
|
|
184
|
+
}
|
|
185
|
+
|
|
111
186
|
# --- Speak with queue serialization (named mutex, cross-process) ---
|
|
112
187
|
$mutex = New-Object System.Threading.Mutex($false, "AgentVibesPartyModeTTSQueue")
|
|
113
188
|
try {
|
|
@@ -122,9 +197,15 @@ try {
|
|
|
122
197
|
|
|
123
198
|
if ($acquired) {
|
|
124
199
|
try {
|
|
200
|
+
# Pass pre-synthesized WAV path so play-tts.ps1 skips synthesis (reduces gap between agents)
|
|
201
|
+
if ($PreSynthWav) { $env:AGENTVIBES_PRESYNTHESIZED_WAV = $PreSynthWav }
|
|
125
202
|
# Pass positional args directly after -File (spaces handled by quoting via array)
|
|
126
203
|
& powershell -NoProfile -ExecutionPolicy Bypass -File $BmadSpeak $AgentId $ResponseText
|
|
127
204
|
} finally {
|
|
205
|
+
$env:AGENTVIBES_PRESYNTHESIZED_WAV = ""
|
|
206
|
+
if ($PreSynthWav -and (Test-Path $PreSynthWav)) {
|
|
207
|
+
Remove-Item $PreSynthWav -Force -ErrorAction SilentlyContinue
|
|
208
|
+
}
|
|
128
209
|
$mutex.ReleaseMutex()
|
|
129
210
|
}
|
|
130
211
|
} else {
|
|
@@ -50,6 +50,7 @@ $VoiceMapGlobal = Join-Path $env:USERPROFILE ".agentvibes\bmad-voice-map.json"
|
|
|
50
50
|
$VoiceMapFile = if (Test-Path $VoiceMapLocal) { $VoiceMapLocal } else { $VoiceMapGlobal }
|
|
51
51
|
|
|
52
52
|
$AgentVoice = ""
|
|
53
|
+
$AgentPretext = ""
|
|
53
54
|
$AgentPersonality = ""
|
|
54
55
|
$AgentBgEnabled = $false
|
|
55
56
|
$AgentBgTrack = ""
|
|
@@ -72,24 +73,35 @@ if (Test-Path $_BgVolFile) {
|
|
|
72
73
|
$AgentBgVolume = "0.20"
|
|
73
74
|
}
|
|
74
75
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
# Resolve agent ID and display name/title from manifest (needed for default pretext)
|
|
77
|
+
$AgentDisplayName = ""
|
|
78
|
+
$AgentTitle = ""
|
|
78
79
|
|
|
79
|
-
|
|
80
|
+
if (Test-Path $ManifestFile) {
|
|
81
|
+
try {
|
|
80
82
|
$ManifestRows = Import-Csv $ManifestFile -Encoding UTF8
|
|
81
83
|
foreach ($row in $ManifestRows) {
|
|
82
84
|
$id = ($row.PSObject.Properties | Select-Object -First 1).Value -replace '^"|"$', ''
|
|
83
85
|
$display = ($row.PSObject.Properties | Select-Object -Skip 1 -First 1).Value -replace '^"|"$', ''
|
|
86
|
+
$title = ($row.PSObject.Properties | Select-Object -Skip 2 -First 1).Value -replace '^"|"$', ''
|
|
84
87
|
if ($id -ieq $AgentNameOrId -or $display -like "$AgentNameOrId*") {
|
|
85
|
-
$AgentId
|
|
88
|
+
$AgentId = $id
|
|
89
|
+
$AgentDisplayName = $display
|
|
90
|
+
$AgentTitle = $title
|
|
86
91
|
break
|
|
87
92
|
}
|
|
88
93
|
}
|
|
94
|
+
} catch { }
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
if (Test-Path $VoiceMapFile) {
|
|
98
|
+
try {
|
|
99
|
+
$VoiceMap = Get-Content $VoiceMapFile -Raw | ConvertFrom-Json
|
|
89
100
|
|
|
90
101
|
if ($AgentId -and $VoiceMap.agents.$AgentId) {
|
|
91
102
|
$Profile = $VoiceMap.agents.$AgentId
|
|
92
103
|
if ($Profile.voice) { $AgentVoice = $Profile.voice }
|
|
104
|
+
if ($Profile.pretext) { $AgentPretext = $Profile.pretext }
|
|
93
105
|
if ($Profile.personality) { $AgentPersonality = $Profile.personality }
|
|
94
106
|
if ($Profile.backgroundMusic) {
|
|
95
107
|
$AgentBgEnabled = [bool]$Profile.backgroundMusic.enabled
|
|
@@ -105,6 +117,16 @@ if (Test-Path $VoiceMapFile) {
|
|
|
105
117
|
}
|
|
106
118
|
}
|
|
107
119
|
|
|
120
|
+
# Fall back to default pretext if none stored: "DisplayName, Title here."
|
|
121
|
+
# Matches AgentVoiceStore.getDefaultPretext() in agent-voice-store.js
|
|
122
|
+
if (-not $AgentPretext -and $AgentDisplayName) {
|
|
123
|
+
if ($AgentTitle) {
|
|
124
|
+
$AgentPretext = "$AgentDisplayName, $AgentTitle here."
|
|
125
|
+
} else {
|
|
126
|
+
$AgentPretext = "$AgentDisplayName here."
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
108
130
|
# ---------------------------------------------------------------------------
|
|
109
131
|
# Locate play-tts.ps1 β prefer project-local, fall back to global
|
|
110
132
|
$PlayTtsLocal = Join-Path $ProjectRoot ".claude\hooks-windows\play-tts.ps1"
|
|
@@ -163,11 +185,14 @@ if ($AgentBgEnabled -and $AgentBgTrack) {
|
|
|
163
185
|
}
|
|
164
186
|
|
|
165
187
|
try {
|
|
188
|
+
# Prepend pretext if configured (e.g. "As your UX designer")
|
|
189
|
+
$SpeakText = if ($AgentPretext) { "$AgentPretext. $Dialogue" } else { $Dialogue }
|
|
190
|
+
|
|
166
191
|
# Speak with agent's voice (or global voice if none configured)
|
|
167
192
|
if ($AgentVoice) {
|
|
168
|
-
& powershell -NoProfile -ExecutionPolicy Bypass -File $PlayTtsScript $
|
|
193
|
+
& powershell -NoProfile -ExecutionPolicy Bypass -File $PlayTtsScript $SpeakText $AgentVoice
|
|
169
194
|
} else {
|
|
170
|
-
& powershell -NoProfile -ExecutionPolicy Bypass -File $PlayTtsScript $
|
|
195
|
+
& powershell -NoProfile -ExecutionPolicy Bypass -File $PlayTtsScript $SpeakText
|
|
171
196
|
}
|
|
172
197
|
} finally {
|
|
173
198
|
# Restore personality
|
|
@@ -70,15 +70,52 @@ elseif (Test-Path $VoiceFile) {
|
|
|
70
70
|
$VoiceName = (Get-Content $VoiceFile -Raw).Trim()
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
# Strip display name suffix (e.g. "en_US-libritts-high::
|
|
74
|
-
# and
|
|
73
|
+
# Strip display name suffix (e.g. "en_US-libritts-high::Holly-7" -> "en_US-libritts-high")
|
|
74
|
+
# and resolve the real Piper speaker index.
|
|
75
|
+
# IMPORTANT: The trailing number in a speaker name (e.g. "Holly-7") is a disambiguation
|
|
76
|
+
# suffix, NOT the speaker index. Real index must be looked up from voice-assignments.json.
|
|
75
77
|
if ($VoiceName -match '::') {
|
|
76
78
|
$parts = $VoiceName -split '::'
|
|
77
79
|
$VoiceName = $parts[0]
|
|
78
|
-
if ($parts.Length -ge 2
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
$SpeakerName = if ($parts.Length -ge 2) { $parts[1] } else { "" }
|
|
81
|
+
Remove-Item env:PIPER_SPEAKER -ErrorAction SilentlyContinue
|
|
82
|
+
|
|
83
|
+
if ($SpeakerName) {
|
|
84
|
+
# Primary: look up in voice-assignments.json catalog (libritts_speakers keyed by speaker index)
|
|
85
|
+
# Derive project root from this script's location: .claude/hooks-windows/ -> project root
|
|
86
|
+
$PiperScriptRoot = Split-Path -Parent $MyInvocation.MyCommand.Path
|
|
87
|
+
$PiperProjectRoot = Split-Path -Parent (Split-Path -Parent $PiperScriptRoot)
|
|
88
|
+
$VoiceAssignmentsPath = Join-Path $PiperProjectRoot "voice-assignments.json"
|
|
89
|
+
# Fallback: global AgentVibes install if not found in project
|
|
90
|
+
if (-not (Test-Path $VoiceAssignmentsPath)) {
|
|
91
|
+
$VoiceAssignmentsPath = Join-Path $env:USERPROFILE "AgentVibes\voice-assignments.json"
|
|
92
|
+
}
|
|
93
|
+
$SpeakerResolved = $false
|
|
94
|
+
if (Test-Path $VoiceAssignmentsPath) {
|
|
95
|
+
try {
|
|
96
|
+
$vaData = Get-Content $VoiceAssignmentsPath -Raw | ConvertFrom-Json
|
|
97
|
+
foreach ($prop in $vaData.libritts_speakers.PSObject.Properties) {
|
|
98
|
+
if ($prop.Value.voice_name -eq $SpeakerName) {
|
|
99
|
+
$env:PIPER_SPEAKER = $prop.Name
|
|
100
|
+
$SpeakerResolved = $true
|
|
101
|
+
break
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
} catch { }
|
|
105
|
+
}
|
|
106
|
+
# Fallback: check patched speaker_id_map in the .onnx.json
|
|
107
|
+
if (-not $SpeakerResolved) {
|
|
108
|
+
$OnnxJsonPath = "$VoicesDir\$VoiceName.onnx.json"
|
|
109
|
+
if (Test-Path $OnnxJsonPath) {
|
|
110
|
+
try {
|
|
111
|
+
$onnxData = Get-Content $OnnxJsonPath -Raw | ConvertFrom-Json
|
|
112
|
+
$speakerIdMap = $onnxData.speaker_id_map
|
|
113
|
+
if ($speakerIdMap -and $speakerIdMap.PSObject.Properties[$SpeakerName]) {
|
|
114
|
+
$env:PIPER_SPEAKER = [string]$speakerIdMap.PSObject.Properties[$SpeakerName].Value
|
|
115
|
+
}
|
|
116
|
+
} catch { }
|
|
117
|
+
}
|
|
118
|
+
}
|
|
82
119
|
}
|
|
83
120
|
} else {
|
|
84
121
|
# No multi-speaker syntax β clear any stale speaker env var
|
|
@@ -128,58 +128,85 @@ if ($BgEnabled -or $HasReverb) {
|
|
|
128
128
|
}
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
# Check for pre-synthesized WAV (party mode optimization β synthesis done before mutex acquisition)
|
|
132
|
+
$PreSynthWav = $env:AGENTVIBES_PRESYNTHESIZED_WAV
|
|
133
|
+
$UsePreSynth = $PreSynthWav -and (Test-Path $PreSynthWav) -and
|
|
134
|
+
(Get-Item $PreSynthWav -ErrorAction SilentlyContinue).Length -gt 0
|
|
135
|
+
|
|
131
136
|
# If background music or reverb enabled and ffmpeg available, tell provider to skip playback
|
|
132
137
|
if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
133
138
|
$env:AGENTVIBES_NO_PLAY = "1"
|
|
134
139
|
}
|
|
135
140
|
|
|
136
|
-
# Call the provider script
|
|
141
|
+
# Call the provider script (skip if using pre-synthesized audio)
|
|
137
142
|
# When post-processing (reverb/music), capture output preserving InformationRecord colors.
|
|
138
143
|
# Otherwise call directly so Write-Host colors pass through to the terminal.
|
|
139
144
|
$NeedsPostProcess = ($BgEnabled -or $HasReverb) -and $HasFfmpeg
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
145
|
+
if ($UsePreSynth) {
|
|
146
|
+
Write-Host "[SYNTH] Using pre-synthesized audio..." -ForegroundColor Cyan
|
|
147
|
+
# If no post-processing needed, play the pre-synth file directly and exit
|
|
148
|
+
if (-not $NeedsPostProcess) {
|
|
149
|
+
$player = $null
|
|
150
|
+
try {
|
|
151
|
+
$player = New-Object System.Media.SoundPlayer $PreSynthWav
|
|
152
|
+
$player.PlaySync()
|
|
153
|
+
} catch {
|
|
154
|
+
Write-Host "[WARNING] Pre-synth playback failed: $_" -ForegroundColor Yellow
|
|
155
|
+
} finally {
|
|
156
|
+
if ($player) { $player.Dispose() }
|
|
146
157
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
158
|
+
Remove-Item env:AGENTVIBES_NO_PLAY -ErrorAction SilentlyContinue
|
|
159
|
+
exit 0
|
|
160
|
+
}
|
|
161
|
+
} else {
|
|
162
|
+
try {
|
|
163
|
+
if ($NeedsPostProcess) {
|
|
164
|
+
if ($VoiceOverride) {
|
|
165
|
+
$providerOutput = & $ProviderScript $Text $VoiceOverride 6>&1 2>&1
|
|
166
|
+
} else {
|
|
167
|
+
$providerOutput = & $ProviderScript $Text 6>&1 2>&1
|
|
168
|
+
}
|
|
169
|
+
# Re-emit preserving colors from InformationRecords (Write-Host output)
|
|
170
|
+
foreach ($item in $providerOutput) {
|
|
171
|
+
if ($item -is [System.Management.Automation.InformationRecord]) {
|
|
172
|
+
$msg = $item.MessageData
|
|
173
|
+
if ($msg -is [System.Management.Automation.HostInformationMessage]) {
|
|
174
|
+
Write-Host $msg.Message -ForegroundColor $msg.ForegroundColor -NoNewline:$msg.NoNewLine
|
|
175
|
+
if (-not $msg.NoNewLine) { Write-Host }
|
|
176
|
+
} else {
|
|
177
|
+
Write-Host "$item"
|
|
178
|
+
}
|
|
154
179
|
} else {
|
|
155
180
|
Write-Host "$item"
|
|
156
181
|
}
|
|
157
|
-
} else {
|
|
158
|
-
Write-Host "$item"
|
|
159
182
|
}
|
|
160
|
-
}
|
|
161
|
-
} else {
|
|
162
|
-
if ($VoiceOverride) {
|
|
163
|
-
& $ProviderScript $Text $VoiceOverride
|
|
164
183
|
} else {
|
|
165
|
-
|
|
184
|
+
if ($VoiceOverride) {
|
|
185
|
+
& $ProviderScript $Text $VoiceOverride
|
|
186
|
+
} else {
|
|
187
|
+
& $ProviderScript $Text
|
|
188
|
+
}
|
|
166
189
|
}
|
|
167
190
|
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
191
|
+
catch {
|
|
192
|
+
Write-Host "[ERROR] TTS Error: $_" -ForegroundColor Red
|
|
193
|
+
Remove-Item env:AGENTVIBES_NO_PLAY -ErrorAction SilentlyContinue
|
|
194
|
+
exit 1
|
|
195
|
+
}
|
|
173
196
|
}
|
|
174
197
|
|
|
175
198
|
# Apply reverb and/or mix with background music
|
|
176
199
|
if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
177
200
|
Remove-Item env:AGENTVIBES_NO_PLAY -ErrorAction SilentlyContinue
|
|
178
201
|
|
|
179
|
-
# Find the
|
|
202
|
+
# Find the WAV to post-process: use pre-synthesized file if available, else most recent
|
|
180
203
|
$AudioDir = "$ClaudeDir\audio"
|
|
181
|
-
$RecentWav =
|
|
182
|
-
|
|
204
|
+
$RecentWav = if ($UsePreSynth) {
|
|
205
|
+
Get-Item $PreSynthWav -ErrorAction SilentlyContinue
|
|
206
|
+
} else {
|
|
207
|
+
Get-ChildItem -Path $AudioDir -Filter "tts-*.wav" -ErrorAction SilentlyContinue |
|
|
208
|
+
Sort-Object LastWriteTime -Descending | Select-Object -First 1
|
|
209
|
+
}
|
|
183
210
|
|
|
184
211
|
if ($RecentWav -and $RecentWav.Length -gt 0) {
|
|
185
212
|
$voicePath = $RecentWav.FullName
|
|
@@ -282,7 +309,7 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
|
282
309
|
$fadeOutStart = $totalDuration - 2
|
|
283
310
|
|
|
284
311
|
# Filter: music fades in 0.5s, voice delayed 2s, music fades out last 2s
|
|
285
|
-
$filter = "[0:a]volume=${BgVolume},afade=t=in:d=0.5,afade=t=out:st=${fadeOutStart}:d=2[bg];[1:a]adelay=
|
|
312
|
+
$filter = "[0:a]volume=${BgVolume},afade=t=in:d=0.5,afade=t=out:st=${fadeOutStart}:d=2[bg];[1:a]adelay=1000|1000,volume=1.5,apad=pad_dur=2[voice];[bg][voice]amix=inputs=2:duration=longest:dropout_transition=2:normalize=0[out]"
|
|
286
313
|
|
|
287
314
|
# Run ffmpeg - use Start-Process to avoid stderr issues with $ErrorActionPreference
|
|
288
315
|
$ffmpegArgs = "-y -stream_loop -1 -i `"$BgTrackPath`" -i `"$voicePath`" -filter_complex `"$filter`" -map `"[out]`" -t $totalDuration `"$MixedFile`""
|
package/.mcp.json
CHANGED
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
[](https://github.com/paulpreibisch/AgentVibes/actions/workflows/publish.yml)
|
|
12
12
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
13
13
|
|
|
14
|
-
**Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v4.
|
|
14
|
+
**Author**: Paul Preibisch ([@997Fire](https://x.com/997Fire)) | **Version**: v4.6.2
|
|
15
15
|
|
|
16
16
|
---
|
|
17
17
|
|
|
@@ -40,7 +40,69 @@ Whether you're coding in Claude Code, chatting in Claude Desktop, or running Ope
|
|
|
40
40
|
|
|
41
41
|
---
|
|
42
42
|
|
|
43
|
-
##
|
|
43
|
+
## NEW IN v4.6.2 — Party Mode Voices, LibriTTS Speaker Fix, Agent Pretext
|
|
44
|
+
|
|
45
|
+
- **Party mode agents now speak in their unique voices** — SKILL.md wired to `bmad-speak.ps1` per agent
|
|
46
|
+
- **LibriTTS speaker IDs resolved correctly** — `Holly-7` is speaker 322, not 7
|
|
47
|
+
- **Agent pretext spoken on Windows** — "Mary, Business Analyst here." before every response
|
|
48
|
+
- **`parseMultiSpeaker` fallback** — works on fresh installs before `.onnx.json` is patched
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## NEW IN v4.6.1 — Party Mode Voice Clarity + Agent Config UI Polish
|
|
53
|
+
|
|
54
|
+
### Voice Volume Fixed in Party Mode
|
|
55
|
+
|
|
56
|
+
- **`normalize=0`** added to ffmpeg `amix` — prevents voices being attenuated to 50% when mixed with background music
|
|
57
|
+
- **Voice boost `volume=1.5`** applied to every TTS stream — agents are now loud and clear
|
|
58
|
+
- **Music intro reduced to 1 second** (`adelay=1000`) — less dead air before each agent speaks
|
|
59
|
+
- **Pre-synthesis gap reduction** — WAV files are generated *before* acquiring the mutex, so synthesis overlaps with the previous agent's playback (gap drops from ~4–6s to ~1s)
|
|
60
|
+
|
|
61
|
+
### BMAD Agent Config — Preview + Split Fields
|
|
62
|
+
|
|
63
|
+
- **Music Track** and **Music Vol** are now separate fields in the agent editor — each opens its own dialog
|
|
64
|
+
- **Preview button** plays the selected voice with full effects: personality, reverb, background music track and volume
|
|
65
|
+
- **Blinking indicator** (`βΊβ`) highlights the focused button β reuses the shared `attachBtnBlink` utility
|
|
66
|
+
- **Preview spinner** animates while audio is playing
|
|
67
|
+
- **Tab→Save hint** shown in the volume input dialog
|
|
68
|
+
|
|
69
|
+
### Voice Gender Auto-Assign Fixed
|
|
70
|
+
|
|
71
|
+
- `inferGender` now strips the numeric suffix from LibriTTS speaker names (e.g. `anna-9` → `anna`) before looking up gender
|
|
72
|
+
- Expanded `GENDER_MAP` with 60+ first names covering all bundled voices
|
|
73
|
+
- `libritts` blanket-male override removed — LibriTTS voices are now inferred per-name
|
|
74
|
+
|
|
75
|
+
### Other Fixes
|
|
76
|
+
|
|
77
|
+
- Volume dialog text now uses `cyan`/`white` — no more invisible-on-dark-background instructions
|
|
78
|
+
- After saving agent settings, focus correctly returns to the agent list (Enter re-opens the agent)
|
|
79
|
+
- Boundary navigation in agent fields no longer jumps to buttons prematurely
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## NEW IN v4.6 — Party Mode Auto-Install + Volume Fix
|
|
84
|
+
|
|
85
|
+
### BMAD Party Mode TTS — Zero Setup
|
|
86
|
+
|
|
87
|
+
Every agent now speaks automatically in any BMAD project — no manual hook configuration needed:
|
|
88
|
+
|
|
89
|
+
- Installer copies `bmad-party-speak.sh` (Linux/macOS/WSL) or `bmad-party-speak.ps1` (Windows) to `~/.claude/hooks/`
|
|
90
|
+
- `PostToolUse` hook registered in `~/.claude/settings.json` automatically
|
|
91
|
+
- `npx agentvibes update` keeps the scripts fresh across all platforms
|
|
92
|
+
|
|
93
|
+
### Background Music Volume Default: 20%
|
|
94
|
+
|
|
95
|
+
All volume defaults lowered from 70% to 20% — new installs and agents start at a sensible level. `bmad-speak` scripts now inherit the global volume setting instead of ignoring it.
|
|
96
|
+
|
|
97
|
+
### Installer Navigation Fix
|
|
98
|
+
|
|
99
|
+
Pressing β on the completion screen no longer jumps back to the installation step.
|
|
100
|
+
|
|
101
|
+
### 🧪 628 Tests, Zero Failures
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## v4.5 — "Speak Every Language" Release
|
|
44
106
|
|
|
45
107
|
### Multilingual TUI — 9 Languages
|
|
46
108
|
|
package/RELEASE_NOTES.md
CHANGED
|
@@ -1,5 +1,46 @@
|
|
|
1
1
|
# AgentVibes Release Notes
|
|
2
2
|
|
|
3
|
+
## v4.6.3 — Patch Release
|
|
4
|
+
|
|
5
|
+
**Release Date:** April 2026
|
|
6
|
+
|
|
7
|
+
### Bug Fixes
|
|
8
|
+
|
|
9
|
+
- **Party mode pre-synthesis uses wrong speaker for every agent** — `bmad-party-speak.ps1` extracted the trailing number from the display name suffix (e.g. `14` from `Yara-14`) and passed it directly as the piper `--speaker` index. The display suffix is a human-readable disambiguator, not the model index. `Yara-14` is speaker 860, but the bug caused piper to speak as speaker 14 ("Ivy") instead. Every configured agent was silently playing a completely different voice. The fix looks up the full speaker name in `speaker_id_map` from the `.onnx.json` file, matching what `play-tts-piper.ps1` already does correctly. Fixes [#165](https://github.com/paulpreibisch/AgentVibes/issues/165).
|
|
10
|
+
|
|
11
|
+
### Testing
|
|
12
|
+
|
|
13
|
+
- **New cross-platform test: `bmad-party-speak-speaker-id.test.js`** — 15 tests covering correct `speaker_id_map` lookup for all 8 configured agents, plain names without suffixes (e.g. `Evan`), missing model graceful degradation, and a regression suite that verifies the correct index disagrees with the naive suffix extraction for every agent.
|
|
14
|
+
|
|
15
|
+
### User Impact
|
|
16
|
+
|
|
17
|
+
- Party mode agents now speak with their correct configured voices (the voices shown in the BMAD Agents tab)
|
|
18
|
+
- No configuration changes needed — the fix is automatic
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## v4.6.2 — Patch Release
|
|
23
|
+
|
|
24
|
+
**Release Date:** April 2026
|
|
25
|
+
|
|
26
|
+
### Bug Fixes
|
|
27
|
+
|
|
28
|
+
- **BMAD party mode: agents now speak with their unique voices** — The party mode `SKILL.md` was missing TTS wiring entirely. It now creates `.bmad-agent-context` on activation, calls `bmad-speak.ps1` sequentially per agent after each round, and cleans up on exit. When BMAD and AgentVibes are both installed, AgentVibes' skill now correctly overrides the BMAD version.
|
|
29
|
+
|
|
30
|
+
- **LibriTTS speaker IDs resolved correctly on Windows** — `play-tts-piper.ps1` was extracting the speaker index via a regex on the voice name suffix (e.g. `Holly-7` → `7`). That suffix is a disambiguation counter, not the Piper speaker index. `Holly-7` is actually speaker 322. The script now looks up the real index from `voice-assignments.json`, with a fallback to the patched `.onnx.json`.
|
|
31
|
+
|
|
32
|
+
- **LibriTTS `parseMultiSpeaker` fallback for unpatched models** — `voices-tab.js` now falls back to `voice-assignments.json` when the `.onnx.json` speaker map hasn't been patched yet with friendly names, preventing silent fallback to speaker 0 (often male) on fresh installs.
|
|
33
|
+
|
|
34
|
+
- **Agent pretext spoken on Windows** — `bmad-speak.ps1` never read or applied the agent's configured pretext. It now reads `pretext` from the voice map profile, and falls back to the default `"DisplayName, Title here."` computed from the agent manifest — matching the behaviour of `bmad-speak.sh` and `AgentVoiceStore.getDefaultPretext()`.
|
|
35
|
+
|
|
36
|
+
### User Impact
|
|
37
|
+
|
|
38
|
+
- Party mode agents will introduce themselves by role before speaking and use their individually configured voices throughout the conversation
|
|
39
|
+
- LibriTTS multi-speaker voices now reliably map to the correct speaker on first install (no manual patching required)
|
|
40
|
+
- No breaking changes — all fixes are silent fallbacks or missing behaviours being added
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
3
44
|
## ✨ v4.6.0 — Minor Release
|
|
4
45
|
|
|
5
46
|
**Release Date:** April 2026
|