agentvibes 5.1.3 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agentvibes/config.json +23 -13
- package/.claude/commands/agent-vibes/verbosity.md +98 -89
- package/.claude/config/audio-effects.cfg +6 -1
- package/.claude/hooks/bmad-speak.sh +2 -2
- package/.claude/hooks/piper-download-voices.sh +233 -225
- package/.claude/hooks/piper-installer.sh +1 -1
- package/.claude/hooks/piper-voice-manager.sh +125 -0
- package/.claude/hooks/play-tts-agentvibes-receiver-for-voiceless-connections.sh +97 -90
- package/.claude/hooks/play-tts-enhanced.sh +1 -1
- package/.claude/hooks/play-tts-piper.sh +16 -5
- package/.claude/hooks/play-tts-ssh-remote.sh +168 -167
- package/.claude/hooks/play-tts.sh +31 -9
- package/.claude/hooks/session-start-tts.sh +4 -1
- package/.claude/hooks/stop-tts.sh +1 -1
- package/.claude/hooks/verbosity-manager.sh +185 -178
- package/.claude/hooks-windows/download-extra-voices.ps1 +243 -185
- package/.claude/hooks-windows/play-tts-piper.ps1 +7 -2
- package/.claude/hooks-windows/play-tts.ps1 +219 -65
- package/.claude/hooks-windows/session-start-tts.ps1 +2 -1
- package/.claude/hooks-windows/verbosity-manager.ps1 +126 -119
- package/README.md +24 -1
- package/RELEASE_NOTES.md +113 -0
- package/bin/agentvibes-voice-browser.js +1939 -1840
- package/mcp-server/server.py +75 -25
- package/package.json +1 -1
- package/src/console/tabs/receiver-tab.js +1527 -1483
- package/src/console/tabs/settings-tab.js +2 -2
- package/src/console/tabs/setup-tab.js +122 -20
- package/src/console/tabs/voices-tab.js +130 -13
- package/src/i18n/en.js +202 -202
- package/src/installer.js +29 -25
- package/src/services/llm-provider-service.js +114 -11
- package/src/services/verbosity-service.js +159 -157
- package/templates/agentvibes-receiver.sh +3 -2
|
@@ -17,18 +17,141 @@ param(
|
|
|
17
17
|
)
|
|
18
18
|
|
|
19
19
|
# Security: Validate LLM provider name (alphanumeric, hyphens, underscores
|
|
20
|
-
# only)
|
|
20
|
+
# only) -- mirrors play-tts.sh line 92. This prevents weird values from
|
|
21
21
|
# poisoning the audio-effects.cfg lookup or the AGENTVIBES_LLM_KEY env var
|
|
22
22
|
# we export to child scripts. An invalid value is treated as unset rather
|
|
23
23
|
# than aborting, so the script falls back to the default config and the
|
|
24
24
|
# rest of TTS still works.
|
|
25
25
|
if ($llm -and $llm -notmatch '^[a-zA-Z0-9_-]+$') {
|
|
26
|
-
Write-Error "Invalid LLM provider name: '
|
|
26
|
+
Write-Error ("Invalid LLM provider name: '{0}' - must match {1}. Falling back to default config." -f $llm, '^[a-zA-Z0-9_-]+$')
|
|
27
27
|
$llm = ""
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
# When no -llm is supplied, route through the "default" pseudo-LLM so the
|
|
31
|
+
# user-managed `llm:default` row in audio-effects.cfg becomes the global
|
|
32
|
+
# fallback for voice / pretext / music / effects. This is configured via
|
|
33
|
+
# Setup -> Default -> Configure in the TUI. If `llm:default` doesn't exist,
|
|
34
|
+
# the lookup will return empty and the script falls through to the
|
|
35
|
+
# legacy global config chain (project / user .agentvibes/config.json).
|
|
36
|
+
if (-not $llm) {
|
|
37
|
+
$llm = "default"
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
# --- Cross-process playback serialization ---
|
|
41
|
+
# Without this, any two callers of play-tts.ps1 (Claude Code PostToolUse hook,
|
|
42
|
+
# Codex MCP text_to_speech, Copilot MCP text_to_speech, direct CLI) race each
|
|
43
|
+
# other and produce overlapping / interleaved audio. Party mode already has
|
|
44
|
+
# its own mutex (AgentVibesPartyModeTTSQueue) at the bmad-party-speak.ps1
|
|
45
|
+
# level, but MCP-initiated calls bypass it entirely.
|
|
46
|
+
#
|
|
47
|
+
# We use a DIFFERENT mutex name ("AgentVibesPlaybackLock") so there's no
|
|
48
|
+
# deadlock risk with the party-mode mutex -- they can be held independently
|
|
49
|
+
# by nested processes.
|
|
50
|
+
#
|
|
51
|
+
# The mutex is acquired immediately before PlaySync() and released right
|
|
52
|
+
# after, so CPU-bound synthesis/ffmpeg work can overlap with another
|
|
53
|
+
# process's playback.
|
|
54
|
+
$_PlaybackMutex = New-Object System.Threading.Mutex($false, "AgentVibesPlaybackLock")
|
|
55
|
+
|
|
56
|
+
# --- Script-level watchdog ---
|
|
57
|
+
# If anything in this script hangs (SoundPlayer deadlock, audio device
|
|
58
|
+
# locked, ffmpeg stuck, etc.), a sibling PowerShell job waits 25 seconds
|
|
59
|
+
# and force-kills this process. Without this, a stuck play-tts.ps1 holds
|
|
60
|
+
# the playback mutex forever and silently blocks every subsequent TTS
|
|
61
|
+
# call across all LLMs. The watchdog guarantees forward progress.
|
|
62
|
+
#
|
|
63
|
+
# 25s is chosen to be LONGER than the mutex timeout (15s) but SHORT
|
|
64
|
+
# enough that a stuck process clears before the user's next turn. If
|
|
65
|
+
# you fire two calls per turn and the first is stuck, the watchdog kills
|
|
66
|
+
# it before the second turn arrives so the audio subsystem recovers
|
|
67
|
+
# without manual intervention. Long legitimate messages (>25s of speech)
|
|
68
|
+
# are rare at default verbosity levels; when they do occur the watchdog
|
|
69
|
+
# kills playback mid-sentence, which is acceptable degradation vs. a
|
|
70
|
+
# deadlocked queue.
|
|
71
|
+
$_WatchdogJob = $null
|
|
72
|
+
try {
|
|
73
|
+
$_WatchdogJob = Start-Job -ArgumentList $PID -ScriptBlock {
|
|
74
|
+
param($parentPid)
|
|
75
|
+
Start-Sleep -Seconds 25
|
|
76
|
+
try {
|
|
77
|
+
# Only kill if still alive -- harmless if already exited
|
|
78
|
+
$p = Get-Process -Id $parentPid -ErrorAction SilentlyContinue
|
|
79
|
+
if ($p) {
|
|
80
|
+
[Console]::Error.WriteLine("[AgentVibes] play-tts.ps1 watchdog fired -- force-killing pid $parentPid after 25s")
|
|
81
|
+
Stop-Process -Id $parentPid -Force -ErrorAction SilentlyContinue
|
|
82
|
+
}
|
|
83
|
+
} catch { }
|
|
84
|
+
}
|
|
85
|
+
} catch {
|
|
86
|
+
# If Start-Job fails (rare), just continue without the watchdog -- no
|
|
87
|
+
# regression from pre-watchdog behavior.
|
|
88
|
+
$_WatchdogJob = $null
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
function Invoke-SerializedPlay {
    <#
    .SYNOPSIS
        Plays a WAV file synchronously while holding the cross-process
        playback mutex, so concurrent play-tts.ps1 runs do not overlap audio.
    .DESCRIPTION
        Waits up to 15 seconds for the script-level $_PlaybackMutex. On
        success the file is played with System.Media.SoundPlayer.PlaySync()
        and the mutex is released afterwards, even if playback throws.
        If the mutex cannot be acquired in time, the function tries to kill
        other PowerShell processes (Windows PowerShell or PowerShell 7) that
        are running play-tts.ps1 and were created more than 20 seconds ago,
        reports what it did on stderr, and terminates the script with exit
        code 2 (this call's audio is dropped).
    .PARAMETER WavPath
        Path of the WAV file handed to System.Media.SoundPlayer.
    #>
    param([Parameter(Mandatory)][string]$WavPath)
    $acquired = $false
    try {
        try {
            # 15s timeout to acquire the playback mutex. AbandonedMutexException
            # means the previous holder exited without releasing it; the wait
            # still grants ownership to us, so treat it as acquired.
            $acquired = $_PlaybackMutex.WaitOne(15000)
        } catch [System.Threading.AbandonedMutexException] {
            $acquired = $true
        }
        if (-not $acquired) {
            # Self-heal: kill other play-tts.ps1 host processes that have been
            # alive longer than 20 seconds so the NEXT call can get the mutex.
            # Both host executables are matched: powershell.exe (Windows
            # PowerShell) and pwsh.exe (PowerShell 7+).
            $killAttempts = 0
            try {
                $myPid = $PID
                $cutoff = (Get-Date).AddSeconds(-20)
                $stuck = Get-CimInstance Win32_Process -ErrorAction SilentlyContinue |
                    Where-Object {
                        $_.Name -in @('powershell.exe', 'pwsh.exe') -and
                        $_.ProcessId -ne $myPid -and
                        $_.CommandLine -like '*play-tts.ps1*' -and
                        $_.CreationDate -lt $cutoff
                    }
                foreach ($p in $stuck) {
                    [Console]::Error.WriteLine("[AgentVibes] Self-heal: killing stuck play-tts.ps1 pid $($p.ProcessId) (alive since $($p.CreationDate))")
                    Stop-Process -Id $p.ProcessId -Force -ErrorAction SilentlyContinue
                    $killAttempts++
                }
            } catch { }  # best-effort: a failed scan must not mask the exit below
            if ($killAttempts -gt 0) {
                [Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within 15s. A prior play-tts.ps1 process was stuck holding it and has been killed; the next TTS call should succeed.")
            } else {
                # Do not claim a kill that never happened.
                [Console]::Error.WriteLine("[AgentVibes] ERROR: play-tts.ps1 could not acquire playback mutex within 15s. No stuck play-tts.ps1 process older than 20s was found to kill; another call may still be playing audio.")
            }
            exit 2
        }
        $player = $null
        try {
            $player = New-Object System.Media.SoundPlayer $WavPath
            $player.PlaySync()
        } finally {
            if ($player) { $player.Dispose() }
        }
    } finally {
        # Release only what we own; ReleaseMutex throws if we are not the owner.
        if ($acquired) {
            try { $_PlaybackMutex.ReleaseMutex() } catch { }
        }
    }
}
|
|
141
|
+
|
|
142
|
+
# Register an exit handler that stops the watchdog job on normal exit so
|
|
143
|
+
# it doesn't fire on successful short runs.
|
|
144
|
+
Register-EngineEvent -SourceIdentifier PowerShell.Exiting -Action {
|
|
145
|
+
try {
|
|
146
|
+
if ($_WatchdogJob) {
|
|
147
|
+
Stop-Job -Job $_WatchdogJob -ErrorAction SilentlyContinue
|
|
148
|
+
Remove-Job -Job $_WatchdogJob -Force -ErrorAction SilentlyContinue
|
|
149
|
+
}
|
|
150
|
+
} catch { }
|
|
151
|
+
} | Out-Null
|
|
152
|
+
|
|
30
153
|
# Configuration paths
|
|
31
|
-
# Priority: CLAUDE_PROJECT_DIR env var
|
|
154
|
+
# Priority: CLAUDE_PROJECT_DIR env var -> script's parent project -> user profile
|
|
32
155
|
# Local project settings ALWAYS override global (~/.claude)
|
|
33
156
|
$ScriptPath = Split-Path -Parent $MyInvocation.MyCommand.Path
|
|
34
157
|
|
|
@@ -58,16 +181,19 @@ if (Test-Path $MuteFile) {
|
|
|
58
181
|
}
|
|
59
182
|
|
|
60
183
|
# Per-LLM config lookup: if --llm is passed, look up llm:<name> in audio-effects.cfg
|
|
61
|
-
# Format: llm:<name>|
|
|
184
|
+
# Format: llm:<name>|REVERB|BG_FILE|BG_VOLUME|VOICE|PRETEXT|ENGINE
|
|
62
185
|
$LlmVoice = ""
|
|
63
186
|
$LlmPretext = ""
|
|
64
187
|
$LlmReverb = ""
|
|
65
188
|
$LlmEngine = ""
|
|
189
|
+
$LlmBgTrack = ""
|
|
190
|
+
$LlmBgVolume = ""
|
|
66
191
|
$ProjectRoot = Split-Path -Parent $ClaudeDir
|
|
67
192
|
$ConfigDir = "$ClaudeDir\config"
|
|
68
193
|
|
|
69
194
|
if ($llm) {
|
|
70
195
|
$llmKey = "llm:$llm"
|
|
196
|
+
$llmKeyPattern = '^' + [regex]::Escape($llmKey) + '\|'
|
|
71
197
|
# Check project-local audio-effects.cfg first, then global
|
|
72
198
|
$cfgPaths = @(
|
|
73
199
|
"$ConfigDir\audio-effects.cfg",
|
|
@@ -76,12 +202,18 @@ if ($llm) {
|
|
|
76
202
|
foreach ($cfgPath in $cfgPaths) {
|
|
77
203
|
if (-not $LlmVoice -and -not $LlmPretext -and (Test-Path $cfgPath)) {
|
|
78
204
|
foreach ($line in (Get-Content $cfgPath)) {
|
|
79
|
-
if ($line -match
|
|
205
|
+
if ($line -match $llmKeyPattern) {
|
|
80
206
|
$parts = $line -split '\|'
|
|
81
|
-
# parts: [0]=key [1]=
|
|
207
|
+
# parts: [0]=key [1]=reverb [2]=bg_file [3]=bg_vol [4]=voice [5]=pretext [6]=engine
|
|
82
208
|
if ($parts.Length -ge 2 -and $parts[1].Trim()) {
|
|
83
209
|
$LlmReverb = $parts[1].Trim()
|
|
84
210
|
}
|
|
211
|
+
if ($parts.Length -ge 3 -and $parts[2].Trim()) {
|
|
212
|
+
$LlmBgTrack = $parts[2].Trim()
|
|
213
|
+
}
|
|
214
|
+
if ($parts.Length -ge 4 -and $parts[3].Trim()) {
|
|
215
|
+
$LlmBgVolume = $parts[3].Trim()
|
|
216
|
+
}
|
|
85
217
|
if ($parts.Length -ge 5 -and $parts[4].Trim()) {
|
|
86
218
|
$LlmVoice = $parts[4].Trim()
|
|
87
219
|
}
|
|
@@ -96,8 +228,21 @@ if ($llm) {
|
|
|
96
228
|
}
|
|
97
229
|
}
|
|
98
230
|
}
|
|
99
|
-
#
|
|
100
|
-
|
|
231
|
+
# Per-LLM voice routing.
|
|
232
|
+
#
|
|
233
|
+
# PRIORITY CHANGE: when -llm is passed AND the llm row has a voice,
|
|
234
|
+
# the per-LLM voice always wins — even over an explicit VoiceOverride
|
|
235
|
+
# parameter passed by the MCP caller. Rationale: Codex / Copilot /
|
|
236
|
+
# Claude Code all call `get_config` at session start and then echo
|
|
237
|
+
# the global voice back on every `text_to_speech` call. With the
|
|
238
|
+
# old "explicit wins" priority, that global voice overrode our
|
|
239
|
+
# per-LLM routing and broke the entire point of having llm:<key>
|
|
240
|
+
# rows in audio-effects.cfg.
|
|
241
|
+
#
|
|
242
|
+
# To request a specific voice for a specific call that bypasses the
|
|
243
|
+
# LLM routing, the caller should NOT pass -llm, or should use the
|
|
244
|
+
# `llm:default` row with its voice column left empty (an empty voice never overrides).
|
|
245
|
+
if ($LlmVoice) {
|
|
101
246
|
$VoiceOverride = $LlmVoice
|
|
102
247
|
}
|
|
103
248
|
# Export LLM key for child scripts (process-local, not system-wide)
|
|
@@ -105,8 +250,8 @@ if ($llm) {
|
|
|
105
250
|
}
|
|
106
251
|
|
|
107
252
|
# Prepend pretext if configured
|
|
108
|
-
# Priority: LLM-specific pretext
|
|
109
|
-
#
|
|
253
|
+
# Priority: LLM-specific pretext -> project .agentvibes/config.json -> project .claude/config/tts-pretext.txt
|
|
254
|
+
# -> global ~/.agentvibes/config.json -> global ~/.claude/config/tts-pretext.txt
|
|
110
255
|
$Pretext = $LlmPretext
|
|
111
256
|
if (-not $Pretext) {
|
|
112
257
|
$PretextSources = @(
|
|
@@ -132,7 +277,6 @@ if (-not $Pretext) {
|
|
|
132
277
|
if ($Pretext) {
|
|
133
278
|
$Text = "$Pretext, $Text"
|
|
134
279
|
}
|
|
135
|
-
|
|
136
280
|
# Determine active provider
|
|
137
281
|
# LLM-specific engine overrides global provider
|
|
138
282
|
$ActiveProvider = "sapi"
|
|
@@ -194,6 +338,15 @@ if (Test-Path $AgentVibesConfig) {
|
|
|
194
338
|
}
|
|
195
339
|
}
|
|
196
340
|
|
|
341
|
+
# When a per-LLM row in audio-effects.cfg has a background track configured,
|
|
342
|
+
# that's an implicit "bg music enabled for this LLM" — force it on regardless
|
|
343
|
+
# of the global backgroundMusic.enabled flag. Without this, setting a per-LLM
|
|
344
|
+
# track in the TUI's Configure modal would have no effect unless the user
|
|
345
|
+
# ALSO toggled global bg music on.
|
|
346
|
+
if ($LlmBgTrack) {
|
|
347
|
+
$BgEnabled = $true
|
|
348
|
+
}
|
|
349
|
+
|
|
197
350
|
# Check if reverb is enabled (allowlist validation)
|
|
198
351
|
# LLM-specific reverb overrides global setting
|
|
199
352
|
$ReverbLevel = "off"
|
|
@@ -227,7 +380,7 @@ if ($BgEnabled -or $HasReverb) {
|
|
|
227
380
|
}
|
|
228
381
|
}
|
|
229
382
|
|
|
230
|
-
# Check for pre-synthesized WAV (party mode optimization
|
|
383
|
+
# Check for pre-synthesized WAV (party mode optimization -- synthesis done before mutex acquisition)
|
|
231
384
|
$PreSynthWav = $env:AGENTVIBES_PRESYNTHESIZED_WAV
|
|
232
385
|
$UsePreSynth = $PreSynthWav -and (Test-Path $PreSynthWav) -and
|
|
233
386
|
(Get-Item $PreSynthWav -ErrorAction SilentlyContinue).Length -gt 0
|
|
@@ -245,14 +398,10 @@ if ($UsePreSynth) {
|
|
|
245
398
|
Write-Host "[SYNTH] Using pre-synthesized audio..." -ForegroundColor Cyan
|
|
246
399
|
# If no post-processing needed, play the pre-synth file directly and exit
|
|
247
400
|
if (-not $NeedsPostProcess) {
|
|
248
|
-
$player = $null
|
|
249
401
|
try {
|
|
250
|
-
|
|
251
|
-
$player.PlaySync()
|
|
402
|
+
Invoke-SerializedPlay -WavPath $PreSynthWav
|
|
252
403
|
} catch {
|
|
253
404
|
Write-Host "[WARNING] Pre-synth playback failed: $_" -ForegroundColor Yellow
|
|
254
|
-
} finally {
|
|
255
|
-
if ($player) { $player.Dispose() }
|
|
256
405
|
}
|
|
257
406
|
Remove-Item env:AGENTVIBES_NO_PLAY -ErrorAction SilentlyContinue
|
|
258
407
|
exit 0
|
|
@@ -279,6 +428,25 @@ if ($UsePreSynth) {
|
|
|
279
428
|
Write-Host "$item"
|
|
280
429
|
}
|
|
281
430
|
}
|
|
431
|
+
# Parse the provider output for "[OK] Saved to: <path>" so we can
|
|
432
|
+
# use the EXACT file the provider just wrote. This replaces the
|
|
433
|
+
# old "pick most recent tts-XXXXXXXX.wav" heuristic which would
|
|
434
|
+
# silently replay stale audio whenever synthesis failed.
|
|
435
|
+
$FreshSynthFile = $null
|
|
436
|
+
foreach ($item in $providerOutput) {
|
|
437
|
+
$line = if ($item -is [System.Management.Automation.InformationRecord]) {
|
|
438
|
+
$m = $item.MessageData
|
|
439
|
+
if ($m -is [System.Management.Automation.HostInformationMessage]) { $m.Message } else { "$item" }
|
|
440
|
+
} else { "$item" }
|
|
441
|
+
if ($line -match '^\[OK\] Saved to:\s*(.+\.wav)\s*$') {
|
|
442
|
+
$FreshSynthFile = $Matches[1].Trim()
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
if (-not $FreshSynthFile -or -not (Test-Path $FreshSynthFile)) {
|
|
446
|
+
[Console]::Error.WriteLine("[AgentVibes] ERROR: Provider synthesis did not produce an output file. NOT falling back to stale audio. Check provider logs above.")
|
|
447
|
+
Remove-Item env:AGENTVIBES_NO_PLAY -ErrorAction SilentlyContinue
|
|
448
|
+
exit 3
|
|
449
|
+
}
|
|
282
450
|
} else {
|
|
283
451
|
if ($VoiceOverride) {
|
|
284
452
|
& $ProviderScript $Text $VoiceOverride
|
|
@@ -298,13 +466,17 @@ if ($UsePreSynth) {
|
|
|
298
466
|
if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
299
467
|
Remove-Item env:AGENTVIBES_NO_PLAY -ErrorAction SilentlyContinue
|
|
300
468
|
|
|
301
|
-
#
|
|
469
|
+
# Use the EXACT file the provider script just wrote (captured from its
|
|
470
|
+
# "[OK] Saved to: <path>" output line above). The old "pick most recent
|
|
471
|
+
# tts-XXXXXXXX.wav" heuristic silently replayed stale audio whenever
|
|
472
|
+
# synthesis failed — there is no safe way to guess which file is fresh.
|
|
302
473
|
$AudioDir = "$ClaudeDir\audio"
|
|
303
474
|
$RecentWav = if ($UsePreSynth) {
|
|
304
475
|
Get-Item $PreSynthWav -ErrorAction SilentlyContinue
|
|
476
|
+
} elseif ($FreshSynthFile -and (Test-Path $FreshSynthFile)) {
|
|
477
|
+
Get-Item $FreshSynthFile -ErrorAction SilentlyContinue
|
|
305
478
|
} else {
|
|
306
|
-
|
|
307
|
-
Sort-Object LastWriteTime -Descending | Select-Object -First 1
|
|
479
|
+
$null
|
|
308
480
|
}
|
|
309
481
|
|
|
310
482
|
if ($RecentWav -and $RecentWav.Length -gt 0) {
|
|
@@ -320,7 +492,10 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
|
320
492
|
default { "" }
|
|
321
493
|
}
|
|
322
494
|
if ($reverbFilter) {
|
|
323
|
-
|
|
495
|
+
# Use a fixed name OUTSIDE the `tts-XXXXXXXX` random-name
|
|
496
|
+
# namespace so the "pick most recent tts-*.wav" logic can't
|
|
497
|
+
# accidentally pick this post-processed file as a synth input.
|
|
498
|
+
$reverbedFile = "$AudioDir\av-reverbed-scratch.wav"
|
|
324
499
|
$reverbArgs = "-y -i `"$voicePath`" -af `"$reverbFilter`" `"$reverbedFile`""
|
|
325
500
|
$proc = Start-Process -FilePath "ffmpeg" -ArgumentList $reverbArgs -NoNewWindow -Wait -PassThru -RedirectStandardError "$env:TEMP\agentvibes-ffmpeg-stderr.txt"
|
|
326
501
|
if ($proc.ExitCode -eq 0 -and (Test-Path $reverbedFile)) {
|
|
@@ -340,7 +515,7 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
|
340
515
|
if (Test-Path $AudioEffectsCfg) {
|
|
341
516
|
# Try agent-specific config first, then fall back to default
|
|
342
517
|
# Format: AGENT_NAME|SOX_EFFECTS|BACKGROUND_FILE|BACKGROUND_VOLUME
|
|
343
|
-
# Lookup order: agent name
|
|
518
|
+
# Lookup order: agent name -> llm:<name> -> default
|
|
344
519
|
$agentName = $env:AGENTVIBES_AGENT_NAME
|
|
345
520
|
$configLine = $null
|
|
346
521
|
|
|
@@ -378,7 +553,7 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
|
378
553
|
if ($parts.Length -ge 3 -and $parts[2]) {
|
|
379
554
|
$trackName = $parts[2].Trim()
|
|
380
555
|
# Validate: filename only, no path separators or traversal
|
|
381
|
-
if ($trackName -match '^[a-zA-Z0-9_\-\.]+$') {
|
|
556
|
+
if ($trackName -match '^[a-zA-Z0-9_\-\. ]+$') {
|
|
382
557
|
$DefaultTrack = $trackName
|
|
383
558
|
}
|
|
384
559
|
}
|
|
@@ -403,7 +578,14 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
|
403
578
|
}
|
|
404
579
|
|
|
405
580
|
if (Test-Path $BgTrackPath) {
|
|
406
|
-
|
|
581
|
+
# Mixed output goes to a fixed name OUTSIDE the tts-XXXXXXXX
|
|
582
|
+
# random-name namespace so the "pick most recent tts-*.wav"
|
|
583
|
+
# logic can't accidentally pick this as a synth input in the
|
|
584
|
+
# next invocation. (Previously we'd name this as
|
|
585
|
+
# "$voicePath-mixed.wav" which generated files like
|
|
586
|
+
# tts-xxx.wav.effected-mixed.wav that kept re-matching and
|
|
587
|
+
# compounding on every run.)
|
|
588
|
+
$MixedFile = "$AudioDir\av-mixed-scratch.wav"
|
|
407
589
|
|
|
408
590
|
try {
|
|
409
591
|
# Get voice duration to calculate total length
|
|
@@ -426,64 +608,36 @@ if (($BgEnabled -or $HasReverb) -and $HasFfmpeg) {
|
|
|
426
608
|
$proc = Start-Process -FilePath "ffmpeg" -ArgumentList $ffmpegArgs -NoNewWindow -Wait -PassThru -RedirectStandardError "$env:TEMP\agentvibes-ffmpeg-stderr.txt"
|
|
427
609
|
|
|
428
610
|
if ($proc.ExitCode -eq 0 -and (Test-Path $MixedFile) -and (Get-Item $MixedFile).Length -gt 0) {
|
|
429
|
-
# Play the mixed audio
|
|
430
|
-
$player = $null
|
|
611
|
+
# Play the mixed audio (via serialized mutex)
|
|
431
612
|
try {
|
|
432
|
-
|
|
433
|
-
$player.PlaySync()
|
|
613
|
+
Invoke-SerializedPlay -WavPath $MixedFile
|
|
434
614
|
} catch {
|
|
435
615
|
Write-Host "[WARNING] Mixed playback failed, playing voice only" -ForegroundColor Yellow
|
|
436
|
-
|
|
437
|
-
try {
|
|
438
|
-
$player2 = New-Object System.Media.SoundPlayer $voicePath
|
|
439
|
-
$player2.PlaySync()
|
|
440
|
-
} finally {
|
|
441
|
-
if ($player2) { $player2.Dispose() }
|
|
442
|
-
}
|
|
443
|
-
} finally {
|
|
444
|
-
if ($player) { $player.Dispose() }
|
|
616
|
+
Invoke-SerializedPlay -WavPath $voicePath
|
|
445
617
|
}
|
|
446
618
|
} else {
|
|
447
619
|
# Mixing failed, play voice only
|
|
448
|
-
|
|
449
|
-
try {
|
|
450
|
-
$player = New-Object System.Media.SoundPlayer $voicePath
|
|
451
|
-
$player.PlaySync()
|
|
452
|
-
} finally {
|
|
453
|
-
if ($player) { $player.Dispose() }
|
|
454
|
-
}
|
|
620
|
+
Invoke-SerializedPlay -WavPath $voicePath
|
|
455
621
|
}
|
|
456
622
|
} catch {
|
|
457
623
|
# ffmpeg failed, play voice only
|
|
458
|
-
|
|
459
|
-
try {
|
|
460
|
-
$player = New-Object System.Media.SoundPlayer $voicePath
|
|
461
|
-
$player.PlaySync()
|
|
462
|
-
} finally {
|
|
463
|
-
if ($player) { $player.Dispose() }
|
|
464
|
-
}
|
|
624
|
+
Invoke-SerializedPlay -WavPath $voicePath
|
|
465
625
|
}
|
|
466
626
|
} else {
|
|
467
627
|
# No background track found, play voice only
|
|
468
|
-
|
|
469
|
-
try {
|
|
470
|
-
$player = New-Object System.Media.SoundPlayer $voicePath
|
|
471
|
-
$player.PlaySync()
|
|
472
|
-
} finally {
|
|
473
|
-
if ($player) { $player.Dispose() }
|
|
474
|
-
}
|
|
628
|
+
Invoke-SerializedPlay -WavPath $voicePath
|
|
475
629
|
}
|
|
476
630
|
} else {
|
|
477
631
|
# No background music, play the (possibly reverbed) voice
|
|
478
|
-
|
|
479
|
-
try {
|
|
480
|
-
$player = New-Object System.Media.SoundPlayer $voicePath
|
|
481
|
-
$player.PlaySync()
|
|
482
|
-
} finally {
|
|
483
|
-
if ($player) { $player.Dispose() }
|
|
484
|
-
}
|
|
632
|
+
Invoke-SerializedPlay -WavPath $voicePath
|
|
485
633
|
}
|
|
486
634
|
}
|
|
487
635
|
} else {
|
|
488
636
|
Remove-Item env:AGENTVIBES_NO_PLAY -ErrorAction SilentlyContinue
|
|
489
637
|
}
|
|
638
|
+
|
|
639
|
+
# Explicit exit 0 so that $LASTEXITCODE from native commands (piper.exe,
|
|
640
|
+
# ffmpeg, sox, etc.) doesn't leak through as the process exit code.
|
|
641
|
+
# Without this, bash/Claude Code sees whatever random exit code the last
|
|
642
|
+
# native command returned (e.g. 127) and treats it as a TTS failure.
|
|
643
|
+
exit 0
|
|
@@ -56,6 +56,7 @@ $VerbositySection = switch ($Verbosity) {
|
|
|
56
56
|
"low" { "## Verbosity: LOW`n- Acknowledgment: Action only`n- Completion: Result + errors only`n- Skip: Reasoning, decisions" }
|
|
57
57
|
"medium" { "## Verbosity: MEDIUM`n- Acknowledgment: Action + key approach`n- Completion: Result + important decisions`n- Include: Major choices only" }
|
|
58
58
|
"high" { "## Verbosity: HIGH`n- Acknowledgment: Action + approach + why`n- Completion: Result + decisions + trade-offs`n- Include: Full reasoning, alternatives" }
|
|
59
|
+
"caveman" { "## Verbosity: CAVEMAN`n- Respond terse. All technical substance stays. Only fluff dies.`n- Drop: articles (a/an/the), filler (just/really/basically), pleasantries, hedging`n- Abbreviate: DB/auth/config/req/res/fn/impl. Use arrows (X -> Y). Strip conjunctions.`n- Fragments OK. Short synonyms. Technical terms exact. Code unchanged.`n- Pattern: [thing] [action] [reason]. [next step].`n- TTS: Ultra-short. Max 60 chars." }
|
|
59
60
|
default { "## Verbosity: LOW`n- Acknowledgment: Action only`n- Completion: Result + errors only" }
|
|
60
61
|
}
|
|
61
62
|
|
|
@@ -86,7 +87,7 @@ $VerbositySection
|
|
|
86
87
|
4. Keep under 150 chars
|
|
87
88
|
5. Always include errors
|
|
88
89
|
|
|
89
|
-
Quick Ref: low=action+result | medium=+key decisions | high=+full reasoning
|
|
90
|
+
Quick Ref: low=action+result | medium=+key decisions | high=+full reasoning | caveman=ultra-terse fragments
|
|
90
91
|
|
|
91
92
|
## BMAD Agent Voice Routing
|
|
92
93
|
If ``.bmad-agent-context`` exists, check its content:
|