verbalcoding 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +74 -4
- package/README.es.md +3 -1
- package/README.fr.md +3 -1
- package/README.ja.md +3 -1
- package/README.ko.md +4 -2
- package/README.md +4 -2
- package/README.ru.md +3 -1
- package/README.zh.md +3 -1
- package/app-node/agent_adapters.test.mjs +14 -0
- package/app-node/agent_routing.mjs +148 -0
- package/app-node/agent_routing.test.mjs +138 -0
- package/app-node/agent_turn.mjs +86 -0
- package/app-node/agent_turn.test.mjs +109 -0
- package/app-node/bridge_context.mjs +73 -0
- package/app-node/bridge_context.test.mjs +54 -0
- package/app-node/bridge_state.mjs +4 -0
- package/app-node/bridge_wireup.test.mjs +462 -0
- package/app-node/cli_install.test.mjs +31 -0
- package/app-node/cross_agent_routing.test.mjs +78 -0
- package/app-node/discord_command_router.mjs +204 -0
- package/app-node/discord_command_router.test.mjs +311 -0
- package/app-node/discord_voice_setup.mjs +251 -0
- package/app-node/discord_voice_setup.test.mjs +86 -0
- package/app-node/hermes_profiles.test.mjs +12 -1
- package/app-node/install_config.mjs +110 -3
- package/app-node/install_config.test.mjs +8 -0
- package/app-node/instance_doctor.test.mjs +9 -0
- package/app-node/instances.test.mjs +8 -1
- package/app-node/main.mjs +488 -1368
- package/app-node/mcp_tools.test.mjs +7 -0
- package/app-node/notification_handler.mjs +89 -0
- package/app-node/notification_handler.test.mjs +187 -0
- package/app-node/plan_dispatcher.mjs +215 -0
- package/app-node/plan_dispatcher.test.mjs +101 -0
- package/app-node/plan_mode.mjs +36 -7
- package/app-node/plan_mode.test.mjs +78 -0
- package/app-node/progress_handler.mjs +220 -0
- package/app-node/progress_handler.test.mjs +193 -0
- package/app-node/progress_speech.mjs +54 -32
- package/app-node/progress_speech.test.mjs +12 -3
- package/app-node/project_sessions.mjs +5 -2
- package/app-node/project_sessions.test.mjs +7 -0
- package/app-node/research_mode.mjs +282 -0
- package/app-node/research_mode.test.mjs +264 -0
- package/app-node/restart_notice.mjs +3 -0
- package/app-node/restart_notice.test.mjs +11 -0
- package/app-node/session_ontology.mjs +271 -0
- package/app-node/session_ontology.test.mjs +130 -0
- package/app-node/smart_progress.mjs +1 -1
- package/app-node/stream_sentencer.mjs +32 -2
- package/app-node/stream_sentencer.test.mjs +65 -0
- package/app-node/streaming_tts_queue.mjs +5 -1
- package/app-node/streaming_tts_queue.test.mjs +7 -1
- package/app-node/stt_whisper.mjs +24 -0
- package/app-node/stt_whisper.test.mjs +32 -0
- package/app-node/text_routing.mjs +4 -2
- package/app-node/tts_backends.mjs +537 -3
- package/app-node/tts_backends.test.mjs +454 -0
- package/app-node/tts_player.mjs +164 -0
- package/app-node/tts_player.test.mjs +202 -0
- package/app-node/tts_runtime.mjs +134 -0
- package/app-node/tts_runtime.test.mjs +89 -0
- package/app-node/tts_settings.mjs +150 -3
- package/app-node/tts_settings.test.mjs +204 -0
- package/app-node/tts_voice_config.mjs +136 -2
- package/app-node/tts_voice_config.test.mjs +94 -0
- package/app-node/utterance_router.mjs +216 -0
- package/app-node/utterance_router.test.mjs +236 -0
- package/app-node/voice_autojoin.mjs +37 -0
- package/app-node/voice_autojoin.test.mjs +59 -0
- package/app-node/voice_io.mjs +272 -0
- package/app-node/voice_io.test.mjs +102 -0
- package/app-node/voice_turn_runner.mjs +449 -0
- package/app-node/voice_turn_runner.test.mjs +289 -0
- package/docs/CONFIGURATION.md +12 -2
- package/docs/HARNESSES.md +58 -0
- package/docs/HARNESS_AIDER.md +50 -0
- package/docs/HARNESS_CLAUDE.md +56 -0
- package/docs/HARNESS_CODEX.md +56 -0
- package/docs/HARNESS_CURSOR.md +45 -0
- package/docs/HARNESS_GEMINI.md +45 -0
- package/docs/HARNESS_HERMES.md +57 -0
- package/docs/HARNESS_OPENCLAW.md +44 -0
- package/docs/HARNESS_OPENCODE.md +44 -0
- package/docs/README.md +1 -0
- package/docs/ROADMAP.md +20 -5
- package/docs/TTS_BACKENDS.md +227 -0
- package/docs/USAGE.md +22 -0
- package/docs/i18n/AGENTS.es.md +34 -0
- package/docs/i18n/AGENTS.fr.md +34 -0
- package/docs/i18n/AGENTS.ja.md +34 -0
- package/docs/i18n/AGENTS.ko.md +34 -0
- package/docs/i18n/AGENTS.ru.md +34 -0
- package/docs/i18n/AGENTS.zh.md +34 -0
- package/docs/i18n/HARNESSES.es.md +58 -0
- package/docs/i18n/HARNESSES.fr.md +58 -0
- package/docs/i18n/HARNESSES.ja.md +58 -0
- package/docs/i18n/HARNESSES.ko.md +58 -0
- package/docs/i18n/HARNESSES.ru.md +58 -0
- package/docs/i18n/HARNESSES.zh.md +58 -0
- package/docs/i18n/HARNESS_AIDER.es.md +48 -0
- package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
- package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
- package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
- package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
- package/docs/i18n/HARNESS_CODEX.es.md +55 -0
- package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
- package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
- package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
- package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
- package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
- package/docs/i18n/HARNESS_HERMES.es.md +54 -0
- package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
- package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
- package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
- package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
- package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
- package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
- package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
- package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
- package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
- package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
- package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
- package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
- package/integrations/fireredtts2/mlx_llm.py +183 -0
- package/integrations/fireredtts2/synth.py +156 -0
- package/integrations/fireredtts2/synth_mlx.py +196 -0
- package/integrations/mlxaudio/synth.py +74 -0
- package/integrations/neuttsair/synth.py +104 -0
- package/integrations/omnivoice/synth.py +110 -0
- package/package.json +6 -1
- package/scripts/cli.mjs +84 -0
- package/scripts/doctor.mjs +104 -4
- package/scripts/install.mjs +5 -1
- package/scripts/install_fireredtts2.sh +109 -0
- package/scripts/install_mlxaudio.sh +34 -0
- package/scripts/install_mossttsnano.sh +46 -0
- package/scripts/postinstall.mjs +34 -0
|
@@ -0,0 +1,449 @@
|
|
|
1
|
+
// Per-turn agent execution: takes a Discord voice utterance from voice_io,
|
|
2
|
+
// runs it through dispatch (wake/language/voice/clone/sensitivity/verbose/
|
|
3
|
+
// research/cross-agent/plan-mode), invokes the selected agent adapter, drives
|
|
4
|
+
// the progress idle loop while waiting, then sends + speaks the answer.
|
|
5
|
+
//
|
|
6
|
+
// Phase 7a extraction from utterance_router.mjs. handleRecording was the
|
|
7
|
+
// single biggest function in the codebase (~365 LOC) and was the reason
|
|
8
|
+
// utterance_router had a 90-key dep list. Pulling it into its own module
|
|
9
|
+
// lets utterance_router go back to being plain command dispatch + adapter
|
|
10
|
+
// selection.
|
|
11
|
+
//
|
|
12
|
+
// Voice IO calls handleRecording via main.mjs's forward-declared
|
|
13
|
+
// `voiceTurnRunner` and a thunk:
|
|
14
|
+
// handleRecording: (...args) => voiceTurnRunner.handleRecording(...args)
|
|
15
|
+
// At runtime the thunk resolves through the closure after createVoiceTurnRunner
|
|
16
|
+
// has been constructed.
|
|
17
|
+
|
|
18
|
+
/**
 * Build the per-turn voice runner.
 *
 * Every collaborator (bridge state, settings, Discord client, STT/TTS helpers,
 * routing/plan/research helpers and the PROGRESS_* tunables) is injected via
 * `deps`; this module holds no state of its own.
 *
 * @param {object} deps - Collaborators and constants; see the destructuring
 *   below for the full list of keys that are read.
 * @returns {{ handleRecording: Function }} Object exposing `handleRecording`.
 */
export function createVoiceTurnRunner(deps) {
  const {
    bridge,
    agentTurnLifecycle,
    settings,
    client,
    log,
    warn,
    fs,
    transcribe,
    beginStreamingTurn,
    endStreamingTurn,
    speakText,
    queueProgressSpeechText,
    stopProgressSpeech,
    speakImmediateNotice,
    maybeNotifyTaskComplete,
    handleLanguageCommand,
    handleTtsVoiceCommand,
    handleVoiceCloneCommand,
    dispatchPlanModeUtterance,
    adapterForBackend,
    adapterForProjectSession,
    planChannelKey,
    routingStateFor,
    recordUtterance,
    clearTransientRouting,
    isAllowed,
    isAbortError,
    sleep,
    sendText,
    sendEmbed,
    reloadRuntimeLanguageFromEnv,
    drainDeferredProcessingUtterances,
    resolveProjectSessionForChannel,
    projectSessionContextText,
    ontologyStateFor,
    captureOntologyFromTurn,
    formatRecentDiscordContext,
    formatSttResultMessage,
    formatSttStartMessage,
    formatVoiceErrorMessage,
    formatWakeRejectedMessage,
    agentAnswerHeader,
    emptyAgentAnswer,
    spokenResultOnly,
    stripWake,
    acceptsWake,
    sensitivityChangedSpeech,
    sensitivityModeFromTranscript,
    sensitivityStatusText,
    setSensitivityMode,
    isSensitivityOnlyRequest,
    verboseChangedSpeech,
    verboseModeFromTranscript,
    verboseStatusText,
    setVerboseProgress,
    isVerboseOnlyRequest,
    isRoutingOnlyUtterance,
    parseAgentRoutingCommand,
    renderAgentPrefix,
    buildCrossAgentPrompt,
    buildFallbackDecision,
    parseDecisionAnswer,
    parseResearchCommand,
    runResearchTurn,
    PROGRESS_IDLE_CHECK_MS,
    PROGRESS_IDLE_NOTICE_INITIAL_MS,
    PROGRESS_IDLE_NOTICE_LIMIT,
    PROGRESS_IDLE_NOTICE_MAX_MS,
    PROGRESS_IDLE_NOTICE_MULTIPLIER,
    STT_START_VOICE_NOTICE,
  } = deps;

  // Reads settings.voiceLanguage at call time (not once per turn) so every
  // message picks its language exactly when it is built.
  const isEnglish = () => /^en/i.test(String(settings.voiceLanguage || ''));

  // Post `<icon> <message>` as text, then speak the bare message. Speech is
  // issued without a metrics turn, matching the status notices below.
  const announce = async (icon, message, signal) => {
    await sendText(`${icon} ${message}`);
    await speakText(message, signal, null);
  };

  /**
   * Run one recorded utterance through STT, command dispatch and the agent.
   *
   * Order of checks after transcription: wake word, language / TTS-voice /
   * voice-clone commands, sensitivity, verbose, pending fallback answer,
   * research, agent routing, plan mode, and finally the selected adapter.
   * Each early exit reports its own status through `metricsTurn.finish`.
   *
   * @param {*} userId - Speaker id; checked with `isAllowed`.
   * @param {string} wavPath - Path handed to `transcribe`.
   * @param {number} pcmBytes - Only logged.
   * @param {number} [segments=1] - Only logged.
   * @param {object|null} [metricsTurn=null] - Optional metrics sink exposing
   *   `stage(name, ms, extra)` and `finish({ status, ... })`.
   * @returns {Promise<void>}
   */
  async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsTurn = null) {
    if (bridge.processing) {
      log('drop while processing', userId);
      metricsTurn?.finish({ status: 'drop_processing' });
      return;
    }
    if (!isAllowed(userId)) {
      warn('ignore unauthorized', userId);
      metricsTurn?.finish({ status: 'unauthorized' });
      return;
    }
    const turn = agentTurnLifecycle.start({ withTurnId: true });
    const { signal, turnId } = turn;
    try {
      // Resolved inside the try so that a throw here still reaches the
      // `finally` below and the started turn is always finished.
      const sessionForVoice = resolveProjectSessionForChannel(bridge.activeVoiceChannelId || settings.transcriptChannelId);
      bridge.activeTranscriptChannelId = sessionForVoice?.transcriptChannelId || settings.transcriptChannelId;

      const runtimeLanguage = reloadRuntimeLanguageFromEnv();
      if (runtimeLanguage.changed) {
        log('drop current utterance because language changed before STT', userId, 'turn', turnId, 'language', runtimeLanguage.voiceLanguage);
        fs.rm(wavPath, { force: true }, () => {});
        metricsTurn?.finish({ status: 'drop_stale_language_change' });
        return;
      }
      const session = resolveProjectSessionForChannel(bridge.activeVoiceChannelId || settings.transcriptChannelId);
      bridge.activeTranscriptChannelId = session?.transcriptChannelId || settings.transcriptChannelId;
      log('voice turn text target', session ? `project=${session.slug}` : 'project=default', 'channel', bridge.activeTranscriptChannelId ? 'project-or-default' : 'none');
      log('transcribing', userId, wavPath, 'pcmBytes', pcmBytes, 'segments', segments, 'turn', turnId);

      // --- Speech-to-text -------------------------------------------------
      const sttNotice = formatSttStartMessage(settings.voiceLanguage);
      await sendText(sttNotice);
      const sttNoticeSpeech = Promise.resolve(
        STT_START_VOICE_NOTICE
          ? speakImmediateNotice(sttNotice.replace(/^🎧\s*/u, ''), signal, 'stt-start')
          : undefined,
      );
      // The notice runs concurrently with transcription. Mark it as handled so
      // that, if transcribe() rejects first, a later rejection here is not an
      // unhandled rejection. The `await` below still surfaces the failure on
      // the normal path.
      sttNoticeSpeech.catch(() => {});
      const sttStart = Date.now();
      const text = await transcribe(wavPath);
      await sttNoticeSpeech;
      metricsTurn?.stage('stt', Date.now() - sttStart, { transcriptChars: String(text || '').length });
      if (bridge.interruptedTurns.has(turnId) || signal.aborted) {
        metricsTurn?.finish({ status: 'aborted_after_stt' });
        return;
      }
      if (!text) {
        log('empty transcript', userId, wavPath);
        metricsTurn?.finish({ status: 'empty_transcript' });
        return;
      }
      log(`user ${userId} said: ${text}`);
      await sendText(formatSttResultMessage(settings.voiceLanguage, userId, text));
      if (!acceptsWake(text)) {
        await sendText(formatWakeRejectedMessage(settings.voiceLanguage));
        metricsTurn?.finish({ status: 'wake_rejected' });
        return;
      }

      // --- Built-in voice commands ---------------------------------------
      let prompt = stripWake(text);
      if (await handleLanguageCommand(prompt, signal)) {
        metricsTurn?.finish({ status: 'language_command' });
        return;
      }
      if (await handleTtsVoiceCommand(prompt, signal)) {
        metricsTurn?.finish({ status: 'voice_command' });
        return;
      }
      if (await handleVoiceCloneCommand(userId, prompt, signal)) {
        metricsTurn?.finish({ status: 'voice_clone_command' });
        return;
      }
      const sensitivityRequest = sensitivityModeFromTranscript(prompt);
      if (sensitivityRequest) {
        const thresholds = setSensitivityMode(sensitivityRequest.mode, sensitivityRequest.reason);
        await sendText(`🎚️ ${sensitivityStatusText()}`);
        if (isSensitivityOnlyRequest(prompt)) {
          await speakText(sensitivityChangedSpeech(thresholds.mode, settings.voiceLanguage), signal, metricsTurn);
          metricsTurn?.finish({ status: 'sensitivity_only' });
          return;
        }
      }
      const verboseRequest = verboseModeFromTranscript(prompt);
      if (verboseRequest !== null) {
        setVerboseProgress(verboseRequest, 'voice-command');
        await sendText(`🔎 ${verboseStatusText()}`);
        if (isVerboseOnlyRequest(prompt)) {
          await speakText(verboseChangedSpeech(verboseRequest, settings.voiceLanguage), signal, metricsTurn);
          metricsTurn?.finish({ status: 'verbose_only' });
          return;
        }
      }

      // --- Pending "use the default agent instead?" answer ----------------
      const routingKey = planChannelKey();
      const routingState = routingStateFor(routingKey);
      if (routingState.pendingFallbackPrompt) {
        const decision = buildFallbackDecision(
          routingState.pendingFallbackPrompt.requestedBackend || 'agent',
          settings.agent.label,
          settings.voiceLanguage,
        );
        const fallbackAnswer = parseDecisionAnswer(prompt, decision, settings.voiceLanguage);
        if (fallbackAnswer.type === 'unknown') {
          const msg = isEnglish()
            ? 'Please answer yes or no.'
            : '예 또는 아니오로 대답해줘.';
          await announce('⚠️', msg, signal);
          metricsTurn?.finish({ status: 'fallback_pending' });
          return;
        }
        const accepted = fallbackAnswer.type === 'auto' || fallbackAnswer.choice === 'yes';
        const previous = routingState.pendingFallbackPrompt;
        routingState.pendingFallbackPrompt = null;
        if (!accepted) {
          const msg = isEnglish() ? 'Cancelled.' : '취소했어.';
          await announce('❎', msg, signal);
          metricsTurn?.finish({ status: 'fallback_declined' });
          return;
        }
        routingState.activeRouting = { backend: settings.agent.backend, sticky: false };
        // NOTE(review): the stored prompt is re-dispatched below unchanged. If
        // it still contains the original routing phrase, parseAgentRoutingCommand
        // may request the missing backend again — confirm against the parser.
        prompt = previous.originalPrompt;
      }

      // --- Research command ----------------------------------------------
      const researchCmd = parseResearchCommand(prompt, settings.voiceLanguage);
      if (researchCmd.type === 'research') {
        const preemptiveRouting = parseAgentRoutingCommand(prompt, settings.voiceLanguage);
        let researchBackend = routingState.activeRouting.backend;
        if (preemptiveRouting.type === 'route') {
          const routedCandidate = adapterForBackend(preemptiveRouting.backend, session);
          if (routedCandidate) {
            researchBackend = preemptiveRouting.backend;
            if (preemptiveRouting.sticky) routingState.activeRouting = { backend: preemptiveRouting.backend, sticky: true };
          } else {
            const msg = isEnglish()
              ? `${preemptiveRouting.backend} is not installed. Want me to research with ${settings.agent.label} instead?`
              : `${preemptiveRouting.backend}이(가) 설치되어 있지 않아. ${settings.agent.label}로 리서치할까?`;
            await announce('⚠️', msg, signal);
            routingState.pendingFallbackPrompt = {
              requestedBackend: preemptiveRouting.backend,
              originalPrompt: `research ${researchCmd.query}`,
            };
            metricsTurn?.finish({ status: 'research_routing_fallback_pending' });
            return;
          }
        }
        const en = isEnglish();
        const startMsg = en ? `Researching ${researchCmd.query}.` : `${researchCmd.query} 리서치할게.`;
        await announce('🔎', startMsg, signal);
        const adapter = adapterForBackend(researchBackend, session) || adapterForProjectSession(session);
        // Synthesis callback handed to runResearchTurn; always yields a string.
        const synthesize = async (synthPrompt, opts = {}) => {
          const out = await adapter.ask(synthPrompt, signal, {
            task: Boolean(opts.task),
            label: adapter.label,
            language: settings.voiceLanguage,
          });
          return String(out || '');
        };
        // A rejected research run is folded into an `error` result so the
        // status handling below covers it.
        const result = await runResearchTurn({ query: researchCmd.query, language: settings.voiceLanguage, synthesize, signal })
          .catch((e) => ({ status: 'error', error: e?.message || String(e), query: researchCmd.query }));
        if (result.status === 'ok') {
          const sentEmbed = await sendEmbed(result.embed);
          if (!sentEmbed) await sendText(result.markdown);
          await speakText(result.speech, signal, null);
          captureOntologyFromTurn(routingKey, { prompt, answer: result.bullets.join('\n'), backend: 'research' });
        } else if (result.status === 'empty') {
          await sendText(result.markdown);
          await speakText(result.speech, signal, null);
        } else if (result.status === 'no_backend') {
          const msg = en
            ? 'No search backend is configured. Set TAVILY_API_KEY, BRAVE_SEARCH_API_KEY, SEARXNG_URL, or SEARCH_BACKEND_AGENT_FALLBACK=1 to delegate research to the active agent.'
            : '검색 백엔드가 설정돼 있지 않아. TAVILY_API_KEY, BRAVE_SEARCH_API_KEY, SEARXNG_URL 중 하나를 설정하거나 SEARCH_BACKEND_AGENT_FALLBACK=1로 활성 에이전트에게 위임할 수 있어.';
          await announce('⚠️', msg, signal);
        } else {
          // Text carries the error detail; speech stays short.
          const msg = en ? `Research failed: ${result.error || result.status}` : `리서치 실패: ${result.error || result.status}`;
          await sendText(`⚠️ ${msg}`);
          await speakText(en ? 'Research failed.' : '리서치 실패.', signal, null);
        }
        if (preemptiveRouting.type === 'route' && !preemptiveRouting.sticky && researchBackend !== settings.agent.backend) {
          routingState.activeRouting = { backend: settings.agent.backend, sticky: false };
        }
        metricsTurn?.finish({ status: `research_${result.status}` });
        return;
      }

      // --- Agent routing commands ----------------------------------------
      const routing = parseAgentRoutingCommand(prompt, settings.voiceLanguage);
      if (routing.type === 'restore') {
        routingState.activeRouting = { backend: settings.agent.backend, sticky: false };
        const msg = isEnglish()
          ? `Back to the default agent (${settings.agent.label}).`
          : `기본 에이전트로 돌아갈게 (${settings.agent.label}).`;
        await announce('↩', msg, signal);
        metricsTurn?.finish({ status: 'routing_restore' });
        return;
      }
      if (routing.type === 'route') {
        const candidate = adapterForBackend(routing.backend, session);
        if (!candidate) {
          const msg = isEnglish()
            ? `${routing.backend} is not installed. Want me to use ${settings.agent.label} instead?`
            : `${routing.backend}이(가) 설치되어 있지 않아. ${settings.agent.label}로 대신 진행할까?`;
          await announce('⚠️', msg, signal);
          routingState.pendingFallbackPrompt = { requestedBackend: routing.backend, originalPrompt: prompt };
          metricsTurn?.finish({ status: 'routing_fallback_pending' });
          return;
        }
        routingState.activeRouting = { backend: routing.backend, sticky: routing.sticky };
        if (isRoutingOnlyUtterance(prompt)) {
          const en = isEnglish();
          const label = candidate.label || routing.backend;
          const msg = routing.sticky
            ? (en ? `Switched to ${label}.` : `${label}로 전환했어.`)
            : (en ? `Asking ${label} this turn.` : `이번 턴은 ${label}로 진행할게.`);
          await announce('↪', msg, signal);
          metricsTurn?.finish({ status: 'routing_only' });
          return;
        }
      }
      recordUtterance(routingKey, prompt);

      // --- Plan mode ------------------------------------------------------
      let promptForAgent = prompt;
      try {
        const planOutcome = await dispatchPlanModeUtterance(prompt, signal);
        if (planOutcome.handled) {
          metricsTurn?.finish({ status: 'plan_mode' });
          return;
        }
        if (planOutcome.prompt) promptForAgent = planOutcome.prompt;
      } catch (e) {
        // Best effort: a plan-mode failure falls through to a normal agent turn.
        warn('plan mode dispatch failed', e?.stack || e);
      }

      // --- Adapter selection and cross-agent handoff ----------------------
      const routedBackend = routingState.activeRouting.backend;
      const selectedAgentAdapter = adapterForBackend(routedBackend, session) || adapterForProjectSession(session);
      const isHandoff = routingState.lastUsedBackend !== routedBackend;
      const ttsPrefix = isHandoff ? renderAgentPrefix(routedBackend, settings.voiceLanguage) : '';
      if (isHandoff) {
        const ontologyStore = ontologyStateFor(routingKey);
        const ontologyBlock = ontologyStore.nodeCount > 0
          ? ontologyStore.serializeForHandoff({ language: settings.voiceLanguage })
          : '';
        promptForAgent = buildCrossAgentPrompt({
          prompt: promptForAgent,
          fromBackend: routingState.lastUsedBackend,
          toBackend: routedBackend,
          resolvedDecisions: routingState.lastResolvedDecisions || {},
          // Drop the last entry: the current prompt was recorded just above.
          priorUtterances: routingState.recentUtterances.slice(0, -1),
          language: settings.voiceLanguage,
        });
        if (ontologyBlock) {
          const header = isEnglish() ? '\n\n[Session ontology]\n' : '\n\n[세션 온톨로지]\n';
          promptForAgent = `${promptForAgent}${header}${ontologyBlock}`;
        }
      }
      routingState.lastUsedBackend = routedBackend;
      // Non-sticky routing applies to this turn only; reset before the call.
      if (!routingState.activeRouting.sticky) routingState.activeRouting = { backend: settings.agent.backend, sticky: false };
      const projectContext = projectSessionContextText(session);
      const recentDiscordContext = formatRecentDiscordContext(bridge.recentDiscordTextByChannel, {
        channelId: bridge.activeTranscriptChannelId,
      });
      const plan = {
        task: true,
        label: selectedAgentAdapter.label,
        verboseProgress: bridge.verboseProgress,
        language: settings.voiceLanguage,
        cwd: session?.workdir,
        projectContext,
        recentDiscordContext,
      };
      log('Agent plan', plan.label, 'backend', selectedAgentAdapter.backend, 'task', plan.task, 'language', plan.language, session ? `project=${session.slug}` : 'project=default');
      const agentStart = Date.now();
      // agentTurnLifecycle.start() already seeded bridge.activeProgressAbortController
      // and bridge.activeProgressSignal at the top of the turn. Reuse the lifecycle's
      // progressController so cleanup ownership stays consistent.
      const progressController = turn.progressController;
      bridge.activeProgressLastEventAt = Date.now();
      const streamingTurnActive = beginStreamingTurn(signal);
      if (streamingTurnActive && ttsPrefix && bridge.activeStreamingQueue) {
        bridge.activeStreamingQueue.enqueue(ttsPrefix.replace(/[:\s]+$/u, '.'));
      }
      const agentPromise = selectedAgentAdapter.ask(promptForAgent, signal, plan);
      let done = false;
      // Status announcements share one queue with verbose progress so they never
      // talk over each other. In verbose mode, skip the generic initial prompt;
      // the detailed tool/file/test events are the initial progress voice.
      const progressLoop = (async () => {
        if (!bridge.verboseProgress) {
          await sleep(2500);
          if (!done && !signal.aborted && !bridge.interruptedTurns.has(turnId)) {
            const initial = isEnglish()
              ? 'calling the agent.'
              : '에이전트 호출했어. 응답 기다리는 중.';
            queueProgressSpeechText(initial, progressController.signal, 'generic-initial');
          }
        }
        let idleNotices = 0;
        let nextIdleNoticeMs = PROGRESS_IDLE_NOTICE_INITIAL_MS;
        let lastObservedProgressAt = bridge.activeProgressLastEventAt;
        while (!done && !signal.aborted && !bridge.interruptedTurns.has(turnId) && idleNotices < PROGRESS_IDLE_NOTICE_LIMIT) {
          await sleep(Math.min(PROGRESS_IDLE_CHECK_MS, nextIdleNoticeMs));
          if (done || signal.aborted || bridge.interruptedTurns.has(turnId)) break;
          // Any progress event since the last check resets the back-off.
          if (bridge.activeProgressLastEventAt !== lastObservedProgressAt) {
            lastObservedProgressAt = bridge.activeProgressLastEventAt;
            nextIdleNoticeMs = PROGRESS_IDLE_NOTICE_INITIAL_MS;
            continue;
          }
          const idleMs = Date.now() - bridge.activeProgressLastEventAt;
          if (idleMs < nextIdleNoticeMs) continue;
          idleNotices += 1;
          bridge.activeProgressLastEventAt = Date.now();
          lastObservedProgressAt = bridge.activeProgressLastEventAt;
          const idle = isEnglish()
            ? 'still working on that.'
            : '아직 작업 중이야.';
          queueProgressSpeechText(idle, progressController.signal, `idle-${idleNotices}-${Math.round(nextIdleNoticeMs / 1000)}s`);
          // Grow the wait by the multiplier (at least +1s), capped at the max.
          nextIdleNoticeMs = Math.min(
            PROGRESS_IDLE_NOTICE_MAX_MS,
            Math.max(nextIdleNoticeMs + 1000, Math.round(nextIdleNoticeMs * PROGRESS_IDLE_NOTICE_MULTIPLIER)),
          );
        }
      })().catch((e) => {
        if (!isAbortError(e)) warn('progress loop failed', e?.stack || e);
      });
      const rawAnswer = await agentPromise.finally(() => { done = true; });
      if (streamingTurnActive) await endStreamingTurn();
      // Normalise once: an adapter may resolve to undefined/null for an empty
      // reply, and the code below slices and concatenates the answer.
      const answer = String(rawAnswer || '');
      metricsTurn?.stage('agent', Date.now() - agentStart, { answerChars: answer.length, backend: selectedAgentAdapter.backend });
      // Intentionally not awaited: the loop observes `done` and has its own catch.
      void progressLoop;
      if (bridge.interruptedTurns.has(turnId) || signal.aborted) {
        metricsTurn?.finish({ status: 'aborted_after_agent' });
        return;
      }

      // --- Deliver the answer --------------------------------------------
      log('Agent answer', selectedAgentAdapter.label, answer.slice(0, 200));
      captureOntologyFromTurn(routingKey, { prompt, answer, backend: routedBackend });
      const spokenAnswerCore = spokenResultOnly(prompt, answer, settings.voiceLanguage);
      const spokenAnswer = ttsPrefix ? `${ttsPrefix}${spokenAnswerCore}` : spokenAnswerCore;
      const fullAnswerText = `${agentAnswerHeader(settings.voiceLanguage, selectedAgentAdapter.label)}\n${answer || emptyAgentAnswer(settings.voiceLanguage)}`;
      log('send agent answer text', 'chars', fullAnswerText.length);
      const answerTextDelivered = await sendText(fullAnswerText);
      if (!answerTextDelivered) {
        warn('agent answer text delivery failed; still speaking answer');
      }
      log('spoken answer', String(spokenAnswer || '').slice(0, 200));
      stopProgressSpeech(progressController.signal, 'agent-answer-ready');
      if (streamingTurnActive && bridge.streamingSpeechDelivered) {
        log('skipping post-run speakText; streaming already delivered audio');
      } else {
        await speakText(spokenAnswer, signal, metricsTurn, { mirrorText: !answerTextDelivered });
      }
      try {
        const guildId = client.channels.cache.get(bridge.activeVoiceChannelId)?.guild?.id || '';
        await maybeNotifyTaskComplete({
          answer: spokenAnswer || answer,
          label: selectedAgentAdapter.label,
          elapsedMs: Date.now() - agentStart,
          guildId,
        });
      } catch (e) {
        // Best effort: a notification failure does not fail the turn.
        warn('maybeNotifyTaskComplete failed', e?.message || e);
      }
      metricsTurn?.finish({ status: 'ok' });
    } catch (e) {
      if (isAbortError(e) || bridge.interruptedTurns.has(turnId)) {
        log('turn aborted', userId, 'turn', turnId);
        clearTransientRouting(planChannelKey());
        metricsTurn?.finish({ status: 'aborted' });
        return;
      }
      warn('handleRecording failed', e?.stack || e);
      const shortMsg = String(e?.message || e).slice(0, 800);
      metricsTurn?.finish({ status: 'error', error: shortMsg });
      await sendText(formatVoiceErrorMessage(settings.voiceLanguage, shortMsg));
    } finally {
      agentTurnLifecycle.finish(turn);
      if (bridge.bridgeState.deferredSize() > 0) {
        setImmediate(() => drainDeferredProcessingUtterances().catch((e) => warn('drain deferred utterance failed', e?.stack || e)));
      }
    }
  }

  return { handleRecording };
}
|