verbalcoding 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +74 -4
- package/README.es.md +3 -1
- package/README.fr.md +3 -1
- package/README.ja.md +3 -1
- package/README.ko.md +4 -2
- package/README.md +4 -2
- package/README.ru.md +3 -1
- package/README.zh.md +3 -1
- package/app-node/agent_adapters.test.mjs +14 -0
- package/app-node/agent_routing.mjs +148 -0
- package/app-node/agent_routing.test.mjs +138 -0
- package/app-node/agent_turn.mjs +86 -0
- package/app-node/agent_turn.test.mjs +109 -0
- package/app-node/bridge_context.mjs +73 -0
- package/app-node/bridge_context.test.mjs +54 -0
- package/app-node/bridge_state.mjs +4 -0
- package/app-node/bridge_wireup.test.mjs +462 -0
- package/app-node/cli_install.test.mjs +31 -0
- package/app-node/cross_agent_routing.test.mjs +78 -0
- package/app-node/discord_command_router.mjs +204 -0
- package/app-node/discord_command_router.test.mjs +311 -0
- package/app-node/discord_voice_setup.mjs +251 -0
- package/app-node/discord_voice_setup.test.mjs +86 -0
- package/app-node/hermes_profiles.test.mjs +12 -1
- package/app-node/install_config.mjs +110 -3
- package/app-node/install_config.test.mjs +8 -0
- package/app-node/instance_doctor.test.mjs +9 -0
- package/app-node/instances.test.mjs +8 -1
- package/app-node/main.mjs +488 -1368
- package/app-node/mcp_tools.test.mjs +7 -0
- package/app-node/notification_handler.mjs +89 -0
- package/app-node/notification_handler.test.mjs +187 -0
- package/app-node/plan_dispatcher.mjs +215 -0
- package/app-node/plan_dispatcher.test.mjs +101 -0
- package/app-node/plan_mode.mjs +36 -7
- package/app-node/plan_mode.test.mjs +78 -0
- package/app-node/progress_handler.mjs +220 -0
- package/app-node/progress_handler.test.mjs +193 -0
- package/app-node/progress_speech.mjs +54 -32
- package/app-node/progress_speech.test.mjs +12 -3
- package/app-node/project_sessions.mjs +5 -2
- package/app-node/project_sessions.test.mjs +7 -0
- package/app-node/research_mode.mjs +282 -0
- package/app-node/research_mode.test.mjs +264 -0
- package/app-node/restart_notice.mjs +3 -0
- package/app-node/restart_notice.test.mjs +11 -0
- package/app-node/session_ontology.mjs +271 -0
- package/app-node/session_ontology.test.mjs +130 -0
- package/app-node/smart_progress.mjs +1 -1
- package/app-node/stream_sentencer.mjs +32 -2
- package/app-node/stream_sentencer.test.mjs +65 -0
- package/app-node/streaming_tts_queue.mjs +5 -1
- package/app-node/streaming_tts_queue.test.mjs +7 -1
- package/app-node/stt_whisper.mjs +24 -0
- package/app-node/stt_whisper.test.mjs +32 -0
- package/app-node/text_routing.mjs +4 -2
- package/app-node/tts_backends.mjs +537 -3
- package/app-node/tts_backends.test.mjs +454 -0
- package/app-node/tts_player.mjs +164 -0
- package/app-node/tts_player.test.mjs +202 -0
- package/app-node/tts_runtime.mjs +134 -0
- package/app-node/tts_runtime.test.mjs +89 -0
- package/app-node/tts_settings.mjs +150 -3
- package/app-node/tts_settings.test.mjs +204 -0
- package/app-node/tts_voice_config.mjs +136 -2
- package/app-node/tts_voice_config.test.mjs +94 -0
- package/app-node/utterance_router.mjs +216 -0
- package/app-node/utterance_router.test.mjs +236 -0
- package/app-node/voice_autojoin.mjs +37 -0
- package/app-node/voice_autojoin.test.mjs +59 -0
- package/app-node/voice_io.mjs +272 -0
- package/app-node/voice_io.test.mjs +102 -0
- package/app-node/voice_turn_runner.mjs +449 -0
- package/app-node/voice_turn_runner.test.mjs +289 -0
- package/docs/CONFIGURATION.md +12 -2
- package/docs/HARNESSES.md +58 -0
- package/docs/HARNESS_AIDER.md +50 -0
- package/docs/HARNESS_CLAUDE.md +56 -0
- package/docs/HARNESS_CODEX.md +56 -0
- package/docs/HARNESS_CURSOR.md +45 -0
- package/docs/HARNESS_GEMINI.md +45 -0
- package/docs/HARNESS_HERMES.md +57 -0
- package/docs/HARNESS_OPENCLAW.md +44 -0
- package/docs/HARNESS_OPENCODE.md +44 -0
- package/docs/README.md +1 -0
- package/docs/ROADMAP.md +20 -5
- package/docs/TTS_BACKENDS.md +227 -0
- package/docs/USAGE.md +22 -0
- package/docs/i18n/AGENTS.es.md +34 -0
- package/docs/i18n/AGENTS.fr.md +34 -0
- package/docs/i18n/AGENTS.ja.md +34 -0
- package/docs/i18n/AGENTS.ko.md +34 -0
- package/docs/i18n/AGENTS.ru.md +34 -0
- package/docs/i18n/AGENTS.zh.md +34 -0
- package/docs/i18n/HARNESSES.es.md +58 -0
- package/docs/i18n/HARNESSES.fr.md +58 -0
- package/docs/i18n/HARNESSES.ja.md +58 -0
- package/docs/i18n/HARNESSES.ko.md +58 -0
- package/docs/i18n/HARNESSES.ru.md +58 -0
- package/docs/i18n/HARNESSES.zh.md +58 -0
- package/docs/i18n/HARNESS_AIDER.es.md +48 -0
- package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
- package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
- package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
- package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
- package/docs/i18n/HARNESS_CODEX.es.md +55 -0
- package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
- package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
- package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
- package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
- package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
- package/docs/i18n/HARNESS_HERMES.es.md +54 -0
- package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
- package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
- package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
- package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
- package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
- package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
- package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
- package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
- package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
- package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
- package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
- package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
- package/integrations/fireredtts2/mlx_llm.py +183 -0
- package/integrations/fireredtts2/synth.py +156 -0
- package/integrations/fireredtts2/synth_mlx.py +196 -0
- package/integrations/mlxaudio/synth.py +74 -0
- package/integrations/neuttsair/synth.py +104 -0
- package/integrations/omnivoice/synth.py +110 -0
- package/package.json +6 -1
- package/scripts/cli.mjs +84 -0
- package/scripts/doctor.mjs +104 -4
- package/scripts/install.mjs +5 -1
- package/scripts/install_fireredtts2.sh +109 -0
- package/scripts/install_mlxaudio.sh +34 -0
- package/scripts/install_mossttsnano.sh +46 -0
- package/scripts/postinstall.mjs +34 -0
package/app-node/main.mjs
CHANGED
|
@@ -6,19 +6,8 @@ import { spawn, execFile } from 'node:child_process';
|
|
|
6
6
|
import { promisify } from 'node:util';
|
|
7
7
|
|
|
8
8
|
import { Client, GatewayIntentBits, Partials } from 'discord.js';
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
EndBehaviorType,
|
|
12
|
-
StreamType,
|
|
13
|
-
VoiceConnectionStatus,
|
|
14
|
-
createAudioPlayer,
|
|
15
|
-
createAudioResource,
|
|
16
|
-
entersState,
|
|
17
|
-
joinVoiceChannel,
|
|
18
|
-
} from '@discordjs/voice';
|
|
19
|
-
import prism from 'prism-media';
|
|
20
|
-
import wav from 'wav';
|
|
21
|
-
import { buildAgentSettings, createAgentAdapter, isPatchLikeOutput } from './agent_adapters.mjs';
|
|
9
|
+
import { createAudioPlayer } from '@discordjs/voice';
|
|
10
|
+
import { buildAgentSettings, createAgentAdapter, isPatchLikeOutput, shellSplit } from './agent_adapters.mjs';
|
|
22
11
|
import {
|
|
23
12
|
appendJsonl,
|
|
24
13
|
createLatencyTurn,
|
|
@@ -26,11 +15,6 @@ import {
|
|
|
26
15
|
readJsonlRecords,
|
|
27
16
|
summarizeLatencyRecords,
|
|
28
17
|
} from './latency_metrics.mjs';
|
|
29
|
-
import { splitForTTS } from './tts_chunks.mjs';
|
|
30
|
-
import { playChunkedTTSWithPrefetch } from './tts_prefetch.mjs';
|
|
31
|
-
import { createSentencer } from './stream_sentencer.mjs';
|
|
32
|
-
import { createStreamingTTSQueue } from './streaming_tts_queue.mjs';
|
|
33
|
-
import { createSmartProgressSummarizer } from './smart_progress.mjs';
|
|
34
18
|
import {
|
|
35
19
|
isPlanEntryUtterance,
|
|
36
20
|
parsePlanOutput,
|
|
@@ -43,8 +27,16 @@ import {
|
|
|
43
27
|
renderDecisionPrompt,
|
|
44
28
|
renderResolvedDecisions,
|
|
45
29
|
} from './plan_mode.mjs';
|
|
46
|
-
import {
|
|
47
|
-
|
|
30
|
+
import {
|
|
31
|
+
parseAgentRoutingCommand,
|
|
32
|
+
renderAgentPrefix,
|
|
33
|
+
buildCrossAgentPrompt,
|
|
34
|
+
isAgentRoutingDecision,
|
|
35
|
+
buildFallbackDecision,
|
|
36
|
+
isRoutingOnlyUtterance,
|
|
37
|
+
} from './agent_routing.mjs';
|
|
38
|
+
import { createSessionOntology } from './session_ontology.mjs';
|
|
39
|
+
import { parseResearchCommand, runResearchTurn } from './research_mode.mjs';
|
|
48
40
|
import { buildTtsSettings } from './tts_settings.mjs';
|
|
49
41
|
import { createTtsBackend } from './tts_backends.mjs';
|
|
50
42
|
import {
|
|
@@ -59,21 +51,30 @@ import {
|
|
|
59
51
|
} from './tts_voice_config.mjs';
|
|
60
52
|
import { createBridgeLogger, createTransientErrorReporter, isTransientNetworkError } from './bridge_logger.mjs';
|
|
61
53
|
import { createBridgeState } from './bridge_state.mjs';
|
|
54
|
+
import { createBridge } from './bridge_context.mjs';
|
|
55
|
+
import { createVoiceIO } from './voice_io.mjs';
|
|
56
|
+
import { createTtsPlayer } from './tts_player.mjs';
|
|
57
|
+
import { createUtteranceRouter } from './utterance_router.mjs';
|
|
58
|
+
import { createProgressHandler } from './progress_handler.mjs';
|
|
59
|
+
import { createNotificationHandler } from './notification_handler.mjs';
|
|
60
|
+
import { createTtsRuntime } from './tts_runtime.mjs';
|
|
61
|
+
import { createDiscordVoiceSetup } from './discord_voice_setup.mjs';
|
|
62
|
+
import { createAgentTurnLifecycle } from './agent_turn.mjs';
|
|
63
|
+
import { createDiscordCommandRouter } from './discord_command_router.mjs';
|
|
64
|
+
import { createVoiceTurnRunner } from './voice_turn_runner.mjs';
|
|
65
|
+
import { createPlanDispatcher } from './plan_dispatcher.mjs';
|
|
62
66
|
import { sendDiscordText, splitDiscordMessage } from './discord_text.mjs';
|
|
63
|
-
import { progressTtsCacheFileName } from './progress_cache.mjs';
|
|
64
67
|
import { shouldPassWhisperLanguage, voiceLanguageCommandFromTranscript, languagePreset } from './language_config.mjs';
|
|
65
|
-
import {
|
|
68
|
+
import { whisperFailureMessage, whisperTimeoutMs } from './stt_whisper.mjs';
|
|
69
|
+
import { formatRestartCompleteNotice } from './restart_notice.mjs';
|
|
66
70
|
import {
|
|
67
|
-
appendRecentDiscordText,
|
|
68
71
|
formatRecentDiscordContext,
|
|
69
|
-
shouldRouteDiscordTextToAgent,
|
|
70
72
|
} from './text_routing.mjs';
|
|
71
73
|
import {
|
|
72
74
|
bindProjectSessionToChannel,
|
|
73
75
|
createProjectSession,
|
|
74
76
|
listProjectSessions,
|
|
75
77
|
loadProjectSessions,
|
|
76
|
-
parseProjectSessionCommand,
|
|
77
78
|
projectSessionContextText,
|
|
78
79
|
projectSessionForChannel,
|
|
79
80
|
saveProjectSessions,
|
|
@@ -161,13 +162,20 @@ function ensureTtsVoiceConfig() {
|
|
|
161
162
|
return readTtsVoiceConfig(TTS_VOICE_CONFIG_PATH);
|
|
162
163
|
}
|
|
163
164
|
function applyVoiceConfigToProcessEnv(config = ensureTtsVoiceConfig()) {
|
|
164
|
-
const selection = effectiveTtsVoiceSelection(config,
|
|
165
|
+
const selection = effectiveTtsVoiceSelection(config, process.env);
|
|
165
166
|
const configuredVoiceLanguage = process.env.VOICE_LANGUAGE;
|
|
166
167
|
const nextEnv = applyTtsVoiceSelectionToEnv(process.env, selection);
|
|
167
168
|
if (configuredVoiceLanguage) nextEnv.VOICE_LANGUAGE = configuredVoiceLanguage;
|
|
168
169
|
for (const [key, value] of Object.entries(nextEnv)) process.env[key] = value;
|
|
169
170
|
return { config, selection };
|
|
170
171
|
}
|
|
172
|
+
function rebuildTtsRuntimeSettings(selection = null) {
|
|
173
|
+
settings.tts = buildTtsSettings(process.env, ROOT);
|
|
174
|
+
if (selection?.backend === 'edge' && selection.voice?.voice) settings.tts.edge.voice = selection.voice.voice;
|
|
175
|
+
try { bridge.ttsBackend?.close?.(); } catch (e) { warn('tts backend close failed', e?.message || e); }
|
|
176
|
+
bridge.ttsBackend = createTtsBackend(settings.tts, { execFileAsync, spawn, log, warn, onFallback: ttsFallbackNotice, voiceProvider: () => settings.tts.edge.voice });
|
|
177
|
+
return settings.tts;
|
|
178
|
+
}
|
|
171
179
|
function reloadRuntimeLanguageFromEnv() {
|
|
172
180
|
const previousWhisperLanguage = settings?.whisperLanguage;
|
|
173
181
|
const previousVoiceLanguage = settings?.voiceLanguage;
|
|
@@ -190,6 +198,7 @@ const settings = {
|
|
|
190
198
|
whisperBin: process.env.WHISPER_CPP_BIN || 'whisper-cli',
|
|
191
199
|
whisperModel: process.env.WHISPER_CPP_MODEL || path.join(ROOT, 'models', 'ggml-small-q5_1.bin'),
|
|
192
200
|
whisperLanguage: process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko',
|
|
201
|
+
whisperTimeoutMs: whisperTimeoutMs(process.env),
|
|
193
202
|
voiceLanguage: process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko',
|
|
194
203
|
tts: buildTtsSettings(process.env, ROOT),
|
|
195
204
|
requireWakeWord: ['1', 'true', 'yes'].includes((process.env.REQUIRE_WAKE_WORD || '0').toLowerCase()),
|
|
@@ -207,21 +216,33 @@ const client = new Client({
|
|
|
207
216
|
intents: [GatewayIntentBits.Guilds, GatewayIntentBits.GuildVoiceStates, GatewayIntentBits.GuildMessages, GatewayIntentBits.MessageContent],
|
|
208
217
|
partials: [Partials.Channel],
|
|
209
218
|
});
|
|
210
|
-
|
|
219
|
+
const announcedTtsFallbacks = new Set();
|
|
220
|
+
const pendingFallbackNoticePromises = new Set();
|
|
221
|
+
function ttsFallbackNotice({ backend } = {}) {
|
|
222
|
+
if (!backend || backend === 'edge') return;
|
|
223
|
+
if (announcedTtsFallbacks.has(backend)) return;
|
|
224
|
+
announcedTtsFallbacks.add(backend);
|
|
225
|
+
const en = /^en/i.test(String(settings.voiceLanguage || ''));
|
|
226
|
+
const msg = en
|
|
227
|
+
? `${backend} synthesis failed; using Edge for the rest of this session.`
|
|
228
|
+
: `${backend} 음성 생성에 실패해서 이번 세션은 Edge로 진행할게.`;
|
|
229
|
+
const textPromise = sendText(`⚠️ ${msg}`)
|
|
230
|
+
.catch(e => warn('tts fallback notice send failed', e?.message || e));
|
|
231
|
+
pendingFallbackNoticePromises.add(textPromise);
|
|
232
|
+
textPromise.finally(() => pendingFallbackNoticePromises.delete(textPromise));
|
|
233
|
+
const speakPromise = new Promise(resolve => queueMicrotask(() => {
|
|
234
|
+
speakText(msg, null, null, { mirrorText: false })
|
|
235
|
+
.catch(e => warn('tts fallback notice speak failed', e?.message || e))
|
|
236
|
+
.finally(resolve);
|
|
237
|
+
}));
|
|
238
|
+
pendingFallbackNoticePromises.add(speakPromise);
|
|
239
|
+
speakPromise.finally(() => pendingFallbackNoticePromises.delete(speakPromise));
|
|
240
|
+
}
|
|
241
|
+
const bridge = createBridge();
|
|
242
|
+
bridge.ttsBackend = createTtsBackend(settings.tts, { execFileAsync, spawn, log, warn, onFallback: ttsFallbackNotice, voiceProvider: () => settings.tts.edge.voice });
|
|
211
243
|
const voiceCloneCapture = createVoiceCloneCaptureState({ defaultTargetPath: settings.tts.openvoice.refAudio });
|
|
212
244
|
|
|
213
|
-
|
|
214
|
-
let activeVoiceChannelId = '';
|
|
215
|
-
let activeTranscriptChannelId = '';
|
|
216
|
-
const recentDiscordTextByChannel = new Map();
|
|
217
|
-
let player = createAudioPlayer();
|
|
218
|
-
let speaking = false;
|
|
219
|
-
let processing = false;
|
|
220
|
-
let activeTurnId = 0;
|
|
221
|
-
let currentAbortController = null;
|
|
222
|
-
const interruptedTurns = new Set();
|
|
223
|
-
const activeStreams = new Map();
|
|
224
|
-
let bridgeState = null;
|
|
245
|
+
bridge.player = createAudioPlayer();
|
|
225
246
|
const MAX_DEFERRED_PROCESSING_UTTERANCES = Number(process.env.MAX_DEFERRED_PROCESSING_UTTERANCES || '0');
|
|
226
247
|
const MIN_UTTERANCE_SECONDS = Number(process.env.MIN_UTTERANCE_SECONDS || '1.4');
|
|
227
248
|
const MIN_UTTERANCE_BYTES = 48000 * 2 * 2 * MIN_UTTERANCE_SECONDS;
|
|
@@ -254,7 +275,7 @@ const bridgeLogger = createBridgeLogger({
|
|
|
254
275
|
});
|
|
255
276
|
function log(...args) { bridgeLogger.log(...args); }
|
|
256
277
|
function warn(...args) { bridgeLogger.warn(...args); }
|
|
257
|
-
bridgeState = createBridgeState({ log, cleanupFile: file => fs.rm(file, { force: true }, () => {}) });
|
|
278
|
+
bridge.bridgeState = createBridgeState({ log, cleanupFile: file => fs.rm(file, { force: true }, () => {}) });
|
|
258
279
|
const reportTransientProcessError = createTransientErrorReporter({ warn });
|
|
259
280
|
function isBenignTransientNetworkError(error) {
|
|
260
281
|
return isTransientNetworkError(error);
|
|
@@ -273,241 +294,80 @@ function newLatencyTurn(userId, startedAtMs) {
|
|
|
273
294
|
}
|
|
274
295
|
|
|
275
296
|
function discardVoiceInputQueues(reason = 'config-change') {
|
|
276
|
-
return bridgeState?.discardQueues(reason) || 0;
|
|
277
|
-
}
|
|
278
|
-
let verboseProgress = Boolean(settings.agent.verboseProgress);
|
|
279
|
-
let activeProgressSignal = null;
|
|
280
|
-
let verboseProgressSpeechQueue = Promise.resolve();
|
|
281
|
-
let activeProgressAbortController = null;
|
|
282
|
-
let speechPlaybackGeneration = 0;
|
|
283
|
-
let progressSpeechBatch = [];
|
|
284
|
-
let progressSpeechBatchTimer = null;
|
|
285
|
-
let progressSpeechBatchSignal = null;
|
|
286
|
-
let progressSpeechBatchStartedAt = 0;
|
|
287
|
-
|
|
288
|
-
const STREAMING_TTS_ENABLED = ['1', 'true', 'yes', 'on'].includes(String(process.env.STREAMING_TTS || '0').toLowerCase());
|
|
289
|
-
let activeSentencer = null;
|
|
290
|
-
let activeStreamingQueue = null;
|
|
291
|
-
let streamingSpeechDelivered = false;
|
|
292
|
-
|
|
293
|
-
let notifyUserOptIn = false;
|
|
294
|
-
let notifierInstance = null;
|
|
295
|
-
function ensureNotifier() {
|
|
296
|
-
if (notifierInstance) return notifierInstance;
|
|
297
|
-
notifierInstance = createNotifier({
|
|
298
|
-
provider: (process.env.NOTIFY_PROVIDER || 'ntfy').toLowerCase(),
|
|
299
|
-
topic: process.env.NTFY_TOPIC || '',
|
|
300
|
-
pushoverUser: process.env.PUSHOVER_USER || '',
|
|
301
|
-
pushoverToken: process.env.PUSHOVER_TOKEN || '',
|
|
302
|
-
});
|
|
303
|
-
return notifierInstance;
|
|
304
|
-
}
|
|
305
|
-
function notifyStatusText() {
|
|
306
|
-
const provider = (process.env.NOTIFY_PROVIDER || 'ntfy').toLowerCase();
|
|
307
|
-
const hasTopic = provider === 'ntfy' ? Boolean(process.env.NTFY_TOPIC) : (provider === 'pushover' ? Boolean(process.env.PUSHOVER_USER && process.env.PUSHOVER_TOKEN) : true);
|
|
308
|
-
const mode = notifyUserOptIn ? 'always' : 'empty-channel only';
|
|
309
|
-
const config = hasTopic ? 'configured' : 'NOT configured';
|
|
310
|
-
return `notify: ${mode} via ${provider} (${config}). Threshold: ${process.env.NOTIFY_MIN_TASK_MS || '60000'}ms.`;
|
|
311
|
-
}
|
|
312
|
-
async function getVoiceChannelHumanCount() {
|
|
313
|
-
if (!activeVoiceChannelId) return 0;
|
|
314
|
-
try {
|
|
315
|
-
const ch = await client.channels.fetch(activeVoiceChannelId).catch(() => null);
|
|
316
|
-
if (!ch || !ch.members) return 0;
|
|
317
|
-
let count = 0;
|
|
318
|
-
for (const [, m] of ch.members) if (!m.user?.bot) count += 1;
|
|
319
|
-
return count;
|
|
320
|
-
} catch (e) {
|
|
321
|
-
warn('humanCount failed', e?.message || e);
|
|
322
|
-
return 0;
|
|
323
|
-
}
|
|
324
|
-
}
|
|
325
|
-
async function maybeNotifyTaskComplete({ answer, label, elapsedMs, guildId }) {
|
|
326
|
-
const provider = (process.env.NOTIFY_PROVIDER || '').toLowerCase();
|
|
327
|
-
if (!provider || provider === 'noop') return;
|
|
328
|
-
const minTaskMs = Number(process.env.NOTIFY_MIN_TASK_MS || '60000');
|
|
329
|
-
const humanCount = await getVoiceChannelHumanCount();
|
|
330
|
-
const notifier = ensureNotifier();
|
|
331
|
-
if (!notifier.shouldNotify({ humanCount, taskMs: elapsedMs, minTaskMs, userOptIn: notifyUserOptIn })) return;
|
|
332
|
-
const text = String(answer || '').trim();
|
|
333
|
-
const lastSentence = text.split(/(?<=[.!?。!?])\s+/).filter(Boolean).pop() || text;
|
|
334
|
-
const body = lastSentence.slice(0, 200);
|
|
335
|
-
const title = label ? `${label} finished` : 'VerbalCoding finished';
|
|
336
|
-
const deepLink = buildDiscordDeepLink({ guildId, channelId: activeVoiceChannelId });
|
|
337
|
-
try {
|
|
338
|
-
const result = await notifier.send({ title, body, deepLink });
|
|
339
|
-
log('notify sent', 'provider', provider, 'status', result?.status || result?.ok, 'skipped', result?.skipped || false);
|
|
340
|
-
} catch (e) {
|
|
341
|
-
warn('notify send failed', e?.message || e);
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
const planStates = new Map(); // channelId -> { steps, language }
|
|
346
|
-
|
|
347
|
-
function planChannelKey() {
|
|
348
|
-
return activeVoiceChannelId || settings.transcriptChannelId || 'default';
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
async function askNextDecision(state, signal) {
|
|
352
|
-
const decision = state.decisions[state.pendingDecisionIndex];
|
|
353
|
-
if (!decision) return;
|
|
354
|
-
const text = renderDecisionPrompt(decision, state.language);
|
|
355
|
-
await sendText(`❓ ${text}`);
|
|
356
|
-
await speakText(text, signal, null);
|
|
297
|
+
return bridge.bridgeState?.discardQueues(reason) || 0;
|
|
357
298
|
}
|
|
299
|
+
bridge.verboseProgress = Boolean(settings.agent.verboseProgress);
|
|
358
300
|
|
|
359
|
-
|
|
360
|
-
const language = state.language;
|
|
361
|
-
const resolvedLine = renderResolvedDecisions(state.resolvedDecisions, language);
|
|
362
|
-
const plan = planNarrationLines(state.steps, language);
|
|
363
|
-
const tail = /^en/i.test(String(language || ''))
|
|
364
|
-
? `${plan}\n${resolvedLine}\nSay "approve" to run, or edit with skip/insert.`
|
|
365
|
-
: `${plan}\n${resolvedLine}\n"실행"이라고 하면 시작할게. skip/insert로 수정도 돼.`;
|
|
366
|
-
await sendText(`📝 ${tail}`);
|
|
367
|
-
await speakText(tail, signal, null);
|
|
368
|
-
}
|
|
369
|
-
|
|
370
|
-
async function dispatchPlanModeUtterance(prompt, signal) {
|
|
371
|
-
const language = settings.voiceLanguage;
|
|
372
|
-
const key = planChannelKey();
|
|
373
|
-
const existing = planStates.get(key);
|
|
374
|
-
|
|
375
|
-
if (existing && existing.pendingDecisionIndex < existing.decisions.length) {
|
|
376
|
-
const decision = existing.decisions[existing.pendingDecisionIndex];
|
|
377
|
-
const answer = parseDecisionAnswer(prompt, decision, language);
|
|
378
|
-
if (answer.type === 'unknown') {
|
|
379
|
-
await sendText(/^en/i.test(String(language || ''))
|
|
380
|
-
? '⚠️ I did not catch that. Please pick an option.'
|
|
381
|
-
: '⚠️ 못 알아들었어. 옵션 중에 하나 골라줘.');
|
|
382
|
-
await askNextDecision(existing, signal);
|
|
383
|
-
return { handled: true };
|
|
384
|
-
}
|
|
385
|
-
const next = {
|
|
386
|
-
...existing,
|
|
387
|
-
resolvedDecisions: { ...existing.resolvedDecisions, [decision.slot]: answer.choice },
|
|
388
|
-
pendingDecisionIndex: existing.pendingDecisionIndex + 1,
|
|
389
|
-
};
|
|
390
|
-
planStates.set(key, next);
|
|
391
|
-
if (next.pendingDecisionIndex < next.decisions.length) {
|
|
392
|
-
await askNextDecision(next, signal);
|
|
393
|
-
} else {
|
|
394
|
-
await finalizePlanReady(next, signal);
|
|
395
|
-
}
|
|
396
|
-
return { handled: true };
|
|
397
|
-
}
|
|
301
|
+
const STREAMING_TTS_ENABLED = ['1', 'true', 'yes', 'on'].includes(String(process.env.STREAMING_TTS || '1').toLowerCase());
|
|
398
302
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
if (cmd.type === 'skip' || cmd.type === 'insert') {
|
|
402
|
-
const nextSteps = applyPlanCommand(existing.steps, cmd);
|
|
403
|
-
planStates.set(key, { ...existing, steps: nextSteps });
|
|
404
|
-
await finalizePlanReady({ ...existing, steps: nextSteps }, signal);
|
|
405
|
-
return { handled: true };
|
|
406
|
-
}
|
|
407
|
-
if (cmd.type === 'cancel') {
|
|
408
|
-
planStates.delete(key);
|
|
409
|
-
const msg = /^en/i.test(String(language || '')) ? 'Plan cancelled.' : '계획을 취소했어.';
|
|
410
|
-
await sendText(`❎ ${msg}`);
|
|
411
|
-
await speakText(msg, signal, null);
|
|
412
|
-
return { handled: true };
|
|
413
|
-
}
|
|
414
|
-
if (cmd.type === 'approve') {
|
|
415
|
-
const finalPlan = renderFinalPlan(existing.steps);
|
|
416
|
-
const resolvedLine = renderResolvedDecisions(existing.resolvedDecisions, language);
|
|
417
|
-
const promptToRun = [
|
|
418
|
-
planExecutionPreamble(language),
|
|
419
|
-
'',
|
|
420
|
-
finalPlan,
|
|
421
|
-
resolvedLine,
|
|
422
|
-
'',
|
|
423
|
-
`Original user request: ${existing.originalPrompt}`,
|
|
424
|
-
].filter(Boolean).join('\n');
|
|
425
|
-
planStates.delete(key);
|
|
426
|
-
const note = /^en/i.test(String(language || '')) ? 'Running the plan now.' : '계획대로 실행할게.';
|
|
427
|
-
await sendText(`▶ ${note}`);
|
|
428
|
-
await speakText(note, signal, null);
|
|
429
|
-
return { handled: false, prompt: promptToRun };
|
|
430
|
-
}
|
|
431
|
-
planStates.delete(key);
|
|
432
|
-
return { handled: false, prompt };
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
if (isPlanEntryUtterance(prompt, language)) {
|
|
436
|
-
const planPrompt = `${planModePreamble(language)}\n\nUser request: ${prompt}`;
|
|
437
|
-
const adapter = adapterForProjectSession(resolveProjectSessionForChannel(planChannelKey()));
|
|
438
|
-
const plan = { task: false, label: adapter.label, verboseProgress: false, language, projectContext: '' };
|
|
439
|
-
const result = await adapter.run(planPrompt, signal, plan).catch(e => ({ answer: '', error: e }));
|
|
440
|
-
const { steps, decisions } = parsePlanOutput(result.answer || '');
|
|
441
|
-
if (!steps.length) {
|
|
442
|
-
const failMsg = /^en/i.test(String(language || ''))
|
|
443
|
-
? 'I could not produce a plan. Continuing as a regular turn.'
|
|
444
|
-
: '계획을 만들지 못했어. 일반 작업으로 진행할게.';
|
|
445
|
-
await sendText(`⚠️ ${failMsg}`);
|
|
446
|
-
return { handled: false, prompt };
|
|
447
|
-
}
|
|
448
|
-
const state = {
|
|
449
|
-
steps,
|
|
450
|
-
decisions,
|
|
451
|
-
resolvedDecisions: {},
|
|
452
|
-
pendingDecisionIndex: 0,
|
|
453
|
-
originalPrompt: prompt,
|
|
454
|
-
language,
|
|
455
|
-
};
|
|
456
|
-
planStates.set(planChannelKey(), state);
|
|
457
|
-
const narration = planNarrationLines(steps, language);
|
|
458
|
-
await sendText(`📝 ${narration}`);
|
|
459
|
-
await speakText(narration, signal, null);
|
|
460
|
-
if (decisions.length) {
|
|
461
|
-
await askNextDecision(state, signal);
|
|
462
|
-
} else {
|
|
463
|
-
await finalizePlanReady(state, signal);
|
|
464
|
-
}
|
|
465
|
-
return { handled: true };
|
|
466
|
-
}
|
|
467
|
-
return { handled: false, prompt };
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
function planNarrationLines(steps, language) {
|
|
471
|
-
const visible = steps.filter(s => s.status !== 'skipped');
|
|
472
|
-
const header = /^en/i.test(String(language || ''))
|
|
473
|
-
? `Plan with ${visible.length} steps. Say "skip step N", "add X after step N", or "approve" to run.`
|
|
474
|
-
: `${visible.length}단계 계획. "step N 건너뛰어", "step N 다음에 X 추가", "실행"이라고 말해줘.`;
|
|
475
|
-
const body = visible.map((s, i) => `${i + 1}. ${s.text}`).join('\n');
|
|
476
|
-
return `${header}\n${body}`;
|
|
477
|
-
}
|
|
478
|
-
|
|
479
|
-
let smartProgressEnabled = Boolean(process.env.SMART_PROGRESS_API_KEY);
|
|
480
|
-
let smartProgressSummarizer = null;
|
|
481
|
-
function ensureSmartProgressSummarizer() {
|
|
482
|
-
if (smartProgressSummarizer) return smartProgressSummarizer;
|
|
483
|
-
smartProgressSummarizer = createSmartProgressSummarizer({
|
|
484
|
-
apiKey: process.env.SMART_PROGRESS_API_KEY || '',
|
|
485
|
-
baseUrl: process.env.SMART_PROGRESS_BASE_URL || 'https://api.groq.com/openai/v1',
|
|
486
|
-
model: process.env.SMART_PROGRESS_MODEL || 'llama-3.1-8b-instant',
|
|
487
|
-
language: settings.voiceLanguage,
|
|
488
|
-
});
|
|
489
|
-
smartProgressSummarizer.on('summary', summary => {
|
|
490
|
-
if (!summary || !activeProgressSignal) return;
|
|
491
|
-
queueVerboseProgressSpeech(summary, activeProgressSignal);
|
|
492
|
-
});
|
|
493
|
-
return smartProgressSummarizer;
|
|
494
|
-
}
|
|
495
|
-
function smartProgressStatusText() {
|
|
496
|
-
const hasKey = Boolean(process.env.SMART_PROGRESS_API_KEY);
|
|
497
|
-
const mode = smartProgressEnabled && hasKey ? 'on' : 'off';
|
|
498
|
-
const reason = !hasKey ? ' (no SMART_PROGRESS_API_KEY set)' : '';
|
|
499
|
-
return `smart-progress: ${mode}${reason}`;
|
|
500
|
-
}
|
|
501
|
-
let activeProgressLastEventAt = 0;
|
|
502
|
-
let lastVerboseProgressText = '';
|
|
503
|
-
let lastVerboseProgressTextAt = 0;
|
|
303
|
+
bridge.smartProgressEnabled = Boolean(process.env.SMART_PROGRESS_API_KEY);
|
|
304
|
+
const VOICE_CONNECT_TIMEOUT_MS = Number(process.env.VOICE_CONNECT_TIMEOUT_MS || '60000');
|
|
504
305
|
const PROGRESS_IDLE_NOTICE_INITIAL_MS = Number(process.env.PROGRESS_IDLE_NOTICE_INITIAL_MS || process.env.PROGRESS_IDLE_NOTICE_MS || '10000');
|
|
505
306
|
const PROGRESS_IDLE_NOTICE_MAX_MS = Number(process.env.PROGRESS_IDLE_NOTICE_MAX_MS || '30000');
|
|
506
307
|
const PROGRESS_IDLE_NOTICE_MULTIPLIER = Number(process.env.PROGRESS_IDLE_NOTICE_MULTIPLIER || '1.8');
|
|
507
308
|
const PROGRESS_IDLE_CHECK_MS = Number(process.env.PROGRESS_IDLE_CHECK_MS || '5000');
|
|
508
309
|
const PROGRESS_IDLE_NOTICE_LIMIT = Number(process.env.PROGRESS_IDLE_NOTICE_LIMIT || '20');
|
|
509
310
|
const projectSessionsState = loadProjectSessions(settings.projectSessionsPath);
|
|
510
|
-
const
|
|
311
|
+
const ttsPlayer = createTtsPlayer({
|
|
312
|
+
bridge,
|
|
313
|
+
settings,
|
|
314
|
+
log,
|
|
315
|
+
warn,
|
|
316
|
+
sleep,
|
|
317
|
+
sendText,
|
|
318
|
+
refreshTtsRuntimeConfig,
|
|
319
|
+
waitEvent,
|
|
320
|
+
isAbortError,
|
|
321
|
+
STREAMING_TTS_ENABLED,
|
|
322
|
+
});
|
|
323
|
+
const { synthTTS, playAudio, speakText, beginStreamingTurn, endStreamingTurn, stopPlaybackForBargeIn } = ttsPlayer;
|
|
324
|
+
|
|
325
|
+
const progressHandler = createProgressHandler({
|
|
326
|
+
bridge,
|
|
327
|
+
settings,
|
|
328
|
+
log,
|
|
329
|
+
warn,
|
|
330
|
+
isAbortError,
|
|
331
|
+
playAudio,
|
|
332
|
+
sendText,
|
|
333
|
+
refreshTtsRuntimeConfig,
|
|
334
|
+
});
|
|
335
|
+
const {
|
|
336
|
+
ensureSmartProgressSummarizer,
|
|
337
|
+
smartProgressStatusText,
|
|
338
|
+
progressEmoji,
|
|
339
|
+
formatProgressText,
|
|
340
|
+
sendVerboseProgressText,
|
|
341
|
+
synthProgressTTS,
|
|
342
|
+
speakProgress,
|
|
343
|
+
speakImmediateNotice,
|
|
344
|
+
queueProgressSpeechText,
|
|
345
|
+
flushProgressSpeechBatch,
|
|
346
|
+
queueVerboseProgressSpeech,
|
|
347
|
+
clearProgressSpeechBatch,
|
|
348
|
+
stopProgressSpeech,
|
|
349
|
+
} = progressHandler;
|
|
350
|
+
|
|
351
|
+
const agentTurnLifecycle = createAgentTurnLifecycle({ bridge, warn });
|
|
352
|
+
|
|
353
|
+
const notificationHandler = createNotificationHandler({ bridge, client, log, warn });
|
|
354
|
+
const {
|
|
355
|
+
ensureNotifier,
|
|
356
|
+
notifyStatusText,
|
|
357
|
+
getVoiceChannelHumanCount,
|
|
358
|
+
maybeNotifyTaskComplete,
|
|
359
|
+
} = notificationHandler;
|
|
360
|
+
|
|
361
|
+
const ttsRuntime = createTtsRuntime({
|
|
362
|
+
bridge,
|
|
363
|
+
ROOT,
|
|
364
|
+
execFileAsync,
|
|
365
|
+
speakText,
|
|
366
|
+
warn,
|
|
367
|
+
persistEnvValues,
|
|
368
|
+
});
|
|
369
|
+
const { ensureSelectedTtsBackendInstalled, commandIsInstalled } = ttsRuntime;
|
|
370
|
+
|
|
511
371
|
function createBridgeAgentAdapter(agentSettings) {
|
|
512
372
|
return createAgentAdapter(agentSettings, {
|
|
513
373
|
execFileAsync,
|
|
@@ -515,53 +375,72 @@ function createBridgeAgentAdapter(agentSettings) {
|
|
|
515
375
|
log,
|
|
516
376
|
warn,
|
|
517
377
|
onProgress: event => {
|
|
518
|
-
if (!verboseProgress) return;
|
|
519
|
-
activeProgressLastEventAt = Date.now();
|
|
520
|
-
sendVerboseProgressText(event, activeProgressSignal);
|
|
521
|
-
if (smartProgressEnabled && process.env.SMART_PROGRESS_API_KEY) {
|
|
378
|
+
if (!bridge.verboseProgress) return;
|
|
379
|
+
bridge.activeProgressLastEventAt = Date.now();
|
|
380
|
+
sendVerboseProgressText(event, bridge.activeProgressSignal);
|
|
381
|
+
if (bridge.smartProgressEnabled && process.env.SMART_PROGRESS_API_KEY) {
|
|
522
382
|
try { ensureSmartProgressSummarizer().ingest(event); }
|
|
523
|
-
catch (e) { warn('smart progress ingest failed', e?.stack || e); queueVerboseProgressSpeech(event, activeProgressSignal); }
|
|
383
|
+
catch (e) { warn('smart progress ingest failed', e?.stack || e); queueVerboseProgressSpeech(event, bridge.activeProgressSignal); }
|
|
524
384
|
} else {
|
|
525
|
-
queueVerboseProgressSpeech(event, activeProgressSignal);
|
|
385
|
+
queueVerboseProgressSpeech(event, bridge.activeProgressSignal);
|
|
526
386
|
}
|
|
527
387
|
},
|
|
528
388
|
onStdoutChunk: chunk => {
|
|
529
|
-
if (activeSentencer) {
|
|
530
|
-
try { activeSentencer.push(chunk); } catch (e) { warn('streaming sentencer push failed', e?.stack || e); }
|
|
389
|
+
if (bridge.activeSentencer) {
|
|
390
|
+
try { bridge.activeSentencer.push(chunk); } catch (e) { warn('streaming sentencer push failed', e?.stack || e); }
|
|
531
391
|
}
|
|
532
392
|
},
|
|
533
393
|
});
|
|
534
394
|
}
|
|
535
395
|
const agentAdapter = createBridgeAgentAdapter(settings.agent);
|
|
536
|
-
function adapterForProjectSession(session) {
|
|
537
|
-
if (!session) return agentAdapter;
|
|
538
|
-
const key = session.slug || session.name;
|
|
539
|
-
if (!agentAdaptersBySession.has(key)) {
|
|
540
|
-
agentAdaptersBySession.set(key, createBridgeAgentAdapter({
|
|
541
|
-
...settings.agent,
|
|
542
|
-
label: `${settings.agent.label} · ${session.name}`,
|
|
543
|
-
sessionFile: session.sessionFile,
|
|
544
|
-
cwd: session.workdir,
|
|
545
|
-
projectContext: projectSessionContextText(session),
|
|
546
|
-
}));
|
|
547
|
-
}
|
|
548
|
-
return agentAdaptersBySession.get(key);
|
|
549
|
-
}
|
|
550
396
|
/**
 * Looks up the project session bound to a channel in `projectSessionsState`.
 *
 * @param {string} channelId - Channel id passed through to `projectSessionForChannel`.
 * @returns {object|null} The lookup result, or `null` when the lookup yields a falsy value.
 */
function resolveProjectSessionForChannel(channelId) {
  const boundSession = projectSessionForChannel(projectSessionsState, channelId);
  if (!boundSession) return null;
  return boundSession;
}
|
|
399
|
+
|
|
400
|
+
/**
 * Returns the ontology store cached for a channel, creating and caching it on
 * first use.
 *
 * @param {string} [channelKey] - Channel key; falsy values map to `'default'`.
 * @returns {object} The store kept in `bridge.ontologyByChannel` for that key.
 */
function ontologyStateFor(channelKey) {
  const key = String(channelKey || 'default');
  let store = bridge.ontologyByChannel.get(key);
  if (!store) {
    store = createSessionOntology({ channelKey: key });
    try {
      store.load();
    } catch (e) {
      // Loading stays best-effort (the store is still cached and returned),
      // but the failure is reported instead of being silently dropped.
      warn('ontology load failed', key, e?.message || e);
    }
    bridge.ontologyByChannel.set(key, store);
  }
  return store;
}
|
|
410
|
+
/**
 * Extracts entities from one prompt/answer pair and saves them in the
 * channel's ontology store. Never throws: any failure is reported via `warn`.
 *
 * @param {string} channelKey - Key handed to `ontologyStateFor`.
 * @param {object} [turn] - Turn data; defaults to `{}` so a missing argument
 *   no longer throws before the try block is entered.
 * @param {string} [turn.prompt] - User prompt text (tagged `kind: 'utterance'`).
 * @param {string} [turn.answer] - Agent answer text (tagged `kind: 'result'`).
 * @param {string} [turn.backend] - Backend name recorded as `by`.
 * @returns {void}
 */
function captureOntologyFromTurn(channelKey, { prompt, answer, backend } = {}) {
  try {
    const store = ontologyStateFor(channelKey);
    const promptEntities = store.entitiesFromText(String(prompt || ''), { by: backend, kind: 'utterance' });
    const answerEntities = store.entitiesFromText(String(answer || ''), { by: backend, kind: 'result' });
    store.add(promptEntities);
    store.add(answerEntities);
    store.save();
  } catch (e) {
    warn('ontology capture failed', e?.message || e);
  }
}
|
|
422
|
+
/**
 * Puts a channel's routing state back to the configured backend (non-sticky)
 * and clears any pending fallback prompt.
 *
 * @param {string} channelKey - Key handed to `routingStateFor`.
 * @returns {void}
 */
function resetRoutingState(channelKey) {
  Object.assign(routingStateFor(channelKey), {
    activeRouting: { backend: settings.agent.backend, sticky: false },
    pendingFallbackPrompt: null,
  });
}
|
|
427
|
+
/**
 * Removes every entry of `bridge.agentAdaptersByBackend` whose key ends with
 * `::<sessionSlug>`.
 *
 * @param {string} sessionSlug - Session slug; a falsy value makes this a no-op.
 * @returns {void}
 */
function invalidateBackendAdaptersForSession(sessionSlug) {
  if (!sessionSlug) return;
  const suffix = `::${sessionSlug}`;
  const staleKeys = Array.from(bridge.agentAdaptersByBackend.keys())
    .filter(key => key.endsWith(suffix));
  staleKeys.forEach(key => {
    bridge.agentAdaptersByBackend.delete(key);
  });
}
|
|
553
433
|
/**
 * Writes the in-memory `projectSessionsState` to `settings.projectSessionsPath`
 * via `saveProjectSessions`.
 *
 * @returns {void}
 */
function saveProjectSessionsState() {
  const { projectSessionsPath } = settings;
  saveProjectSessions(projectSessionsPath, projectSessionsState);
}
|
|
556
|
-
|
|
557
|
-
let sensitivityModeExpiresAt = 0;
|
|
436
|
+
bridge.sensitivityMode = SENSITIVITY_MODE_DEFAULT;
|
|
558
437
|
function currentBargeInThresholds() {
|
|
559
|
-
if (sensitivityModeExpiresAt && Date.now() > sensitivityModeExpiresAt) {
|
|
560
|
-
sensitivityMode = SENSITIVITY_MODE_DEFAULT;
|
|
561
|
-
sensitivityModeExpiresAt = 0;
|
|
562
|
-
log('barge-in sensitivity mode expired; restored', sensitivityMode);
|
|
438
|
+
if (bridge.sensitivityModeExpiresAt && Date.now() > bridge.sensitivityModeExpiresAt) {
|
|
439
|
+
bridge.sensitivityMode = SENSITIVITY_MODE_DEFAULT;
|
|
440
|
+
bridge.sensitivityModeExpiresAt = 0;
|
|
441
|
+
log('barge-in sensitivity mode expired; restored', bridge.sensitivityMode);
|
|
563
442
|
}
|
|
564
|
-
return bargeInThresholdsForMode(sensitivityMode, {
|
|
443
|
+
return bargeInThresholdsForMode(bridge.sensitivityMode, {
|
|
565
444
|
minSeconds: BARGE_IN_MIN_SECONDS,
|
|
566
445
|
minMeanDb: BARGE_IN_MIN_MEAN_VOLUME_DB,
|
|
567
446
|
minMaxDb: BARGE_IN_MIN_MAX_VOLUME_DB,
|
|
@@ -581,48 +460,28 @@ function currentPlaybackBargeInThresholds() {
|
|
|
581
460
|
};
|
|
582
461
|
}
|
|
583
462
|
/**
 * Switches the barge-in sensitivity mode stored on `bridge`.
 *
 * Anything other than `'conservative'` is treated as `'normal'`. Conservative
 * mode gets an expiry timestamp when `SENSITIVITY_OUTDOOR_SECONDS` is positive;
 * otherwise the expiry is cleared (0).
 *
 * @param {string} mode - Requested mode.
 * @param {string} [reason='manual'] - Free-form reason, only used in the log line.
 * @returns {object} The thresholds returned by `currentBargeInThresholds()`.
 */
function setSensitivityMode(mode, reason = 'manual') {
  const nextMode = mode === 'conservative' ? 'conservative' : 'normal';
  bridge.sensitivityMode = nextMode;

  let expiresAt = 0;
  if (bridge.sensitivityMode === 'conservative' && SENSITIVITY_OUTDOOR_SECONDS > 0) {
    expiresAt = Date.now() + SENSITIVITY_OUTDOOR_SECONDS * 1000;
  }
  bridge.sensitivityModeExpiresAt = expiresAt;

  const thresholds = currentBargeInThresholds();
  // Read the state back from bridge for the log line, after the thresholds call.
  log(
    'barge-in sensitivity mode set', bridge.sensitivityMode,
    'reason', reason,
    'expiresAt', bridge.sensitivityModeExpiresAt || 'never',
    'thresholds', thresholds,
  );
  return thresholds;
}
|
|
592
471
|
/**
 * Builds the localized status text for the current barge-in sensitivity.
 *
 * @returns {*} Whatever `sensitivityStatusTextForLanguage` returns for the
 *   current thresholds, remaining seconds and `settings.voiceLanguage`.
 */
function sensitivityStatusText() {
  // Fetch thresholds first; the expiry is read from bridge only afterwards.
  const thresholds = currentBargeInThresholds();
  let ttl = 0;
  if (bridge.sensitivityModeExpiresAt) {
    const remainingMs = bridge.sensitivityModeExpiresAt - Date.now();
    ttl = Math.max(0, Math.round(remainingMs / 1000));
  }
  return sensitivityStatusTextForLanguage(thresholds, ttl, settings.voiceLanguage);
}
|
|
597
476
|
|
|
598
477
|
/**
 * Builds the localized status text for verbose-progress mode.
 *
 * @returns {*} Whatever `verboseStatusTextForLanguage` returns for
 *   `bridge.verboseProgress` and `settings.voiceLanguage`.
 */
function verboseStatusText() {
  const { verboseProgress } = bridge;
  const { voiceLanguage } = settings;
  return verboseStatusTextForLanguage(verboseProgress, voiceLanguage);
}
|
|
621
480
|
|
|
622
481
|
/**
 * Turns verbose progress reporting on or off and logs the change.
 *
 * @param {*} enabled - Coerced to a boolean before being stored on `bridge`.
 * @param {string} [reason='manual'] - Free-form reason, only used in the log line.
 * @returns {boolean} The value now stored in `bridge.verboseProgress`.
 */
function setVerboseProgress(enabled, reason = 'manual') {
  bridge.verboseProgress = !!enabled;
  const { verboseProgress } = bridge;
  log('verbose progress mode set', verboseProgress, 'reason', reason);
  return verboseProgress;
}
|
|
627
486
|
|
|
628
487
|
function persistEnvValues(values) {
|
|
@@ -633,7 +492,7 @@ function persistEnvValues(values) {
|
|
|
633
492
|
} catch (e) {
|
|
634
493
|
warn('read .env for update failed', e?.stack || e);
|
|
635
494
|
}
|
|
636
|
-
const pending = new Map(Object.entries(values));
|
|
495
|
+
const pending = new Map(Object.entries(values).filter(([, value]) => value !== undefined));
|
|
637
496
|
const updated = lines.map(line => {
|
|
638
497
|
const match = line.match(/^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=.*$/);
|
|
639
498
|
if (!match || !pending.has(match[1])) return line;
|
|
@@ -655,8 +514,8 @@ function applyRuntimeLanguage(language) {
|
|
|
655
514
|
config = updateTtsVoiceConfig(config, { voiceType: preferredVoiceTypeForLanguage(config, preset.voiceLanguage) });
|
|
656
515
|
writeTtsVoiceConfig(TTS_VOICE_CONFIG_PATH, config);
|
|
657
516
|
const { selection } = applyVoiceConfigToProcessEnv(config);
|
|
658
|
-
|
|
659
|
-
|
|
517
|
+
rebuildTtsRuntimeSettings(selection);
|
|
518
|
+
if (selection.backend !== 'edge') settings.tts.edge.voice = preset.ttsVoice;
|
|
660
519
|
process.env.VOICE_LANGUAGE = preset.voiceLanguage;
|
|
661
520
|
process.env.WHISPER_CPP_LANGUAGE = preset.sttLanguage;
|
|
662
521
|
process.env.STT_LANGUAGE = preset.sttLanguage;
|
|
@@ -685,33 +544,23 @@ function voiceChangedText(selection) {
|
|
|
685
544
|
return `Voice changed to ${selection.voice?.label || selection.voiceType}.`;
|
|
686
545
|
}
|
|
687
546
|
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
if (!request) return false;
|
|
691
|
-
discardVoiceInputQueues('voice-change');
|
|
692
|
-
let config = ensureTtsVoiceConfig();
|
|
693
|
-
config = updateTtsVoiceConfig(config, request);
|
|
694
|
-
writeTtsVoiceConfig(TTS_VOICE_CONFIG_PATH, config);
|
|
695
|
-
const { selection } = applyVoiceConfigToProcessEnv(config);
|
|
696
|
-
settings.tts.backend = selection.backend;
|
|
697
|
-
if (selection.backend === 'edge') settings.tts.edge.voice = selection.voice.voice;
|
|
698
|
-
if (selection.voice?.language) settings.voiceLanguage = selection.voice.language;
|
|
699
|
-
persistEnvValues({
|
|
700
|
-
TTS_BACKEND: selection.backend,
|
|
701
|
-
TTS_VOICE_TYPE: selection.voiceType,
|
|
702
|
-
TTS_VOICE: selection.backend === 'edge' ? selection.voice.voice : process.env.TTS_VOICE,
|
|
703
|
-
VOICE_LANGUAGE: settings.voiceLanguage,
|
|
704
|
-
});
|
|
705
|
-
await speakText(voiceChangedText(selection), signal);
|
|
706
|
-
return true;
|
|
547
|
+
/**
 * Tells whether a voice type names one of the reference-clone voices
 * (`cloned_reference`, `prompt_reference`, `cosyvoice_reference`).
 * The match is case-insensitive and must cover the whole string.
 *
 * @param {*} voiceType - Voice type; falsy values are treated as an empty string.
 * @returns {boolean} `true` for a clone voice type.
 */
function isCloneVoiceType(voiceType) {
  const label = String(voiceType || '');
  const clonePattern = /^(cloned_reference|prompt_reference|cosyvoice_reference)$/i;
  return clonePattern.test(label);
}
|
|
708
550
|
|
|
709
|
-
async function
|
|
710
|
-
|
|
711
|
-
if (!
|
|
712
|
-
const
|
|
713
|
-
|
|
714
|
-
|
|
551
|
+
/**
 * Tells the user, by text and by speech, when the selected clone voice points
 * at a sample file that does not exist.
 *
 * Does nothing when there is no selection, the backend is `'edge'`, the voice
 * type is not a clone type, the sample reference is empty, or the file exists.
 * A relative reference is resolved against `ROOT`.
 *
 * @param {object} selection - Voice selection (`backend`, `voiceType`, `voice.voice`).
 * @param {AbortSignal} [signal] - Passed through to `speakText`.
 * @returns {Promise<void>}
 */
async function notifyVoiceCloneSampleGapIfNeeded(selection, signal) {
  const usesCloneVoice = Boolean(selection)
    && selection.backend !== 'edge'
    && isCloneVoiceType(selection.voiceType);
  if (!usesCloneVoice) return;

  const ref = String(selection.voice?.voice || '').trim();
  if (ref === '') return;

  let candidatePath = ref;
  if (!path.isAbsolute(ref)) candidatePath = path.resolve(ROOT, ref);
  if (fs.existsSync(candidatePath)) return;

  // English wording for `en*` voice languages; every other language gets the Korean text.
  const isEnglish = /^en/i.test(String(settings.voiceLanguage || ''));
  let msg;
  if (isEnglish) {
    msg = `${selection.backend} needs a voice clone sample at ${ref}. Say "voice clone capture" to record one, or pick a non-clone voice.`;
  } else {
    msg = `${selection.backend} 백엔드는 음성 클론 샘플(${ref})이 필요해. "보이스 클로닝 캡처"라고 하거나 다른 보이스를 골라줘.`;
  }
  await sendText(`🎙️ ${msg}`);
  await speakText(msg, signal, null);
}
|
|
716
565
|
|
|
717
566
|
/**
 * Checks a user id against `settings.allowedUsers`.
 * An empty allow-list means everyone is allowed.
 *
 * @param {*} userId - User id; compared as a string.
 * @returns {boolean} `true` when the user may use the bridge.
 */
function isAllowed(userId) {
  const { allowedUsers } = settings;
  if (allowedUsers.size === 0) return true;
  return allowedUsers.has(String(userId));
}
|
|
@@ -757,13 +606,28 @@ function spokenResultOnly(userPrompt, answer, language = settings.voiceLanguage)
|
|
|
757
606
|
/**
 * Sends text to the active transcript channel, falling back to
 * `settings.transcriptChannelId` when no channel is active on `bridge`.
 *
 * @param {string} text - Message text.
 * @returns {Promise<*>} Whatever `sendDiscordText` resolves to.
 */
async function sendText(text) {
  const channelId = bridge.activeTranscriptChannelId || settings.transcriptChannelId;
  return sendDiscordText({ client, channelId, text, log, warn });
}
|
|
766
615
|
|
|
616
|
+
/**
 * Sends an embed, optionally with text content, to the active transcript
 * channel (or `settings.transcriptChannelId` when none is active).
 *
 * Never throws: a failed send is reported via `warn` and yields `false`.
 *
 * @param {object} embed - Embed to send; a falsy value returns `false` at once.
 * @param {object} [options]
 * @param {string} [options.content=''] - Text sent with the embed when non-empty.
 * @returns {Promise<boolean>} `true` when the message was sent; `false` when
 *   there is no embed, no channel id, no sendable channel, or the send failed.
 */
async function sendEmbed(embed, { content = '' } = {}) {
  if (!embed) return false;
  try {
    const channelId = bridge.activeTranscriptChannelId || settings.transcriptChannelId;
    if (!channelId) return false;

    // A failed fetch is treated the same as "channel not found".
    const channel = await client.channels.fetch(channelId).catch(() => null);
    if (!channel?.send) return false;

    const payload = { ...(content ? { content } : {}), embeds: [embed] };
    await channel.send(payload);
    return true;
  } catch (e) {
    warn('sendEmbed failed', e?.message || e);
    return false;
  }
}
|
|
630
|
+
|
|
767
631
|
async function sendChannelText(channel, text) {
|
|
768
632
|
const body = String(text || '');
|
|
769
633
|
const chunks = splitDiscordMessage(body);
|
|
@@ -771,18 +635,6 @@ async function sendChannelText(channel, text) {
|
|
|
771
635
|
return true;
|
|
772
636
|
}
|
|
773
637
|
|
|
774
|
-
function sendVerboseProgressText(event, signal) {
|
|
775
|
-
if (!verboseProgress || !signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
776
|
-
const formatted = formatProgressText(event).replace(/\s+/g, ' ').trim();
|
|
777
|
-
if (!formatted) return;
|
|
778
|
-
const message = formatted.slice(0, 1900);
|
|
779
|
-
const now = Date.now();
|
|
780
|
-
if (message === lastVerboseProgressText && now - lastVerboseProgressTextAt < 2000) return;
|
|
781
|
-
lastVerboseProgressText = message;
|
|
782
|
-
lastVerboseProgressTextAt = now;
|
|
783
|
-
void sendText(message).catch(e => warn('verbose progress text delivery failed', e?.stack || e));
|
|
784
|
-
}
|
|
785
|
-
|
|
786
638
|
/**
 * Returns a promise that resolves after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
@@ -798,83 +650,240 @@ function waitEvent(emitter, event, timeoutMs = 60000) {
|
|
|
798
650
|
});
|
|
799
651
|
}
|
|
800
652
|
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
653
|
+
// handleRecording is provided by voiceTurnRunner, which is only created further
// down, but voiceIO.flushUtterance needs to call it. Forward-declare the runner
// with `let` and pass a thunk, so the deps for createVoiceIO resolve before
// createVoiceTurnRunner has been called.
|
|
657
|
+
let utteranceRouter;
|
|
658
|
+
let voiceTurnRunner;
|
|
659
|
+
const voiceIO = createVoiceIO({
|
|
660
|
+
bridge,
|
|
661
|
+
settings,
|
|
662
|
+
client,
|
|
663
|
+
execFileAsync,
|
|
664
|
+
log,
|
|
665
|
+
warn,
|
|
666
|
+
stamp,
|
|
667
|
+
sleep,
|
|
668
|
+
isAllowed,
|
|
669
|
+
UTTERANCE_IDLE_MS,
|
|
670
|
+
SUBSCRIBE_AFTER_SILENCE_MS,
|
|
671
|
+
MIN_UTTERANCE_BYTES,
|
|
672
|
+
MIN_MEAN_VOLUME_DB,
|
|
673
|
+
MIN_MAX_VOLUME_DB,
|
|
674
|
+
currentBargeInThresholds,
|
|
675
|
+
currentPlaybackBargeInThresholds,
|
|
676
|
+
createLiveBargeInMonitor,
|
|
677
|
+
shouldUseLivePlaybackBargeIn,
|
|
678
|
+
stopPlaybackForBargeIn,
|
|
679
|
+
analyzeAudio,
|
|
680
|
+
concatWavs,
|
|
681
|
+
saveCapturedVoiceCloneSample,
|
|
682
|
+
isBargeInCandidate,
|
|
683
|
+
validateProcessingBargeIn,
|
|
684
|
+
enqueueDeferredProcessingUtterance,
|
|
685
|
+
newLatencyTurn,
|
|
686
|
+
handleRecording: (...args) => voiceTurnRunner.handleRecording(...args),
|
|
687
|
+
});
|
|
688
|
+
const { transcribeOnce, transcribe, cleanTranscript, queueSegment, flushUtterance, subscribeUser } = voiceIO;
|
|
689
|
+
|
|
690
|
+
const discordVoiceSetup = createDiscordVoiceSetup({
|
|
691
|
+
bridge,
|
|
692
|
+
client,
|
|
693
|
+
settings,
|
|
694
|
+
ROOT,
|
|
695
|
+
log,
|
|
696
|
+
warn,
|
|
697
|
+
speakText,
|
|
698
|
+
waitEvent,
|
|
699
|
+
subscribeUser,
|
|
700
|
+
pendingFallbackNoticePromises,
|
|
701
|
+
bindProjectSessionToChannel,
|
|
702
|
+
createProjectSession,
|
|
703
|
+
resolveProjectSessionForChannel,
|
|
704
|
+
saveProjectSessionsState,
|
|
705
|
+
projectSessionsState,
|
|
706
|
+
invalidateBackendAdaptersForSession,
|
|
707
|
+
VOICE_CONNECT_TIMEOUT_MS,
|
|
708
|
+
});
|
|
709
|
+
const {
|
|
710
|
+
connectTo,
|
|
711
|
+
autoJoin,
|
|
712
|
+
findVoiceChannelBySelector,
|
|
713
|
+
voiceChannelLabel,
|
|
714
|
+
resolveVoiceChannelForAttach,
|
|
715
|
+
attachVoiceChannelToTextSession,
|
|
716
|
+
gracefulShutdown,
|
|
717
|
+
} = discordVoiceSetup;
|
|
718
|
+
utteranceRouter = createUtteranceRouter({
|
|
719
|
+
bridge,
|
|
720
|
+
agentTurnLifecycle,
|
|
721
|
+
log,
|
|
722
|
+
warn,
|
|
723
|
+
path,
|
|
724
|
+
fs,
|
|
725
|
+
ROOT,
|
|
726
|
+
TTS_VOICE_CONFIG_PATH,
|
|
727
|
+
agentAdapter,
|
|
728
|
+
settings,
|
|
729
|
+
isPlanEntryUtterance,
|
|
730
|
+
parsePlanOutput,
|
|
731
|
+
parsePlanVoiceCommand,
|
|
732
|
+
applyPlanCommand,
|
|
733
|
+
renderFinalPlan,
|
|
734
|
+
planModePreamble,
|
|
735
|
+
planExecutionPreamble,
|
|
736
|
+
parseDecisionAnswer,
|
|
737
|
+
renderDecisionPrompt,
|
|
738
|
+
renderResolvedDecisions,
|
|
739
|
+
isAgentRoutingDecision,
|
|
740
|
+
projectSessionContextText,
|
|
741
|
+
resolveProjectSessionForChannel,
|
|
742
|
+
createBridgeAgentAdapter,
|
|
743
|
+
buildAgentSettings,
|
|
744
|
+
commandIsInstalled,
|
|
745
|
+
shellSplit,
|
|
746
|
+
sendText,
|
|
747
|
+
speakText,
|
|
748
|
+
ensureTtsVoiceConfig,
|
|
749
|
+
updateTtsVoiceConfig,
|
|
750
|
+
writeTtsVoiceConfig,
|
|
751
|
+
applyVoiceConfigToProcessEnv,
|
|
752
|
+
ensureSelectedTtsBackendInstalled,
|
|
753
|
+
rebuildTtsRuntimeSettings,
|
|
754
|
+
voiceCommandFromTranscript,
|
|
755
|
+
voiceChangedText,
|
|
756
|
+
voiceLanguageCommandFromTranscript,
|
|
757
|
+
voiceCloneCommandFromText,
|
|
758
|
+
voiceCloneCapture,
|
|
759
|
+
notifyVoiceCloneSampleGapIfNeeded,
|
|
760
|
+
languageChangedText,
|
|
761
|
+
applyRuntimeLanguage,
|
|
762
|
+
persistEnvValues,
|
|
763
|
+
discardVoiceInputQueues,
|
|
764
|
+
// Phase 4b deps
|
|
765
|
+
transcribe,
|
|
766
|
+
beginStreamingTurn,
|
|
767
|
+
endStreamingTurn,
|
|
768
|
+
client,
|
|
769
|
+
isAllowed,
|
|
770
|
+
isAbortError,
|
|
771
|
+
sleep,
|
|
772
|
+
sendEmbed,
|
|
773
|
+
speakImmediateNotice,
|
|
774
|
+
reloadRuntimeLanguageFromEnv,
|
|
775
|
+
drainDeferredProcessingUtterances,
|
|
776
|
+
maybeNotifyTaskComplete,
|
|
777
|
+
ontologyStateFor,
|
|
778
|
+
captureOntologyFromTurn,
|
|
779
|
+
queueProgressSpeechText,
|
|
780
|
+
stopProgressSpeech,
|
|
781
|
+
agentAnswerHeader,
|
|
782
|
+
emptyAgentAnswer,
|
|
783
|
+
formatRecentDiscordContext,
|
|
784
|
+
formatSttResultMessage,
|
|
785
|
+
formatSttStartMessage,
|
|
786
|
+
formatVoiceErrorMessage,
|
|
787
|
+
formatWakeRejectedMessage,
|
|
788
|
+
spokenResultOnly,
|
|
789
|
+
stripWake,
|
|
790
|
+
acceptsWake,
|
|
791
|
+
sensitivityChangedSpeech,
|
|
792
|
+
sensitivityModeFromTranscript,
|
|
793
|
+
sensitivityStatusText,
|
|
794
|
+
setSensitivityMode,
|
|
795
|
+
isSensitivityOnlyRequest,
|
|
796
|
+
verboseChangedSpeech,
|
|
797
|
+
verboseModeFromTranscript,
|
|
798
|
+
verboseStatusText,
|
|
799
|
+
setVerboseProgress,
|
|
800
|
+
isVerboseOnlyRequest,
|
|
801
|
+
isRoutingOnlyUtterance,
|
|
802
|
+
parseAgentRoutingCommand,
|
|
803
|
+
renderAgentPrefix,
|
|
804
|
+
buildCrossAgentPrompt,
|
|
805
|
+
buildFallbackDecision,
|
|
806
|
+
parseResearchCommand,
|
|
807
|
+
runResearchTurn,
|
|
808
|
+
PROGRESS_IDLE_CHECK_MS,
|
|
809
|
+
PROGRESS_IDLE_NOTICE_INITIAL_MS,
|
|
810
|
+
PROGRESS_IDLE_NOTICE_LIMIT,
|
|
811
|
+
PROGRESS_IDLE_NOTICE_MAX_MS,
|
|
812
|
+
PROGRESS_IDLE_NOTICE_MULTIPLIER,
|
|
813
|
+
STT_START_VOICE_NOTICE,
|
|
814
|
+
});
|
|
815
|
+
const {
|
|
816
|
+
adapterForProjectSession,
|
|
817
|
+
routingStateFor,
|
|
818
|
+
recordUtterance,
|
|
819
|
+
clearTransientRouting,
|
|
820
|
+
adapterForBackend,
|
|
821
|
+
handleTtsVoiceCommand,
|
|
822
|
+
handleLanguageCommand,
|
|
823
|
+
handleVoiceCloneCommand,
|
|
824
|
+
interruptCurrentResponse,
|
|
825
|
+
} = utteranceRouter;
|
|
826
|
+
|
|
827
|
+
// Plan-mode dispatcher wiring. Every dependency is passed under its own name,
// so property shorthand is used throughout (the former `key: key` pairs for
// parsePlanVoiceCommand and applyPlanCommand were redundant).
const planDispatcher = createPlanDispatcher({
  bridge, settings,
  sendText, speakText,
  routingStateFor, adapterForBackend, adapterForProjectSession,
  resolveProjectSessionForChannel,
  isAgentRoutingDecision,
  parseDecisionAnswer, parsePlanVoiceCommand,
  applyPlanCommand,
  parsePlanOutput,
  renderDecisionPrompt, renderResolvedDecisions, renderFinalPlan,
  planModePreamble, planExecutionPreamble, isPlanEntryUtterance,
});
const {
  planChannelKey,
  askNextDecision,
  finalizePlanReady,
  dispatchPlanModeUtterance,
  planNarrationLines,
} = planDispatcher;
|
|
846
|
+
|
|
847
|
+
// Assigns the forward-declared voiceTurnRunner. Dependencies are grouped by
// the object they were destructured from earlier in this file.
voiceTurnRunner = createVoiceTurnRunner({
  // Shared state, lifecycle and basics
  bridge, agentTurnLifecycle, settings, client, log, warn, fs,
  // From voiceIO
  transcribe,
  // From ttsPlayer
  beginStreamingTurn, endStreamingTurn, speakText,
  // From progressHandler
  queueProgressSpeechText, stopProgressSpeech, speakImmediateNotice,
  // From notificationHandler
  maybeNotifyTaskComplete,
  // From utteranceRouter (command handlers, adapter selection, routing state)
  handleLanguageCommand, handleTtsVoiceCommand, handleVoiceCloneCommand,
  adapterForBackend, adapterForProjectSession,
  routingStateFor, recordUtterance, clearTransientRouting,
  // From planDispatcher
  dispatchPlanModeUtterance, planChannelKey,
  // Helpers defined or imported in this file
  isAllowed, isAbortError, sleep, sendText, sendEmbed,
  reloadRuntimeLanguageFromEnv, drainDeferredProcessingUtterances,
  resolveProjectSessionForChannel, projectSessionContextText,
  ontologyStateFor, captureOntologyFromTurn,
  formatRecentDiscordContext,
  formatSttResultMessage, formatSttStartMessage,
  formatVoiceErrorMessage, formatWakeRejectedMessage,
  agentAnswerHeader, emptyAgentAnswer, spokenResultOnly,
  stripWake, acceptsWake,
  sensitivityChangedSpeech, sensitivityModeFromTranscript, sensitivityStatusText,
  setSensitivityMode, isSensitivityOnlyRequest,
  verboseChangedSpeech, verboseModeFromTranscript, verboseStatusText,
  setVerboseProgress, isVerboseOnlyRequest,
  isRoutingOnlyUtterance, parseAgentRoutingCommand, renderAgentPrefix,
  buildCrossAgentPrompt, buildFallbackDecision,
  parseDecisionAnswer,
  parseResearchCommand, runResearchTurn,
  // Tunables
  PROGRESS_IDLE_CHECK_MS, PROGRESS_IDLE_NOTICE_INITIAL_MS,
  PROGRESS_IDLE_NOTICE_LIMIT, PROGRESS_IDLE_NOTICE_MAX_MS,
  PROGRESS_IDLE_NOTICE_MULTIPLIER, STT_START_VOICE_NOTICE,
});
const { handleRecording } = voiceTurnRunner;
|
|
878
887
|
|
|
879
888
|
function isAbortError(e) {
|
|
880
889
|
return e?.name === 'AbortError' || e?.code === 'ABORT_ERR';
|
|
@@ -914,274 +923,45 @@ async function refreshTtsRuntimeConfig() {
|
|
|
914
923
|
if (previousBackend !== settings.tts.backend) {
|
|
915
924
|
const rebuilt = buildTtsSettings(process.env, ROOT);
|
|
916
925
|
Object.assign(settings.tts, rebuilt);
|
|
917
|
-
ttsBackend
|
|
926
|
+
try { bridge.ttsBackend?.close?.(); } catch (e) { warn('tts backend close failed', e?.message || e); }
|
|
927
|
+
bridge.ttsBackend = createTtsBackend(settings.tts, { execFileAsync, spawn, log, warn, onFallback: ttsFallbackNotice, voiceProvider: () => settings.tts.edge.voice });
|
|
918
928
|
log('tts backend reloaded from voice config', settings.tts.backend, 'voiceType', selection.voiceType);
|
|
919
929
|
}
|
|
920
930
|
return selection;
|
|
921
931
|
}
|
|
922
932
|
|
|
923
|
-
async function synthTTS(text, signal) {
|
|
924
|
-
await refreshTtsRuntimeConfig();
|
|
925
|
-
let lastError = null;
|
|
926
|
-
for (let attempt = 1; attempt <= 3; attempt += 1) {
|
|
927
|
-
try {
|
|
928
|
-
log('final tts synth start', 'backend', ttsBackend.name, 'attempt', attempt, 'chars', String(text || '').length);
|
|
929
|
-
const out = await ttsBackend.synthesize(text, { signal, kind: 'final' });
|
|
930
|
-
log('final tts synth done', 'backend', ttsBackend.name, 'attempt', attempt, out, fs.statSync(out).size);
|
|
931
|
-
return out;
|
|
932
|
-
} catch (e) {
|
|
933
|
-
lastError = e;
|
|
934
|
-
if (isAbortError(e) || signal?.aborted) throw e;
|
|
935
|
-
warn('final tts synth failed', 'attempt', attempt, e?.stderr?.toString?.().slice(-500) || e?.message || e);
|
|
936
|
-
await sleep(1000 * attempt);
|
|
937
|
-
}
|
|
938
|
-
}
|
|
939
|
-
throw lastError;
|
|
940
|
-
}
|
|
941
|
-
|
|
942
|
-
async function synthProgressTTS(text, signal) {
|
|
943
|
-
await refreshTtsRuntimeConfig();
|
|
944
|
-
const ext = ttsBackend.outputExtension || 'mp3';
|
|
945
|
-
const cachePath = path.join(settings.tts.progressCacheDir, progressTtsCacheFileName({
|
|
946
|
-
backendKeyParts: ttsBackend.cacheKeyParts(),
|
|
947
|
-
text,
|
|
948
|
-
ext,
|
|
949
|
-
}));
|
|
950
|
-
if (fs.existsSync(cachePath) && fs.statSync(cachePath).size > 0) {
|
|
951
|
-
log('progress tts cache hit', text, cachePath);
|
|
952
|
-
return cachePath;
|
|
953
|
-
}
|
|
954
|
-
log('progress tts cache miss', text);
|
|
955
|
-
const tmp = await ttsBackend.synthesize(text, { signal, kind: 'progress' });
|
|
956
|
-
fs.renameSync(tmp, cachePath);
|
|
957
|
-
return cachePath;
|
|
958
|
-
}
|
|
959
|
-
|
|
960
|
-
async function playAudio(file, { deleteAfter = true } = {}) {
|
|
961
|
-
if (!connection) return;
|
|
962
|
-
speaking = true;
|
|
963
|
-
try {
|
|
964
|
-
const resource = createAudioResource(file, { inputType: StreamType.Arbitrary, inlineVolume: true });
|
|
965
|
-
resource.volume?.setVolume(settings.tts.volume);
|
|
966
|
-
player.play(resource);
|
|
967
|
-
connection.subscribe(player);
|
|
968
|
-
await waitEvent(player, AudioPlayerStatus.Idle, 120000).catch(() => {});
|
|
969
|
-
} finally {
|
|
970
|
-
speaking = false;
|
|
971
|
-
if (deleteAfter) fs.rm(file, { force: true }, () => {});
|
|
972
|
-
}
|
|
973
|
-
}
|
|
974
|
-
|
|
975
|
-
async function speakText(text, signal, metricsTurn = null, options = {}) {
|
|
976
|
-
const chunks = splitForTTS(text, settings.tts.maxChars);
|
|
977
|
-
if (!chunks.length) return;
|
|
978
|
-
if (options.mirrorText !== false) {
|
|
979
|
-
await sendText(`${options.mirrorPrefix || '🔊 음성으로 읽는 내용'}:\n${String(text || '')}`);
|
|
980
|
-
}
|
|
981
|
-
log('TTS chunks', chunks.length, 'maxChars', settings.tts.maxChars, 'backend', ttsBackend.name);
|
|
982
|
-
const playbackGeneration = speechPlaybackGeneration;
|
|
983
|
-
const playbackStopped = () => playbackGeneration !== speechPlaybackGeneration;
|
|
984
|
-
let synthMs = 0;
|
|
985
|
-
let playMs = 0;
|
|
986
|
-
const ttsStart = Date.now();
|
|
987
|
-
await playChunkedTTSWithPrefetch(chunks, {
|
|
988
|
-
signal,
|
|
989
|
-
log,
|
|
990
|
-
synth: async chunk => {
|
|
991
|
-
if (playbackStopped()) return null;
|
|
992
|
-
const start = Date.now();
|
|
993
|
-
try { return await synthTTS(chunk, signal); }
|
|
994
|
-
finally { synthMs += Date.now() - start; }
|
|
995
|
-
},
|
|
996
|
-
play: async file => {
|
|
997
|
-
if (playbackStopped()) {
|
|
998
|
-
await fs.promises.rm(file, { force: true }).catch(() => {});
|
|
999
|
-
return;
|
|
1000
|
-
}
|
|
1001
|
-
const start = Date.now();
|
|
1002
|
-
try { return await playAudio(file); }
|
|
1003
|
-
finally { playMs += Date.now() - start; }
|
|
1004
|
-
},
|
|
1005
|
-
cleanup: file => fs.promises.rm(file, { force: true }),
|
|
1006
|
-
});
|
|
1007
|
-
metricsTurn?.stage('tts_synth', synthMs, { ttsChunks: chunks.length, spokenChars: String(text || '').length });
|
|
1008
|
-
metricsTurn?.stage('tts_play', playMs);
|
|
1009
|
-
metricsTurn?.stage('tts_total', Date.now() - ttsStart);
|
|
1010
|
-
}
|
|
1011
|
-
|
|
1012
|
-
function beginStreamingTurn(signal) {
|
|
1013
|
-
if (!STREAMING_TTS_ENABLED || !connection) return false;
|
|
1014
|
-
streamingSpeechDelivered = false;
|
|
1015
|
-
const sentencer = createSentencer({ minChars: 40, maxLatencyMs: 800 });
|
|
1016
|
-
const queue = createStreamingTTSQueue({
|
|
1017
|
-
synth: async text => synthTTS(text, signal),
|
|
1018
|
-
play: async file => playAudio(file, { deleteAfter: false }),
|
|
1019
|
-
cleanup: async file => { try { await fs.promises.rm(file, { force: true }); } catch {} },
|
|
1020
|
-
signal,
|
|
1021
|
-
log,
|
|
1022
|
-
});
|
|
1023
|
-
sentencer.on('sentence', text => {
|
|
1024
|
-
if (signal?.aborted) return;
|
|
1025
|
-
queue.enqueue(text);
|
|
1026
|
-
});
|
|
1027
|
-
activeSentencer = sentencer;
|
|
1028
|
-
activeStreamingQueue = queue;
|
|
1029
|
-
log('streaming turn begin');
|
|
1030
|
-
return true;
|
|
1031
|
-
}
|
|
1032
|
-
|
|
1033
|
-
async function endStreamingTurn() {
|
|
1034
|
-
const sentencer = activeSentencer;
|
|
1035
|
-
const queue = activeStreamingQueue;
|
|
1036
|
-
activeSentencer = null;
|
|
1037
|
-
activeStreamingQueue = null;
|
|
1038
|
-
if (!sentencer || !queue) return;
|
|
1039
|
-
try { sentencer.flush(); } catch (e) { warn('streaming sentencer flush failed', e?.stack || e); }
|
|
1040
|
-
try { await queue.drain(); } catch (e) { warn('streaming queue drain failed', e?.stack || e); }
|
|
1041
|
-
streamingSpeechDelivered = queue.size === 0;
|
|
1042
|
-
log('streaming turn end');
|
|
1043
|
-
}
|
|
1044
|
-
|
|
1045
|
-
async function speakProgress(text, signal) {
|
|
1046
|
-
if (signal?.aborted) return;
|
|
1047
|
-
try {
|
|
1048
|
-
const mp3 = await synthProgressTTS(text, signal);
|
|
1049
|
-
if (signal?.aborted) return;
|
|
1050
|
-
await playAudio(mp3, { deleteAfter: false });
|
|
1051
|
-
} catch (e) {
|
|
1052
|
-
if (!isAbortError(e)) warn('progress tts failed', e?.stack || e);
|
|
1053
|
-
}
|
|
1054
|
-
}
|
|
1055
|
-
|
|
1056
|
-
async function speakImmediateNotice(text, signal, reason = 'notice') {
|
|
1057
|
-
if (signal?.aborted) return;
|
|
1058
|
-
try {
|
|
1059
|
-
log('immediate notice speech', reason, 'text', String(text || '').slice(0, 80));
|
|
1060
|
-
const mp3 = await synthProgressTTS(text, signal);
|
|
1061
|
-
if (signal?.aborted) return;
|
|
1062
|
-
await playAudio(mp3, { deleteAfter: false });
|
|
1063
|
-
} catch (e) {
|
|
1064
|
-
if (!isAbortError(e)) warn('immediate notice speech failed', reason, e?.stack || e);
|
|
1065
|
-
}
|
|
1066
|
-
}
|
|
1067
|
-
|
|
1068
|
-
function queueProgressSpeechText(text, signal, reason = 'status') {
|
|
1069
|
-
const spoken = String(text || '').replace(/\s+/g, ' ').trim();
|
|
1070
|
-
if (!spoken || !signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
1071
|
-
verboseProgressSpeechQueue = verboseProgressSpeechQueue
|
|
1072
|
-
.catch(() => {})
|
|
1073
|
-
.then(async () => {
|
|
1074
|
-
if (signal.aborted || activeProgressSignal !== signal || !processing) return;
|
|
1075
|
-
log('progress speech queued', reason, 'text', spoken);
|
|
1076
|
-
await speakProgress(spoken, signal);
|
|
1077
|
-
});
|
|
1078
|
-
}
|
|
1079
|
-
|
|
1080
|
-
function flushProgressSpeechBatch(signal, reason = 'timer') {
|
|
1081
|
-
if (!signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
1082
|
-
if (progressSpeechBatchTimer) {
|
|
1083
|
-
clearTimeout(progressSpeechBatchTimer);
|
|
1084
|
-
progressSpeechBatchTimer = null;
|
|
1085
|
-
}
|
|
1086
|
-
const events = progressSpeechBatch;
|
|
1087
|
-
progressSpeechBatch = [];
|
|
1088
|
-
progressSpeechBatchSignal = null;
|
|
1089
|
-
progressSpeechBatchStartedAt = 0;
|
|
1090
|
-
const text = summarizeProgressEvents(events, { maxCategories: 3, language: settings.voiceLanguage });
|
|
1091
|
-
if (!text) return;
|
|
1092
|
-
queueProgressSpeechText(text, signal, `batch-${reason}-${events.length}`);
|
|
1093
|
-
}
|
|
1094
|
-
|
|
1095
|
-
function queueVerboseProgressSpeech(event, signal) {
|
|
1096
|
-
if (!verboseProgress || !signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
1097
|
-
const text = String(event || '').replace(/\s+/g, ' ').trim().slice(0, 120);
|
|
1098
|
-
if (!text) return;
|
|
1099
|
-
if (progressSpeechBatchSignal && progressSpeechBatchSignal !== signal) {
|
|
1100
|
-
progressSpeechBatch = [];
|
|
1101
|
-
if (progressSpeechBatchTimer) clearTimeout(progressSpeechBatchTimer);
|
|
1102
|
-
progressSpeechBatchTimer = null;
|
|
1103
|
-
progressSpeechBatchStartedAt = 0;
|
|
1104
|
-
}
|
|
1105
|
-
progressSpeechBatchSignal = signal;
|
|
1106
|
-
if (!progressSpeechBatchStartedAt) progressSpeechBatchStartedAt = Date.now();
|
|
1107
|
-
progressSpeechBatch.push(text);
|
|
1108
|
-
const elapsedMs = Date.now() - progressSpeechBatchStartedAt;
|
|
1109
|
-
const ratePerSecond = progressSpeechBatch.length / Math.max(0.2, elapsedMs / 1000);
|
|
1110
|
-
const maxBatchEvents = ratePerSecond >= 6 ? 5 : ratePerSecond >= 3 ? 4 : 3;
|
|
1111
|
-
const batchDelayMs = ratePerSecond >= 6 ? 650 : ratePerSecond >= 3 ? 550 : 450;
|
|
1112
|
-
if (progressSpeechBatch.length >= maxBatchEvents) {
|
|
1113
|
-
flushProgressSpeechBatch(signal, 'full');
|
|
1114
|
-
return;
|
|
1115
|
-
}
|
|
1116
|
-
if (progressSpeechBatchTimer) clearTimeout(progressSpeechBatchTimer);
|
|
1117
|
-
progressSpeechBatchTimer = setTimeout(() => flushProgressSpeechBatch(signal, 'timer'), batchDelayMs);
|
|
1118
|
-
}
|
|
1119
|
-
|
|
1120
|
-
function clearProgressSpeechBatch(signal = activeProgressSignal) {
|
|
1121
|
-
if (progressSpeechBatchTimer) {
|
|
1122
|
-
clearTimeout(progressSpeechBatchTimer);
|
|
1123
|
-
progressSpeechBatchTimer = null;
|
|
1124
|
-
}
|
|
1125
|
-
if (!signal || progressSpeechBatchSignal === signal) {
|
|
1126
|
-
progressSpeechBatch = [];
|
|
1127
|
-
progressSpeechBatchSignal = null;
|
|
1128
|
-
progressSpeechBatchStartedAt = 0;
|
|
1129
|
-
}
|
|
1130
|
-
}
|
|
1131
|
-
|
|
1132
|
-
function stopProgressSpeech(signal, reason = 'final-answer') {
|
|
1133
|
-
if (activeProgressSignal !== signal) return;
|
|
1134
|
-
clearProgressSpeechBatch(signal);
|
|
1135
|
-
activeProgressSignal = null;
|
|
1136
|
-
if (activeProgressAbortController && !activeProgressAbortController.signal.aborted) {
|
|
1137
|
-
try { activeProgressAbortController.abort(); } catch (e) { warn('abort progress speech failed', e?.stack || e); }
|
|
1138
|
-
}
|
|
1139
|
-
if (speaking) {
|
|
1140
|
-
log('stop progress speech before final answer', reason);
|
|
1141
|
-
try { player.stop(true); } catch (e) { warn('stop progress speech failed', e?.stack || e); }
|
|
1142
|
-
speaking = false;
|
|
1143
|
-
}
|
|
1144
|
-
}
|
|
1145
|
-
|
|
1146
933
|
async function handleTextAgentMessage(msg, text, { speakResponse = false } = {}) {
|
|
1147
|
-
if (processing) {
|
|
934
|
+
if (bridge.processing) {
|
|
1148
935
|
await msg.reply('지금 이전 작업을 처리 중이야. 끝나면 다시 보내줘.');
|
|
1149
936
|
return;
|
|
1150
937
|
}
|
|
1151
|
-
|
|
1152
|
-
const controller = new AbortController();
|
|
1153
|
-
currentAbortController = controller;
|
|
1154
|
-
const signal = controller.signal;
|
|
1155
|
-
const progressController = new AbortController();
|
|
1156
|
-
activeProgressAbortController = progressController;
|
|
1157
|
-
activeProgressSignal = progressController.signal;
|
|
1158
|
-
activeProgressLastEventAt = Date.now();
|
|
1159
|
-
const previousTranscriptChannelId = activeTranscriptChannelId;
|
|
938
|
+
const turn = agentTurnLifecycle.start();
|
|
939
|
+
const { controller, signal, progressController } = turn;
|
|
1160
940
|
const session = resolveProjectSessionForChannel(msg.channelId);
|
|
1161
|
-
activeTranscriptChannelId = session?.transcriptChannelId || msg.channelId;
|
|
941
|
+
bridge.activeTranscriptChannelId = session?.transcriptChannelId || msg.channelId;
|
|
1162
942
|
const selectedAgentAdapter = adapterForProjectSession(session);
|
|
1163
943
|
const projectContext = projectSessionContextText(session);
|
|
1164
|
-
const recentDiscordContext = formatRecentDiscordContext(recentDiscordTextByChannel, {
|
|
1165
|
-
channelId: activeTranscriptChannelId,
|
|
944
|
+
const recentDiscordContext = formatRecentDiscordContext(bridge.recentDiscordTextByChannel, {
|
|
945
|
+
channelId: bridge.activeTranscriptChannelId,
|
|
1166
946
|
});
|
|
1167
947
|
const plan = {
|
|
1168
948
|
task: true,
|
|
1169
949
|
label: selectedAgentAdapter.label,
|
|
1170
|
-
verboseProgress,
|
|
950
|
+
verboseProgress: bridge.verboseProgress,
|
|
1171
951
|
language: settings.voiceLanguage,
|
|
1172
952
|
cwd: session?.workdir,
|
|
1173
953
|
projectContext,
|
|
1174
954
|
recentDiscordContext,
|
|
1175
955
|
};
|
|
1176
956
|
const sessionBefore = selectedAgentAdapter.readSessionId?.();
|
|
1177
|
-
log('text agent request start', selectedAgentAdapter.label, sessionBefore ? 'resume-existing-session' : 'new-session', 'verbose', verboseProgress, session ? `project=${session.slug}` : 'project=default');
|
|
957
|
+
log('text agent request start', selectedAgentAdapter.label, sessionBefore ? 'resume-existing-session' : 'new-session', 'verbose', bridge.verboseProgress, session ? `project=${session.slug}` : 'project=default');
|
|
1178
958
|
try {
|
|
1179
959
|
const result = await selectedAgentAdapter.run(text, signal, plan);
|
|
1180
960
|
const answer = result.answer || emptyAgentAnswer(settings.voiceLanguage);
|
|
1181
961
|
const fullAnswerText = `${agentAnswerHeader(settings.voiceLanguage, selectedAgentAdapter.label)}\n${answer}`;
|
|
1182
962
|
await sendChannelText(msg.channel, fullAnswerText);
|
|
1183
963
|
stopProgressSpeech(progressController.signal, 'text-agent-answer-ready');
|
|
1184
|
-
if (speakResponse && connection) {
|
|
964
|
+
if (speakResponse && bridge.connection) {
|
|
1185
965
|
const spokenAnswer = spokenResultOnly(text, answer, settings.voiceLanguage);
|
|
1186
966
|
await speakText(spokenAnswer, signal, null, { mirrorText: false });
|
|
1187
967
|
}
|
|
@@ -1190,15 +970,11 @@ async function handleTextAgentMessage(msg, text, { speakResponse = false } = {})
|
|
|
1190
970
|
warn('text agent request failed', e?.stack || e);
|
|
1191
971
|
await sendChannelText(msg.channel, formatVoiceErrorMessage(settings.voiceLanguage, String(e?.message || e).slice(0, 800)));
|
|
1192
972
|
} finally {
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
if (activeProgressSignal === progressController.signal) activeProgressSignal = null;
|
|
1197
|
-
if (activeProgressAbortController?.signal === progressController.signal) activeProgressAbortController = null;
|
|
973
|
+
// Text-path-only behaviour pre-refactor: drain the verbose-progress batch
|
|
974
|
+
// before tearing the controllers down. Kept explicit so the lifecycle's
|
|
975
|
+
// finish() can stay path-agnostic.
|
|
1198
976
|
clearProgressSpeechBatch(progressController.signal);
|
|
1199
|
-
|
|
1200
|
-
activeTranscriptChannelId = previousTranscriptChannelId;
|
|
1201
|
-
processing = false;
|
|
977
|
+
agentTurnLifecycle.finish(turn);
|
|
1202
978
|
}
|
|
1203
979
|
}
|
|
1204
980
|
|
|
@@ -1222,53 +998,6 @@ async function saveCapturedVoiceCloneSample(userId, wavPath, pcmBytes, segments,
|
|
|
1222
998
|
return true;
|
|
1223
999
|
}
|
|
1224
1000
|
|
|
1225
|
-
async function handleVoiceCloneCommand(userId, prompt, signal = null) {
|
|
1226
|
-
const command = voiceCloneCommandFromText(prompt);
|
|
1227
|
-
if (!command) return false;
|
|
1228
|
-
if (command.action === 'cancel') {
|
|
1229
|
-
const cancelled = voiceCloneCapture.cancel(userId);
|
|
1230
|
-
await sendText(cancelled ? '🎙️ 보이스 클로닝 샘플 캡처를 취소했어.' : '🎙️ 대기 중인 보이스 클로닝 샘플 캡처가 없어.');
|
|
1231
|
-
await speakText(cancelled ? '목소리 샘플 녹음 대기를 취소했어.' : '대기 중인 목소리 샘플 녹음은 없어.', signal);
|
|
1232
|
-
return true;
|
|
1233
|
-
}
|
|
1234
|
-
if (command.action === 'status') {
|
|
1235
|
-
const current = voiceCloneCapture.current();
|
|
1236
|
-
const status = current?.userId === String(userId)
|
|
1237
|
-
? `🎙️ 다음 유효한 음성을 ${path.relative(ROOT, current.targetPath)}에 저장할게.`
|
|
1238
|
-
: '🎙️ 지금 대기 중인 보이스 클로닝 샘플 캡처는 없어.';
|
|
1239
|
-
await sendText(status);
|
|
1240
|
-
await speakText(current?.userId === String(userId) ? '다음에 말하는 목소리를 샘플로 저장할게.' : '대기 중인 목소리 샘플 녹음은 없어.', signal);
|
|
1241
|
-
return true;
|
|
1242
|
-
}
|
|
1243
|
-
const armed = voiceCloneCapture.arm({ userId, source: 'voice-command' });
|
|
1244
|
-
await sendText(`🎙️ 보이스 클로닝 샘플 캡처 대기 중. 다음 10초에서 30초 정도 말하면 ${path.relative(ROOT, armed.targetPath)}에 저장할게.`);
|
|
1245
|
-
await speakText('좋아. 다음에 10초에서 30초 정도 말하면 그 음성을 목소리 샘플로 저장할게.', signal);
|
|
1246
|
-
return true;
|
|
1247
|
-
}
|
|
1248
|
-
|
|
1249
|
-
function stopPlaybackForBargeIn(userId, reason = 'playback-barge-in') {
|
|
1250
|
-
if (!speaking) return false;
|
|
1251
|
-
log('stop playback for barge-in', 'byUser', userId, 'reason', reason, 'speaking', speaking, 'processing', processing, 'turn', activeTurnId);
|
|
1252
|
-
speechPlaybackGeneration += 1;
|
|
1253
|
-
try { player.stop(true); } catch (e) { warn('stop playback failed', e?.stack || e); }
|
|
1254
|
-
speaking = false;
|
|
1255
|
-
return true;
|
|
1256
|
-
}
|
|
1257
|
-
|
|
1258
|
-
function interruptCurrentResponse(userId, reason = 'barge-in') {
|
|
1259
|
-
if (!speaking && !processing) return false;
|
|
1260
|
-
const turnId = activeTurnId;
|
|
1261
|
-
if (turnId) interruptedTurns.add(turnId);
|
|
1262
|
-
log('interrupt current response', 'byUser', userId, 'reason', reason, 'speaking', speaking, 'processing', processing, 'turn', turnId);
|
|
1263
|
-
if (currentAbortController && !currentAbortController.signal.aborted) {
|
|
1264
|
-
try { currentAbortController.abort(); } catch (e) { warn('abort current response failed', e?.stack || e); }
|
|
1265
|
-
}
|
|
1266
|
-
try { player.stop(true); } catch (e) { warn('stop playback failed', e?.stack || e); }
|
|
1267
|
-
speaking = false;
|
|
1268
|
-
processing = false;
|
|
1269
|
-
return true;
|
|
1270
|
-
}
|
|
1271
|
-
|
|
1272
1001
|
function acceptsWake(text) {
|
|
1273
1002
|
if (!settings.requireWakeWord) return true;
|
|
1274
1003
|
const low = text.toLowerCase();
|
|
@@ -1316,17 +1045,6 @@ async function concatWavs(files, output) {
|
|
|
1316
1045
|
}
|
|
1317
1046
|
}
|
|
1318
1047
|
|
|
1319
|
-
function queueSegment(userId, file, pcmBytes, startedAtMs = Date.now(), endedAtMs = Date.now()) {
|
|
1320
|
-
const pending = bridgeState.appendSegment(userId, {
|
|
1321
|
-
file,
|
|
1322
|
-
pcmBytes,
|
|
1323
|
-
startedAtMs,
|
|
1324
|
-
endedAtMs,
|
|
1325
|
-
timerFactory: () => setTimeout(() => flushUtterance(userId).catch(e => warn('flushUtterance failed', userId, e?.stack || e)), UTTERANCE_IDLE_MS),
|
|
1326
|
-
});
|
|
1327
|
-
log('queued segment', userId, 'segments', pending.files.length, 'totalPcmBytes', pending.pcmBytes, 'idleMs', UTTERANCE_IDLE_MS, 'epoch', pending.epoch);
|
|
1328
|
-
}
|
|
1329
|
-
|
|
1330
1048
|
function isBargeInCandidate(pcmBytes, levels) {
|
|
1331
1049
|
const thresholds = currentBargeInThresholds();
|
|
1332
1050
|
return isValidatedBargeInCandidate(pcmBytes, levels, thresholds);
|
|
@@ -1334,7 +1052,7 @@ function isBargeInCandidate(pcmBytes, levels) {
|
|
|
1334
1052
|
|
|
1335
1053
|
function enqueueDeferredProcessingUtterance({ userId, wavPath, pcmBytes, segments, startedAtMs = Date.now() }) {
|
|
1336
1054
|
const item = { userId, wavPath, pcmBytes, segments, startedAtMs };
|
|
1337
|
-
const result = bridgeState.enqueueDeferred(item, enqueueDeferredUtterance, MAX_DEFERRED_PROCESSING_UTTERANCES);
|
|
1055
|
+
const result = bridge.bridgeState.enqueueDeferred(item, enqueueDeferredUtterance, MAX_DEFERRED_PROCESSING_UTTERANCES);
|
|
1338
1056
|
if (!result.queued) {
|
|
1339
1057
|
log('drop deferred utterance because queue disabled', userId, wavPath, 'max', MAX_DEFERRED_PROCESSING_UTTERANCES);
|
|
1340
1058
|
return false;
|
|
@@ -1342,15 +1060,15 @@ function enqueueDeferredProcessingUtterance({ userId, wavPath, pcmBytes, segment
|
|
|
1342
1060
|
if (result.dropped) {
|
|
1343
1061
|
log('drop oldest deferred utterance because queue is full', result.dropped?.userId, result.dropped?.wavPath);
|
|
1344
1062
|
}
|
|
1345
|
-
log('queued deferred utterance while processing', userId, wavPath, 'queueSize', bridgeState.deferredSize(), 'epoch', bridgeState.currentEpoch());
|
|
1063
|
+
log('queued deferred utterance while processing', userId, wavPath, 'queueSize', bridge.bridgeState.deferredSize(), 'epoch', bridge.bridgeState.currentEpoch());
|
|
1346
1064
|
return true;
|
|
1347
1065
|
}
|
|
1348
1066
|
|
|
1349
1067
|
async function drainDeferredProcessingUtterances() {
|
|
1350
|
-
if (processing || bridgeState.deferredSize() === 0) return;
|
|
1351
|
-
const next = bridgeState.shiftDeferred();
|
|
1068
|
+
if (bridge.processing || bridge.bridgeState.deferredSize() === 0) return;
|
|
1069
|
+
const next = bridge.bridgeState.shiftDeferred();
|
|
1352
1070
|
if (!next) return;
|
|
1353
|
-
log('drain deferred utterance', next.userId, next.wavPath, 'remaining', bridgeState.deferredSize());
|
|
1071
|
+
log('drain deferred utterance', next.userId, next.wavPath, 'remaining', bridge.bridgeState.deferredSize());
|
|
1354
1072
|
const metricsTurn = newLatencyTurn(next.userId, next.startedAtMs || Date.now());
|
|
1355
1073
|
metricsTurn.mark('voice_first_packet', next.startedAtMs || Date.now());
|
|
1356
1074
|
metricsTurn.mark('utterance_flush');
|
|
@@ -1374,398 +1092,6 @@ async function validateProcessingBargeIn(userId, wavPath, pcmBytes, segments) {
|
|
|
1374
1092
|
return { action: 'interrupt', text };
|
|
1375
1093
|
}
|
|
1376
1094
|
|
|
1377
|
-
async function flushUtterance(userId) {
|
|
1378
|
-
const pending = bridgeState.deletePending(userId);
|
|
1379
|
-
if (!pending) return;
|
|
1380
|
-
if (pending.timer) clearTimeout(pending.timer);
|
|
1381
|
-
const files = pending.files;
|
|
1382
|
-
const pcmBytes = pending.pcmBytes;
|
|
1383
|
-
const metricsTurn = newLatencyTurn(userId, pending.firstPacketAt || Date.now());
|
|
1384
|
-
metricsTurn.mark('voice_first_packet', pending.firstPacketAt || Date.now());
|
|
1385
|
-
metricsTurn.mark('voice_segment_end', pending.lastSegmentEndAt || Date.now());
|
|
1386
|
-
metricsTurn.mark('utterance_flush');
|
|
1387
|
-
metricsTurn.addMeta({ segments: files.length, pcmBytes, epoch: pending.epoch });
|
|
1388
|
-
if (pending.epoch !== bridgeState.currentEpoch()) {
|
|
1389
|
-
log('drop stale utterance after voice input queue reset', userId, 'utteranceEpoch', pending.epoch, 'currentEpoch', bridgeState.currentEpoch());
|
|
1390
|
-
for (const file of files) fs.rm(file, { force: true }, () => {});
|
|
1391
|
-
metricsTurn.finish({ status: 'stale_after_config_change' });
|
|
1392
|
-
return;
|
|
1393
|
-
}
|
|
1394
|
-
if (pcmBytes < MIN_UTTERANCE_BYTES) {
|
|
1395
|
-
log('skip short utterance', userId, 'segments', files.length, 'pcmBytes', pcmBytes, 'minBytes', MIN_UTTERANCE_BYTES);
|
|
1396
|
-
metricsTurn.finish({ status: 'skip_short' });
|
|
1397
|
-
return;
|
|
1398
|
-
}
|
|
1399
|
-
const merged = path.join(settings.debugDir, `utterance-merged-${stamp()}-${userId}.wav`);
|
|
1400
|
-
await concatWavs(files, merged);
|
|
1401
|
-
const levels = await analyzeAudio(merged);
|
|
1402
|
-
log('utterance levels', userId, 'segments', files.length, 'pcmBytes', pcmBytes, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb);
|
|
1403
|
-
if (await saveCapturedVoiceCloneSample(userId, merged, pcmBytes, files.length)) {
|
|
1404
|
-
metricsTurn.addMeta({ meanDb: levels.meanDb, maxDb: levels.maxDb });
|
|
1405
|
-
metricsTurn.finish({ status: 'voice_clone_sample_saved' });
|
|
1406
|
-
return;
|
|
1407
|
-
}
|
|
1408
|
-
const candidate = isBargeInCandidate(pcmBytes, levels);
|
|
1409
|
-
if (speaking || processing) {
|
|
1410
|
-
const thresholds = currentBargeInThresholds();
|
|
1411
|
-
if (!candidate) {
|
|
1412
|
-
log('check weak barge-in for explicit stop transcript', userId, 'pcmBytes', pcmBytes, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb, 'thresholdBytes', thresholds.minBytes, 'thresholds', thresholds.minMeanDb, thresholds.minMaxDb, 'mode', thresholds.mode);
|
|
1413
|
-
}
|
|
1414
|
-
const validation = await validateProcessingBargeIn(userId, merged, pcmBytes, files.length);
|
|
1415
|
-
if (validation?.action === 'interrupt') {
|
|
1416
|
-
metricsTurn.finish({ status: processing ? 'barge_in_processing_interrupt' : 'barge_in_playback_interrupt' });
|
|
1417
|
-
return;
|
|
1418
|
-
}
|
|
1419
|
-
if (processing && validation?.action === 'defer') {
|
|
1420
|
-
const queued = enqueueDeferredProcessingUtterance({
|
|
1421
|
-
userId,
|
|
1422
|
-
wavPath: merged,
|
|
1423
|
-
pcmBytes,
|
|
1424
|
-
segments: files.length,
|
|
1425
|
-
startedAtMs: pending.firstPacketAt || Date.now(),
|
|
1426
|
-
});
|
|
1427
|
-
metricsTurn.finish({ status: queued ? 'deferred_during_processing' : 'drop_deferred_during_processing' });
|
|
1428
|
-
return;
|
|
1429
|
-
}
|
|
1430
|
-
metricsTurn.finish({ status: speaking ? 'barge_in_playback_ignored' : 'barge_in_processing_ignored' });
|
|
1431
|
-
return;
|
|
1432
|
-
}
|
|
1433
|
-
// Drop only when BOTH overall energy and peak are low. Real Discord speech from this
|
|
1434
|
-
// mic can have low mean volume while still carrying intelligible peaks; using OR here
|
|
1435
|
-
// caused valid Korean utterances to be discarded as "low-energy".
|
|
1436
|
-
if (levels.meanDb < MIN_MEAN_VOLUME_DB && levels.maxDb < MIN_MAX_VOLUME_DB) {
|
|
1437
|
-
log('skip low-energy utterance', userId, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb, 'thresholds', MIN_MEAN_VOLUME_DB, MIN_MAX_VOLUME_DB, 'mode', 'both-below');
|
|
1438
|
-
metricsTurn.addMeta({ meanDb: levels.meanDb, maxDb: levels.maxDb });
|
|
1439
|
-
metricsTurn.finish({ status: 'skip_low_energy' });
|
|
1440
|
-
return;
|
|
1441
|
-
}
|
|
1442
|
-
metricsTurn.addMeta({ meanDb: levels.meanDb, maxDb: levels.maxDb });
|
|
1443
|
-
await handleRecording(userId, merged, pcmBytes, files.length, metricsTurn);
|
|
1444
|
-
}
|
|
1445
|
-
|
|
1446
|
-
async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsTurn = null) {
|
|
1447
|
-
if (processing) { log('drop while processing', userId); metricsTurn?.finish({ status: 'drop_processing' }); return; }
|
|
1448
|
-
if (!isAllowed(userId)) { warn('ignore unauthorized', userId); metricsTurn?.finish({ status: 'unauthorized' }); return; }
|
|
1449
|
-
processing = true;
|
|
1450
|
-
const turnId = ++activeTurnId;
|
|
1451
|
-
const controller = new AbortController();
|
|
1452
|
-
currentAbortController = controller;
|
|
1453
|
-
const signal = controller.signal;
|
|
1454
|
-
const sessionForVoice = resolveProjectSessionForChannel(activeVoiceChannelId || settings.transcriptChannelId);
|
|
1455
|
-
const previousTranscriptChannelId = activeTranscriptChannelId;
|
|
1456
|
-
activeTranscriptChannelId = sessionForVoice?.transcriptChannelId || settings.transcriptChannelId;
|
|
1457
|
-
try {
|
|
1458
|
-
const runtimeLanguage = reloadRuntimeLanguageFromEnv();
|
|
1459
|
-
if (runtimeLanguage.changed) {
|
|
1460
|
-
log('drop current utterance because language changed before STT', userId, 'turn', turnId, 'language', runtimeLanguage.voiceLanguage);
|
|
1461
|
-
fs.rm(wavPath, { force: true }, () => {});
|
|
1462
|
-
metricsTurn?.finish({ status: 'drop_stale_language_change' });
|
|
1463
|
-
return;
|
|
1464
|
-
}
|
|
1465
|
-
const session = resolveProjectSessionForChannel(activeVoiceChannelId || settings.transcriptChannelId);
|
|
1466
|
-
activeTranscriptChannelId = session?.transcriptChannelId || settings.transcriptChannelId;
|
|
1467
|
-
log('voice turn text target', session ? `project=${session.slug}` : 'project=default', 'channel', activeTranscriptChannelId ? 'project-or-default' : 'none');
|
|
1468
|
-
log('transcribing', userId, wavPath, 'pcmBytes', pcmBytes, 'segments', segments, 'turn', turnId);
|
|
1469
|
-
const sttNotice = formatSttStartMessage(settings.voiceLanguage);
|
|
1470
|
-
await sendText(sttNotice);
|
|
1471
|
-
const sttNoticeSpeech = STT_START_VOICE_NOTICE
|
|
1472
|
-
? speakImmediateNotice(sttNotice.replace(/^🎧\s*/u, ''), signal, 'stt-start')
|
|
1473
|
-
: Promise.resolve();
|
|
1474
|
-
const sttStart = Date.now();
|
|
1475
|
-
const text = await transcribe(wavPath);
|
|
1476
|
-
await sttNoticeSpeech;
|
|
1477
|
-
metricsTurn?.stage('stt', Date.now() - sttStart, { transcriptChars: String(text || '').length });
|
|
1478
|
-
if (interruptedTurns.has(turnId) || signal.aborted) { metricsTurn?.finish({ status: 'aborted_after_stt' }); return; }
|
|
1479
|
-
if (!text) { log('empty transcript', userId, wavPath); metricsTurn?.finish({ status: 'empty_transcript' }); return; }
|
|
1480
|
-
log(`user ${userId} said: ${text}`);
|
|
1481
|
-
await sendText(formatSttResultMessage(settings.voiceLanguage, userId, text));
|
|
1482
|
-
if (!acceptsWake(text)) { await sendText(formatWakeRejectedMessage(settings.voiceLanguage)); metricsTurn?.finish({ status: 'wake_rejected' }); return; }
|
|
1483
|
-
|
|
1484
|
-
const prompt = stripWake(text);
|
|
1485
|
-
if (await handleLanguageCommand(prompt, signal)) {
|
|
1486
|
-
metricsTurn?.finish({ status: 'language_command' });
|
|
1487
|
-
return;
|
|
1488
|
-
}
|
|
1489
|
-
if (await handleTtsVoiceCommand(prompt, signal)) {
|
|
1490
|
-
metricsTurn?.finish({ status: 'voice_command' });
|
|
1491
|
-
return;
|
|
1492
|
-
}
|
|
1493
|
-
if (await handleVoiceCloneCommand(userId, prompt, signal)) {
|
|
1494
|
-
metricsTurn?.finish({ status: 'voice_clone_command' });
|
|
1495
|
-
return;
|
|
1496
|
-
}
|
|
1497
|
-
const sensitivityRequest = sensitivityModeFromTranscript(prompt);
|
|
1498
|
-
if (sensitivityRequest) {
|
|
1499
|
-
const thresholds = setSensitivityMode(sensitivityRequest.mode, sensitivityRequest.reason);
|
|
1500
|
-
await sendText(`🎚️ ${sensitivityStatusText()}`);
|
|
1501
|
-
if (isSensitivityOnlyRequest(prompt)) {
|
|
1502
|
-
await speakText(sensitivityChangedSpeech(thresholds.mode, settings.voiceLanguage), signal, metricsTurn);
|
|
1503
|
-
metricsTurn?.finish({ status: 'sensitivity_only' });
|
|
1504
|
-
return;
|
|
1505
|
-
}
|
|
1506
|
-
}
|
|
1507
|
-
const verboseRequest = verboseModeFromTranscript(prompt);
|
|
1508
|
-
if (verboseRequest !== null) {
|
|
1509
|
-
setVerboseProgress(verboseRequest, 'voice-command');
|
|
1510
|
-
await sendText(`🔎 ${verboseStatusText()}`);
|
|
1511
|
-
if (isVerboseOnlyRequest(prompt)) {
|
|
1512
|
-
await speakText(verboseChangedSpeech(verboseRequest, settings.voiceLanguage), signal, metricsTurn);
|
|
1513
|
-
metricsTurn?.finish({ status: 'verbose_only' });
|
|
1514
|
-
return;
|
|
1515
|
-
}
|
|
1516
|
-
}
|
|
1517
|
-
let promptForAgent = prompt;
|
|
1518
|
-
try {
|
|
1519
|
-
const planOutcome = await dispatchPlanModeUtterance(prompt, signal);
|
|
1520
|
-
if (planOutcome.handled) {
|
|
1521
|
-
metricsTurn?.finish({ status: 'plan_mode' });
|
|
1522
|
-
return;
|
|
1523
|
-
}
|
|
1524
|
-
if (planOutcome.prompt) promptForAgent = planOutcome.prompt;
|
|
1525
|
-
} catch (e) {
|
|
1526
|
-
warn('plan mode dispatch failed', e?.stack || e);
|
|
1527
|
-
}
|
|
1528
|
-
const selectedAgentAdapter = adapterForProjectSession(session);
|
|
1529
|
-
const projectContext = projectSessionContextText(session);
|
|
1530
|
-
const recentDiscordContext = formatRecentDiscordContext(recentDiscordTextByChannel, {
|
|
1531
|
-
channelId: activeTranscriptChannelId,
|
|
1532
|
-
});
|
|
1533
|
-
const plan = {
|
|
1534
|
-
task: true,
|
|
1535
|
-
label: selectedAgentAdapter.label,
|
|
1536
|
-
verboseProgress,
|
|
1537
|
-
language: settings.voiceLanguage,
|
|
1538
|
-
cwd: session?.workdir,
|
|
1539
|
-
projectContext,
|
|
1540
|
-
recentDiscordContext,
|
|
1541
|
-
};
|
|
1542
|
-
log('Agent plan', plan.label, 'backend', selectedAgentAdapter.backend, 'task', plan.task, 'language', plan.language, session ? `project=${session.slug}` : 'project=default');
|
|
1543
|
-
const agentStart = Date.now();
|
|
1544
|
-
const progressController = new AbortController();
|
|
1545
|
-
activeProgressAbortController = progressController;
|
|
1546
|
-
activeProgressSignal = progressController.signal;
|
|
1547
|
-
activeProgressLastEventAt = Date.now();
|
|
1548
|
-
const streamingTurnActive = beginStreamingTurn(signal);
|
|
1549
|
-
const agentPromise = selectedAgentAdapter.ask(promptForAgent, signal, plan);
|
|
1550
|
-
let done = false;
|
|
1551
|
-
// Status announcements share one queue with verbose progress so they never
|
|
1552
|
-
// talk over each other. In verbose mode, skip the generic initial prompt;
|
|
1553
|
-
// the detailed tool/file/test events are the initial progress voice.
|
|
1554
|
-
const progressLoop = (async () => {
|
|
1555
|
-
if (!verboseProgress) {
|
|
1556
|
-
await sleep(2500);
|
|
1557
|
-
if (!done && !signal.aborted && !interruptedTurns.has(turnId)) {
|
|
1558
|
-
const initial = /^en/i.test(String(settings.voiceLanguage || ''))
|
|
1559
|
-
? 'calling the agent.'
|
|
1560
|
-
: '에이전트 호출했어. 응답 기다리는 중.';
|
|
1561
|
-
queueProgressSpeechText(initial, progressController.signal, 'generic-initial');
|
|
1562
|
-
}
|
|
1563
|
-
}
|
|
1564
|
-
let idleNotices = 0;
|
|
1565
|
-
let nextIdleNoticeMs = PROGRESS_IDLE_NOTICE_INITIAL_MS;
|
|
1566
|
-
let lastObservedProgressAt = activeProgressLastEventAt;
|
|
1567
|
-
while (!done && !signal.aborted && !interruptedTurns.has(turnId) && idleNotices < PROGRESS_IDLE_NOTICE_LIMIT) {
|
|
1568
|
-
await sleep(Math.min(PROGRESS_IDLE_CHECK_MS, nextIdleNoticeMs));
|
|
1569
|
-
if (done || signal.aborted || interruptedTurns.has(turnId)) break;
|
|
1570
|
-
if (activeProgressLastEventAt !== lastObservedProgressAt) {
|
|
1571
|
-
lastObservedProgressAt = activeProgressLastEventAt;
|
|
1572
|
-
nextIdleNoticeMs = PROGRESS_IDLE_NOTICE_INITIAL_MS;
|
|
1573
|
-
continue;
|
|
1574
|
-
}
|
|
1575
|
-
const idleMs = Date.now() - activeProgressLastEventAt;
|
|
1576
|
-
if (idleMs < nextIdleNoticeMs) continue;
|
|
1577
|
-
idleNotices += 1;
|
|
1578
|
-
activeProgressLastEventAt = Date.now();
|
|
1579
|
-
lastObservedProgressAt = activeProgressLastEventAt;
|
|
1580
|
-
const idle = /^en/i.test(String(settings.voiceLanguage || ''))
|
|
1581
|
-
? 'still working on that.'
|
|
1582
|
-
: '아직 작업 중이야.';
|
|
1583
|
-
queueProgressSpeechText(idle, progressController.signal, `idle-${idleNotices}-${Math.round(nextIdleNoticeMs / 1000)}s`);
|
|
1584
|
-
nextIdleNoticeMs = Math.min(
|
|
1585
|
-
PROGRESS_IDLE_NOTICE_MAX_MS,
|
|
1586
|
-
Math.max(nextIdleNoticeMs + 1000, Math.round(nextIdleNoticeMs * PROGRESS_IDLE_NOTICE_MULTIPLIER)),
|
|
1587
|
-
);
|
|
1588
|
-
}
|
|
1589
|
-
})().catch(e => {
|
|
1590
|
-
if (!isAbortError(e)) warn('progress loop failed', e?.stack || e);
|
|
1591
|
-
});
|
|
1592
|
-
const answer = await agentPromise.finally(() => { done = true; });
|
|
1593
|
-
if (streamingTurnActive) await endStreamingTurn();
|
|
1594
|
-
metricsTurn?.stage('agent', Date.now() - agentStart, { answerChars: String(answer || '').length, backend: selectedAgentAdapter.backend });
|
|
1595
|
-
void progressLoop;
|
|
1596
|
-
if (interruptedTurns.has(turnId) || signal.aborted) { metricsTurn?.finish({ status: 'aborted_after_agent' }); return; }
|
|
1597
|
-
|
|
1598
|
-
log('Agent answer', selectedAgentAdapter.label, answer.slice(0, 200));
|
|
1599
|
-
const spokenAnswer = spokenResultOnly(prompt, answer, settings.voiceLanguage);
|
|
1600
|
-
const fullAnswerText = `${agentAnswerHeader(settings.voiceLanguage, selectedAgentAdapter.label)}\n${answer || emptyAgentAnswer(settings.voiceLanguage)}`;
|
|
1601
|
-
log('send agent answer text', 'chars', fullAnswerText.length);
|
|
1602
|
-
const answerTextDelivered = await sendText(fullAnswerText);
|
|
1603
|
-
if (!answerTextDelivered) {
|
|
1604
|
-
warn('agent answer text delivery failed; still speaking answer');
|
|
1605
|
-
}
|
|
1606
|
-
log('spoken answer', spokenAnswer.slice(0, 200));
|
|
1607
|
-
stopProgressSpeech(progressController.signal, 'agent-answer-ready');
|
|
1608
|
-
if (streamingTurnActive && streamingSpeechDelivered) {
|
|
1609
|
-
log('skipping post-run speakText; streaming already delivered audio');
|
|
1610
|
-
} else {
|
|
1611
|
-
await speakText(spokenAnswer, signal, metricsTurn, { mirrorText: !answerTextDelivered });
|
|
1612
|
-
}
|
|
1613
|
-
try {
|
|
1614
|
-
const guildId = client.channels.cache.get(activeVoiceChannelId)?.guild?.id || '';
|
|
1615
|
-
await maybeNotifyTaskComplete({
|
|
1616
|
-
answer: spokenAnswer || answer,
|
|
1617
|
-
label: selectedAgentAdapter.label,
|
|
1618
|
-
elapsedMs: Date.now() - agentStart,
|
|
1619
|
-
guildId,
|
|
1620
|
-
});
|
|
1621
|
-
} catch (e) { warn('maybeNotifyTaskComplete failed', e?.message || e); }
|
|
1622
|
-
metricsTurn?.finish({ status: 'ok' });
|
|
1623
|
-
} catch (e) {
|
|
1624
|
-
if (isAbortError(e) || interruptedTurns.has(turnId)) {
|
|
1625
|
-
log('turn aborted', userId, 'turn', turnId);
|
|
1626
|
-
metricsTurn?.finish({ status: 'aborted' });
|
|
1627
|
-
return;
|
|
1628
|
-
}
|
|
1629
|
-
warn('handleRecording failed', e?.stack || e);
|
|
1630
|
-
const shortMsg = String(e?.message || e).slice(0, 800);
|
|
1631
|
-
metricsTurn?.finish({ status: 'error', error: shortMsg });
|
|
1632
|
-
await sendText(formatVoiceErrorMessage(settings.voiceLanguage, shortMsg));
|
|
1633
|
-
} finally {
|
|
1634
|
-
if (activeProgressAbortController && !activeProgressAbortController.signal.aborted) {
|
|
1635
|
-
try { activeProgressAbortController.abort(); } catch (e) { warn('abort progress speech in cleanup failed', e?.stack || e); }
|
|
1636
|
-
}
|
|
1637
|
-
if (activeProgressSignal === activeProgressAbortController?.signal) activeProgressSignal = null;
|
|
1638
|
-
activeProgressAbortController = null;
|
|
1639
|
-
if (currentAbortController === controller) currentAbortController = null;
|
|
1640
|
-
activeTranscriptChannelId = previousTranscriptChannelId;
|
|
1641
|
-
interruptedTurns.delete(turnId);
|
|
1642
|
-
if (activeTurnId === turnId) activeTurnId = 0;
|
|
1643
|
-
processing = false;
|
|
1644
|
-
if (bridgeState.deferredSize() > 0) {
|
|
1645
|
-
setImmediate(() => drainDeferredProcessingUtterances().catch(e => warn('drain deferred utterance failed', e?.stack || e)));
|
|
1646
|
-
}
|
|
1647
|
-
}
|
|
1648
|
-
}
|
|
1649
|
-
|
|
1650
|
-
function subscribeUser(receiver, userId) {
|
|
1651
|
-
if (!isAllowed(userId)) return;
|
|
1652
|
-
if (String(userId) === client.user?.id) return;
|
|
1653
|
-
const wasSpeaking = speaking;
|
|
1654
|
-
const wasProcessing = processing;
|
|
1655
|
-
if ((wasSpeaking || wasProcessing) && !activeStreams.has(userId)) {
|
|
1656
|
-
// Speaking-start alone is too noisy in Discord voice. Record and validate a
|
|
1657
|
-
// real segment first; only confirmed playback barge-in stops the current
|
|
1658
|
-
// audio chunk, and only explicit stop transcripts abort active agent work.
|
|
1659
|
-
log('possible barge-in start; waiting for segment validation', userId, 'speaking', wasSpeaking, 'processing', wasProcessing);
|
|
1660
|
-
}
|
|
1661
|
-
if (activeStreams.has(userId)) return;
|
|
1662
|
-
const pending = bridgeState.getPending(userId);
|
|
1663
|
-
if (pending?.timer) {
|
|
1664
|
-
bridgeState.clearPendingTimer(userId);
|
|
1665
|
-
log('extend pending utterance because new segment started', userId, 'segments', pending.files.length, 'totalPcmBytes', pending.pcmBytes);
|
|
1666
|
-
}
|
|
1667
|
-
|
|
1668
|
-
const file = path.join(settings.debugDir, `segment-${stamp()}-${userId}.wav`);
|
|
1669
|
-
log('subscribe user', userId, file);
|
|
1670
|
-
const opusStream = receiver.subscribe(userId, { end: { behavior: EndBehaviorType.AfterSilence, duration: SUBSCRIBE_AFTER_SILENCE_MS } });
|
|
1671
|
-
const decoder = new prism.opus.Decoder({ rate: 48000, channels: 2, frameSize: 960 });
|
|
1672
|
-
const writer = new wav.FileWriter(file, { sampleRate: 48000, channels: 2, bitDepth: 16 });
|
|
1673
|
-
activeStreams.set(userId, { opusStream, decoder, writer, file, startedAtMs: Date.now() });
|
|
1674
|
-
let pcmBytes = 0;
|
|
1675
|
-
const liveThresholds = wasSpeaking && !wasProcessing ? currentPlaybackBargeInThresholds() : currentBargeInThresholds();
|
|
1676
|
-
const liveBargeIn = shouldUseLivePlaybackBargeIn({ speaking: wasSpeaking, processing: wasProcessing }) ? createLiveBargeInMonitor({
|
|
1677
|
-
minBytes: liveThresholds.minBytes,
|
|
1678
|
-
minMeanDb: liveThresholds.minMeanDb,
|
|
1679
|
-
minMaxDb: liveThresholds.minMaxDb,
|
|
1680
|
-
requireBoth: liveThresholds.requireBoth,
|
|
1681
|
-
log,
|
|
1682
|
-
onConfirm: ({ pcmBytes: confirmedBytes, levels }) => {
|
|
1683
|
-
log('confirmed live playback barge-in before segment end', userId, 'pcmBytes', confirmedBytes, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb);
|
|
1684
|
-
stopPlaybackForBargeIn(userId, 'confirmed-live-playback-barge-in');
|
|
1685
|
-
},
|
|
1686
|
-
}) : null;
|
|
1687
|
-
decoder.on('data', chunk => {
|
|
1688
|
-
pcmBytes += chunk.length;
|
|
1689
|
-
liveBargeIn?.push(chunk);
|
|
1690
|
-
});
|
|
1691
|
-
opusStream.on('error', e => warn('opus stream error', userId, e?.stack || e));
|
|
1692
|
-
decoder.on('error', e => warn('opus decoder error', userId, e?.stack || e));
|
|
1693
|
-
writer.on('error', e => warn('wav writer error', userId, e?.stack || e));
|
|
1694
|
-
opusStream.on('end', () => log('opus end', userId, 'pcmBytes', pcmBytes));
|
|
1695
|
-
writer.on('finish', () => {
|
|
1696
|
-
const streamState = activeStreams.get(userId);
|
|
1697
|
-
activeStreams.delete(userId);
|
|
1698
|
-
const endedAtMs = Date.now();
|
|
1699
|
-
log('saved segment', userId, 'pcmBytes', pcmBytes, file);
|
|
1700
|
-
queueSegment(userId, file, pcmBytes, streamState?.startedAtMs || endedAtMs, endedAtMs);
|
|
1701
|
-
});
|
|
1702
|
-
opusStream.pipe(decoder).pipe(writer);
|
|
1703
|
-
}
|
|
1704
|
-
|
|
1705
|
-
async function connectTo(channel) {
|
|
1706
|
-
if (connection) {
|
|
1707
|
-
try { connection.destroy(); } catch {}
|
|
1708
|
-
}
|
|
1709
|
-
activeVoiceChannelId = channel.id;
|
|
1710
|
-
connection = joinVoiceChannel({
|
|
1711
|
-
channelId: channel.id,
|
|
1712
|
-
guildId: channel.guild.id,
|
|
1713
|
-
adapterCreator: channel.guild.voiceAdapterCreator,
|
|
1714
|
-
selfDeaf: false,
|
|
1715
|
-
selfMute: false,
|
|
1716
|
-
});
|
|
1717
|
-
const voiceConnection = connection;
|
|
1718
|
-
voiceConnection.subscribe(player);
|
|
1719
|
-
voiceConnection.on('error', e => warn('voice connection error', e?.stack || e));
|
|
1720
|
-
voiceConnection.on('stateChange', async (oldState, newState) => {
|
|
1721
|
-
log('voice connection state', oldState.status, '->', newState.status);
|
|
1722
|
-
if (connection !== voiceConnection) {
|
|
1723
|
-
log('ignore stale voice connection state', oldState.status, '->', newState.status);
|
|
1724
|
-
return;
|
|
1725
|
-
}
|
|
1726
|
-
if (newState.status === VoiceConnectionStatus.Disconnected) {
|
|
1727
|
-
try {
|
|
1728
|
-
await Promise.race([
|
|
1729
|
-
entersState(voiceConnection, VoiceConnectionStatus.Signalling, 5000),
|
|
1730
|
-
entersState(voiceConnection, VoiceConnectionStatus.Connecting, 5000),
|
|
1731
|
-
]);
|
|
1732
|
-
} catch (e) {
|
|
1733
|
-
if (connection !== voiceConnection) return;
|
|
1734
|
-
warn('voice connection disconnected; reconnecting to channel', channel.guild.name, channel.name, e?.message || e);
|
|
1735
|
-
try { voiceConnection.destroy(); } catch {}
|
|
1736
|
-
connection = null;
|
|
1737
|
-
setTimeout(() => connectTo(channel).catch(err => warn('voice reconnect failed', err?.stack || err)), 1500);
|
|
1738
|
-
}
|
|
1739
|
-
}
|
|
1740
|
-
});
|
|
1741
|
-
await entersState(voiceConnection, VoiceConnectionStatus.Ready, 30000);
|
|
1742
|
-
voiceConnection.receiver.speaking.on('start', userId => subscribeUser(voiceConnection.receiver, userId));
|
|
1743
|
-
log(`Listening in voice channel ${channel.guild.name} / ${channel.name}`);
|
|
1744
|
-
}
|
|
1745
|
-
|
|
1746
|
-
async function autoJoin() {
|
|
1747
|
-
const attempted = [];
|
|
1748
|
-
for (const preferredName of settings.autoJoinVoiceChannels) {
|
|
1749
|
-
for (const guild of client.guilds.cache.values()) {
|
|
1750
|
-
const channels = await guild.channels.fetch();
|
|
1751
|
-
for (const ch of channels.values()) {
|
|
1752
|
-
if (!ch?.isVoiceBased?.() || ch.name.toLowerCase() !== preferredName) continue;
|
|
1753
|
-
attempted.push(`${guild.name}/${ch.name}`);
|
|
1754
|
-
try {
|
|
1755
|
-
await connectTo(ch);
|
|
1756
|
-
return;
|
|
1757
|
-
} catch (e) {
|
|
1758
|
-
warn('auto-join failed; trying next configured voice channel', guild.name, ch.name, e?.stack || e);
|
|
1759
|
-
try { connection?.destroy(); } catch {}
|
|
1760
|
-
connection = null;
|
|
1761
|
-
activeVoiceChannelId = '';
|
|
1762
|
-
}
|
|
1763
|
-
}
|
|
1764
|
-
}
|
|
1765
|
-
}
|
|
1766
|
-
warn('No auto-join channel found or reachable', settings.autoJoinVoiceChannels, 'attempted', attempted);
|
|
1767
|
-
}
|
|
1768
|
-
|
|
1769
1095
|
function consumeRestartNotice() {
|
|
1770
1096
|
const noticePath = path.join(ROOT, '.cache', 'restart-notice.txt');
|
|
1771
1097
|
try {
|
|
@@ -1787,76 +1113,8 @@ async function announceRestartComplete() {
|
|
|
1787
1113
|
await speakText(speech, undefined, null, { mirrorText: false });
|
|
1788
1114
|
}
|
|
1789
1115
|
|
|
1790
|
-
async function findVoiceChannelBySelector(guild, selector) {
|
|
1791
|
-
const wanted = String(selector || '').trim();
|
|
1792
|
-
if (!wanted || !guild) return null;
|
|
1793
|
-
const id = wanted.replace(/^<#(\d+)>$/, '$1');
|
|
1794
|
-
const channels = await guild.channels.fetch();
|
|
1795
|
-
const voiceChannels = [...channels.values()].filter(ch => ch?.isVoiceBased?.());
|
|
1796
|
-
const byId = voiceChannels.find(ch => ch.id === id);
|
|
1797
|
-
if (byId) return byId;
|
|
1798
|
-
const matches = voiceChannels.filter(ch => String(ch.name || '').toLowerCase() === wanted.toLowerCase());
|
|
1799
|
-
if (matches.length === 1) return matches[0];
|
|
1800
|
-
if (matches.length > 1) throw new Error(`같은 이름의 음성 채널이 여러 개야. 채널 ID나 멘션으로 지정해줘: ${wanted}`);
|
|
1801
|
-
throw new Error(`음성 채널을 찾지 못했어: ${wanted}`);
|
|
1802
|
-
}
|
|
1803
|
-
|
|
1804
|
-
async function voiceChannelLabel(guild, channelId) {
|
|
1805
|
-
if (!channelId || !guild) return '없음';
|
|
1806
|
-
try {
|
|
1807
|
-
const ch = await guild.channels.fetch(channelId);
|
|
1808
|
-
return ch?.name || '지정됨';
|
|
1809
|
-
} catch {
|
|
1810
|
-
return '지정됨';
|
|
1811
|
-
}
|
|
1812
|
-
}
|
|
1813
|
-
|
|
1814
|
-
async function resolveVoiceChannelForAttach(msg, selector = '') {
|
|
1815
|
-
if (selector) return findVoiceChannelBySelector(msg.guild, selector);
|
|
1816
|
-
if (msg.member?.voice?.channel) return msg.member.voice.channel;
|
|
1817
|
-
if (activeVoiceChannelId && msg.guild) {
|
|
1818
|
-
try {
|
|
1819
|
-
const ch = await msg.guild.channels.fetch(activeVoiceChannelId);
|
|
1820
|
-
if (ch?.isVoiceBased?.()) return ch;
|
|
1821
|
-
} catch {}
|
|
1822
|
-
}
|
|
1823
|
-
throw new Error('붙일 음성 채널을 못 찾았어. 음성채널에 들어가서 `!session attach-voice`를 치거나 `--voice "채널명"`을 붙여줘.');
|
|
1824
|
-
}
|
|
1825
|
-
|
|
1826
|
-
async function attachVoiceChannelToTextSession(msg, command) {
|
|
1827
|
-
const voiceChannel = await resolveVoiceChannelForAttach(msg, command.voice);
|
|
1828
|
-
let session = null;
|
|
1829
|
-
if (command.name) {
|
|
1830
|
-
session = bindProjectSessionToChannel({ state: projectSessionsState, nameOrSlug: command.name, channelId: msg.channelId });
|
|
1831
|
-
} else {
|
|
1832
|
-
session = resolveProjectSessionForChannel(msg.channelId)
|
|
1833
|
-
|| resolveProjectSessionForChannel(voiceChannel.id);
|
|
1834
|
-
if (!session) {
|
|
1835
|
-
const fallbackName = String(msg.channel?.name || `channel-${msg.channelId}`).trim() || `channel-${msg.channelId}`;
|
|
1836
|
-
session = createProjectSession({
|
|
1837
|
-
root: ROOT,
|
|
1838
|
-
state: projectSessionsState,
|
|
1839
|
-
name: fallbackName,
|
|
1840
|
-
workdir: settings.agent.cwd || ROOT,
|
|
1841
|
-
channelId: msg.channelId,
|
|
1842
|
-
voiceChannelId: voiceChannel.id,
|
|
1843
|
-
transcriptChannelId: msg.channelId,
|
|
1844
|
-
mcpContext: 'Ad-hoc Discord text channel session',
|
|
1845
|
-
});
|
|
1846
|
-
}
|
|
1847
|
-
}
|
|
1848
|
-
session.transcriptChannelId = msg.channelId;
|
|
1849
|
-
session.voiceChannelId = voiceChannel.id;
|
|
1850
|
-
projectSessionsState.channelSessions[msg.channelId] = session.slug;
|
|
1851
|
-
projectSessionsState.channelSessions[voiceChannel.id] = session.slug;
|
|
1852
|
-
saveProjectSessionsState();
|
|
1853
|
-
agentAdaptersBySession.delete(session.slug);
|
|
1854
|
-
if (activeVoiceChannelId !== voiceChannel.id) await connectTo(voiceChannel);
|
|
1855
|
-
return msg.reply(`${session.name} 세션을 이 텍스트 채널과 음성 채널 ${voiceChannel.name}에 붙였어. 이제 그 음성채널 발화의 STT/답변 텍스트는 이 채널로 가.`);
|
|
1856
|
-
}
|
|
1857
|
-
|
|
1858
1116
|
async function handleProjectSessionCommand(msg, command) {
|
|
1859
|
-
const activeSession = resolveProjectSessionForChannel(msg.channelId) || resolveProjectSessionForChannel(activeVoiceChannelId);
|
|
1117
|
+
const activeSession = resolveProjectSessionForChannel(msg.channelId) || resolveProjectSessionForChannel(bridge.activeVoiceChannelId);
|
|
1860
1118
|
if (command.action === 'attach-voice') return void await attachVoiceChannelToTextSession(msg, command);
|
|
1861
1119
|
if (command.action === 'status') {
|
|
1862
1120
|
if (!activeSession) return void msg.reply(`${agentAdapter.label} 기본 세션: ${agentAdapter.readSessionId?.() || '아직 없음'}`);
|
|
@@ -1914,7 +1172,8 @@ async function handleProjectSessionCommand(msg, command) {
|
|
|
1914
1172
|
mcpContext: command.mcpContext,
|
|
1915
1173
|
});
|
|
1916
1174
|
saveProjectSessionsState();
|
|
1917
|
-
agentAdaptersBySession.delete(session.slug);
|
|
1175
|
+
bridge.agentAdaptersBySession.delete(session.slug);
|
|
1176
|
+
invalidateBackendAdaptersForSession(session.slug);
|
|
1918
1177
|
return void msg.reply(`${session.name} 프로젝트 세션 만들었어. 작업실은 ${session.workdir}이고, 이 텍스트 채널${voiceChannel ? `과 음성 채널 ${voiceChannel.name}` : ''} 입력은 별도 Hermes 세션 파일로 이어져.`);
|
|
1919
1178
|
}
|
|
1920
1179
|
}
|
|
@@ -1925,130 +1184,22 @@ client.once('ready', async () => {
|
|
|
1925
1184
|
await announceRestartComplete();
|
|
1926
1185
|
});
|
|
1927
1186
|
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
warn('project session command failed', e?.stack || e);
|
|
1943
|
-
await msg.reply(String(e?.message || e).slice(0, 700));
|
|
1944
|
-
}
|
|
1945
|
-
return;
|
|
1946
|
-
}
|
|
1947
|
-
if (content === '!ping') return void msg.reply('pong');
|
|
1948
|
-
if (content === '!verbose') return void msg.reply(verboseStatusText());
|
|
1949
|
-
if (['!verbose on', '!verbose true', '!verbose 1', '!verbose 켜', '!verbose 켜줘'].includes(content.toLowerCase())) {
|
|
1950
|
-
setVerboseProgress(true, 'discord-command');
|
|
1951
|
-
return void msg.reply(verboseStatusText());
|
|
1952
|
-
}
|
|
1953
|
-
if (['!verbose off', '!verbose false', '!verbose 0', '!verbose 꺼', '!verbose 꺼줘'].includes(content.toLowerCase())) {
|
|
1954
|
-
setVerboseProgress(false, 'discord-command');
|
|
1955
|
-
return void msg.reply(verboseStatusText());
|
|
1956
|
-
}
|
|
1957
|
-
if (content === '!notify') return void msg.reply(notifyStatusText());
|
|
1958
|
-
if (['!notify on', '!notify always', '!notify 1'].includes(content.toLowerCase())) {
|
|
1959
|
-
notifyUserOptIn = true;
|
|
1960
|
-
return void msg.reply(notifyStatusText());
|
|
1961
|
-
}
|
|
1962
|
-
if (['!notify off', '!notify auto', '!notify 0'].includes(content.toLowerCase())) {
|
|
1963
|
-
notifyUserOptIn = false;
|
|
1964
|
-
return void msg.reply(notifyStatusText());
|
|
1965
|
-
}
|
|
1966
|
-
if (content === '!smart-progress' || content === '!smart_progress') return void msg.reply(smartProgressStatusText());
|
|
1967
|
-
if (['!smart-progress on', '!smart-progress true', '!smart-progress 1', '!smart_progress on'].includes(content.toLowerCase())) {
|
|
1968
|
-
smartProgressEnabled = true;
|
|
1969
|
-
return void msg.reply(smartProgressStatusText());
|
|
1970
|
-
}
|
|
1971
|
-
if (['!smart-progress off', '!smart-progress false', '!smart-progress 0', '!smart_progress off'].includes(content.toLowerCase())) {
|
|
1972
|
-
smartProgressEnabled = false;
|
|
1973
|
-
return void msg.reply(smartProgressStatusText());
|
|
1974
|
-
}
|
|
1975
|
-
if (content === '!sensitivity') return void msg.reply(sensitivityStatusText());
|
|
1976
|
-
if (content === '!latency' || content === '!metrics') {
|
|
1977
|
-
const summary = summarizeLatencyRecords(readJsonlRecords(settings.latencyLogPath, { limit: 200 }));
|
|
1978
|
-
return void msg.reply(`최근 latency 요약 (${settings.latencyLogPath}):\n${formatLatencySummary(summary)}`.slice(0, 1900));
|
|
1979
|
-
}
|
|
1980
|
-
if (content === '!sensitivity conservative') {
|
|
1981
|
-
setSensitivityMode('conservative', 'discord-command');
|
|
1982
|
-
return void msg.reply(sensitivityStatusText());
|
|
1983
|
-
}
|
|
1984
|
-
if (content === '!sensitivity normal') {
|
|
1985
|
-
setSensitivityMode('normal', 'discord-command');
|
|
1986
|
-
return void msg.reply(sensitivityStatusText());
|
|
1987
|
-
}
|
|
1988
|
-
if (content === '!session') return void handleProjectSessionCommand(msg, { action: 'status' });
|
|
1989
|
-
if (content === '!reset-session') return void handleProjectSessionCommand(msg, { action: 'reset' });
|
|
1990
|
-
if (content === '!join') {
|
|
1991
|
-
const ch = msg.member?.voice?.channel;
|
|
1992
|
-
if (!ch) return void msg.reply('먼저 음성 채널에 들어가줘.');
|
|
1993
|
-
await connectTo(ch);
|
|
1994
|
-
return void msg.reply('들어왔어. Node receiver로 듣는 중.');
|
|
1995
|
-
}
|
|
1996
|
-
if (content === '!leave') {
|
|
1997
|
-
try { connection?.destroy(); } catch {}
|
|
1998
|
-
connection = null;
|
|
1999
|
-
activeVoiceChannelId = '';
|
|
2000
|
-
return void msg.reply('나갈게.');
|
|
2001
|
-
}
|
|
2002
|
-
if (content.startsWith('!say ')) {
|
|
2003
|
-
const text = content.slice(5).trim();
|
|
2004
|
-
const mp3 = await synthTTS(text);
|
|
2005
|
-
await playAudio(mp3);
|
|
2006
|
-
return;
|
|
2007
|
-
}
|
|
2008
|
-
if (content.startsWith('!voice-test ')) {
|
|
2009
|
-
const text = content.slice('!voice-test '.length).trim();
|
|
2010
|
-
if (!text) return void msg.reply('테스트할 문장을 붙여줘.');
|
|
2011
|
-
const started = Date.now();
|
|
2012
|
-
try {
|
|
2013
|
-
await msg.reply(`TTS 백엔드 ${ttsBackend.name}로 음성 테스트할게.`);
|
|
2014
|
-
await speakText(text);
|
|
2015
|
-
await msg.channel.send(`음성 테스트 완료: ${ttsBackend.name}, ${Date.now() - started}ms`);
|
|
2016
|
-
} catch (e) {
|
|
2017
|
-
warn('voice-test failed', e?.stack || e);
|
|
2018
|
-
await msg.channel.send(`음성 테스트 실패: ${String(e?.message || e).slice(0, 700)}`);
|
|
2019
|
-
}
|
|
2020
|
-
return;
|
|
2021
|
-
}
|
|
2022
|
-
if (content === '!voice-clone' || content === '!voice-clone status') {
|
|
2023
|
-
const current = voiceCloneCapture.current();
|
|
2024
|
-
if (current?.userId === String(msg.author.id)) {
|
|
2025
|
-
return void msg.reply(`다음 유효한 음성을 ${path.relative(ROOT, current.targetPath)}에 저장할게.`);
|
|
2026
|
-
}
|
|
2027
|
-
return void msg.reply('대기 중인 보이스 클로닝 샘플 캡처가 없어. `!voice-clone capture`로 시작해.');
|
|
2028
|
-
}
|
|
2029
|
-
if (content === '!voice-clone cancel') {
|
|
2030
|
-
const cancelled = voiceCloneCapture.cancel(msg.author.id);
|
|
2031
|
-
return void msg.reply(cancelled ? '보이스 클로닝 샘플 캡처를 취소했어.' : '대기 중인 캡처가 없어.');
|
|
2032
|
-
}
|
|
2033
|
-
if (content === '!voice-clone capture') {
|
|
2034
|
-
const armed = voiceCloneCapture.arm({ userId: msg.author.id, source: 'discord-command' });
|
|
2035
|
-
return void msg.reply(`다음 유효한 음성을 ${path.relative(ROOT, armed.targetPath)}에 저장할게. 음성 채널에서 10~30초 정도 말해줘.`);
|
|
2036
|
-
}
|
|
2037
|
-
if (content.startsWith('!ask ')) {
|
|
2038
|
-
const text = content.slice(5).trim();
|
|
2039
|
-
if (!text) return void msg.reply('물어볼 내용을 붙여줘.');
|
|
2040
|
-
await handleTextAgentMessage(msg, text, { speakResponse: true });
|
|
2041
|
-
return;
|
|
2042
|
-
}
|
|
2043
|
-
if (shouldRouteDiscordTextToAgent({
|
|
2044
|
-
content,
|
|
2045
|
-
channelId: msg.channelId,
|
|
2046
|
-
transcriptChannelId: settings.transcriptChannelId,
|
|
2047
|
-
}) || resolveProjectSessionForChannel(msg.channelId)) {
|
|
2048
|
-
await handleTextAgentMessage(msg, content, { speakResponse: false });
|
|
2049
|
-
return;
|
|
2050
|
-
}
|
|
1187
|
+
const discordCommandRouter = createDiscordCommandRouter({
|
|
1188
|
+
bridge, settings, warn, path, ROOT,
|
|
1189
|
+
isAllowed,
|
|
1190
|
+
handleProjectSessionCommand,
|
|
1191
|
+
handleTextAgentMessage,
|
|
1192
|
+
resolveProjectSessionForChannel,
|
|
1193
|
+
verboseStatusText, setVerboseProgress,
|
|
1194
|
+
notifyStatusText,
|
|
1195
|
+
smartProgressStatusText,
|
|
1196
|
+
sensitivityStatusText, setSensitivityMode,
|
|
1197
|
+
summarizeLatencyRecords, readJsonlRecords, formatLatencySummary,
|
|
1198
|
+
connectTo,
|
|
1199
|
+
synthTTS, playAudio, speakText,
|
|
1200
|
+
voiceCloneCapture,
|
|
2051
1201
|
});
|
|
1202
|
+
client.on('messageCreate', msg => discordCommandRouter.handleDiscordMessage(msg).catch(e => warn('discord command router failed', e?.stack || e)));
|
|
2052
1203
|
|
|
2053
1204
|
process.stdout?.on?.('error', error => {
|
|
2054
1205
|
if (isBenignTransientNetworkError(error)) {
|
|
@@ -2078,37 +1229,6 @@ process.on('uncaughtException', error => {
|
|
|
2078
1229
|
client.on('error', e => warn('discord client error', e?.stack || e));
|
|
2079
1230
|
client.on('shardError', e => warn('discord shard error', e?.stack || e));
|
|
2080
1231
|
|
|
2081
|
-
let shutdownStarted = false;
|
|
2082
|
-
async function gracefulShutdown(signalName) {
|
|
2083
|
-
if (shutdownStarted) return;
|
|
2084
|
-
shutdownStarted = true;
|
|
2085
|
-
log('graceful shutdown requested', signalName, 'connection', Boolean(connection));
|
|
2086
|
-
try {
|
|
2087
|
-
if (currentAbortController && !currentAbortController.signal.aborted) currentAbortController.abort();
|
|
2088
|
-
} catch (e) {
|
|
2089
|
-
warn('abort before shutdown failed', e?.stack || e);
|
|
2090
|
-
}
|
|
2091
|
-
try {
|
|
2092
|
-
if (connection) {
|
|
2093
|
-
let detail = '';
|
|
2094
|
-
const noticePath = path.join(ROOT, '.cache', 'restart-notice.txt');
|
|
2095
|
-
try {
|
|
2096
|
-
if (fs.existsSync(noticePath)) {
|
|
2097
|
-
detail = fs.readFileSync(noticePath, 'utf8').replace(/\s+/g, ' ').trim().slice(0, 120);
|
|
2098
|
-
}
|
|
2099
|
-
} catch (e) {
|
|
2100
|
-
warn('read restart notice failed', e?.stack || e);
|
|
2101
|
-
}
|
|
2102
|
-
await speakText(formatRestartShutdownNotice(detail, settings.tts.edge.voice));
|
|
2103
|
-
await waitEvent(player, AudioPlayerStatus.Idle, 30000).catch(() => {});
|
|
2104
|
-
}
|
|
2105
|
-
} catch (e) {
|
|
2106
|
-
warn('shutdown voice notice failed', e?.stack || e);
|
|
2107
|
-
}
|
|
2108
|
-
try { connection?.destroy(); } catch {}
|
|
2109
|
-
try { client.destroy(); } catch {}
|
|
2110
|
-
process.exit(0);
|
|
2111
|
-
}
|
|
2112
1232
|
process.on('SIGTERM', () => { void gracefulShutdown('SIGTERM'); });
|
|
2113
1233
|
process.on('SIGINT', () => { void gracefulShutdown('SIGINT'); });
|
|
2114
1234
|
|