verbalcoding 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +98 -2
- package/README.es.md +134 -0
- package/README.fr.md +134 -0
- package/README.ja.md +134 -0
- package/README.ko.md +134 -0
- package/README.md +118 -74
- package/README.ru.md +134 -0
- package/README.zh.md +133 -0
- package/app-node/agent_adapters.mjs +37 -5
- package/app-node/agent_adapters.test.mjs +27 -1
- package/app-node/agent_detect.mjs +73 -0
- package/app-node/agent_detect.test.mjs +77 -0
- package/app-node/agent_routing.mjs +148 -0
- package/app-node/agent_routing.test.mjs +138 -0
- package/app-node/agent_turn.mjs +86 -0
- package/app-node/agent_turn.test.mjs +109 -0
- package/app-node/bridge_context.mjs +73 -0
- package/app-node/bridge_context.test.mjs +54 -0
- package/app-node/bridge_state.mjs +4 -0
- package/app-node/bridge_wireup.test.mjs +462 -0
- package/app-node/cli_install.test.mjs +31 -0
- package/app-node/cross_agent_routing.test.mjs +78 -0
- package/app-node/discord_command_router.mjs +204 -0
- package/app-node/discord_command_router.test.mjs +311 -0
- package/app-node/discord_voice_setup.mjs +251 -0
- package/app-node/discord_voice_setup.test.mjs +86 -0
- package/app-node/hermes_profiles.test.mjs +12 -1
- package/app-node/install_config.mjs +113 -3
- package/app-node/install_config.test.mjs +8 -0
- package/app-node/instance_doctor.test.mjs +9 -0
- package/app-node/instances.test.mjs +8 -1
- package/app-node/main.mjs +513 -1058
- package/app-node/mcp_tools.test.mjs +7 -0
- package/app-node/notification_handler.mjs +89 -0
- package/app-node/notification_handler.test.mjs +187 -0
- package/app-node/notify.mjs +73 -0
- package/app-node/notify.test.mjs +68 -0
- package/app-node/plan_dispatcher.mjs +215 -0
- package/app-node/plan_dispatcher.test.mjs +101 -0
- package/app-node/plan_mode.mjs +203 -0
- package/app-node/plan_mode.test.mjs +231 -0
- package/app-node/progress_handler.mjs +220 -0
- package/app-node/progress_handler.test.mjs +193 -0
- package/app-node/progress_speech.mjs +54 -32
- package/app-node/progress_speech.test.mjs +12 -3
- package/app-node/project_sessions.mjs +5 -2
- package/app-node/project_sessions.test.mjs +7 -0
- package/app-node/research_mode.mjs +282 -0
- package/app-node/research_mode.test.mjs +264 -0
- package/app-node/restart_notice.mjs +3 -0
- package/app-node/restart_notice.test.mjs +11 -0
- package/app-node/session_ontology.mjs +271 -0
- package/app-node/session_ontology.test.mjs +130 -0
- package/app-node/smart_progress.mjs +94 -0
- package/app-node/smart_progress.test.mjs +66 -0
- package/app-node/stream_sentencer.mjs +91 -0
- package/app-node/stream_sentencer.test.mjs +129 -0
- package/app-node/streaming_tts_queue.mjs +52 -0
- package/app-node/streaming_tts_queue.test.mjs +64 -0
- package/app-node/stt_whisper.mjs +24 -0
- package/app-node/stt_whisper.test.mjs +32 -0
- package/app-node/text_routing.mjs +22 -0
- package/app-node/text_routing.test.mjs +23 -1
- package/app-node/tts_backends.mjs +537 -3
- package/app-node/tts_backends.test.mjs +454 -0
- package/app-node/tts_player.mjs +164 -0
- package/app-node/tts_player.test.mjs +202 -0
- package/app-node/tts_runtime.mjs +134 -0
- package/app-node/tts_runtime.test.mjs +89 -0
- package/app-node/tts_settings.mjs +150 -3
- package/app-node/tts_settings.test.mjs +204 -0
- package/app-node/tts_voice_config.mjs +136 -2
- package/app-node/tts_voice_config.test.mjs +94 -0
- package/app-node/utterance_router.mjs +216 -0
- package/app-node/utterance_router.test.mjs +236 -0
- package/app-node/voice_autojoin.mjs +37 -0
- package/app-node/voice_autojoin.test.mjs +59 -0
- package/app-node/voice_io.mjs +272 -0
- package/app-node/voice_io.test.mjs +102 -0
- package/app-node/voice_turn_runner.mjs +449 -0
- package/app-node/voice_turn_runner.test.mjs +289 -0
- package/docs/CONFIGURATION.md +79 -96
- package/docs/FRESH_INSTALL.md +105 -63
- package/docs/HARNESSES.md +58 -0
- package/docs/HARNESS_AIDER.md +50 -0
- package/docs/HARNESS_CLAUDE.md +56 -0
- package/docs/HARNESS_CODEX.md +56 -0
- package/docs/HARNESS_CURSOR.md +45 -0
- package/docs/HARNESS_GEMINI.md +45 -0
- package/docs/HARNESS_HERMES.md +57 -0
- package/docs/HARNESS_OPENCLAW.md +44 -0
- package/docs/HARNESS_OPENCODE.md +44 -0
- package/docs/HERMES_VOICE.md +65 -0
- package/docs/MULTI_INSTANCE.md +16 -0
- package/docs/README.md +50 -0
- package/docs/RELEASE.md +42 -19
- package/docs/ROADMAP.md +53 -0
- package/docs/TROUBLESHOOTING.md +126 -0
- package/docs/TTS_BACKENDS.md +227 -0
- package/docs/USAGE.md +94 -40
- package/docs/assets/figures/verbalcoding-flow.svg +1 -1
- package/docs/i18n/AGENTS.es.md +34 -0
- package/docs/i18n/AGENTS.fr.md +34 -0
- package/docs/i18n/AGENTS.ja.md +34 -0
- package/docs/i18n/AGENTS.ko.md +34 -0
- package/docs/i18n/AGENTS.ru.md +34 -0
- package/docs/i18n/AGENTS.zh.md +34 -0
- package/docs/i18n/CONFIGURATION.es.md +25 -0
- package/docs/i18n/CONFIGURATION.fr.md +25 -0
- package/docs/i18n/CONFIGURATION.ja.md +25 -0
- package/docs/i18n/CONFIGURATION.ko.md +25 -0
- package/docs/i18n/CONFIGURATION.ru.md +25 -0
- package/docs/i18n/CONFIGURATION.zh.md +25 -0
- package/docs/i18n/FRESH_INSTALL.es.md +27 -2
- package/docs/i18n/FRESH_INSTALL.fr.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ja.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ko.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ru.md +27 -2
- package/docs/i18n/FRESH_INSTALL.zh.md +27 -2
- package/docs/i18n/HARNESSES.es.md +58 -0
- package/docs/i18n/HARNESSES.fr.md +58 -0
- package/docs/i18n/HARNESSES.ja.md +58 -0
- package/docs/i18n/HARNESSES.ko.md +58 -0
- package/docs/i18n/HARNESSES.ru.md +58 -0
- package/docs/i18n/HARNESSES.zh.md +58 -0
- package/docs/i18n/HARNESS_AIDER.es.md +48 -0
- package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
- package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
- package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
- package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
- package/docs/i18n/HARNESS_CODEX.es.md +55 -0
- package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
- package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
- package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
- package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
- package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
- package/docs/i18n/HARNESS_HERMES.es.md +54 -0
- package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
- package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
- package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
- package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
- package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
- package/docs/i18n/HERMES_VOICE.es.md +46 -0
- package/docs/i18n/HERMES_VOICE.fr.md +46 -0
- package/docs/i18n/HERMES_VOICE.ja.md +46 -0
- package/docs/i18n/HERMES_VOICE.ko.md +65 -0
- package/docs/i18n/HERMES_VOICE.ru.md +46 -0
- package/docs/i18n/HERMES_VOICE.zh.md +46 -0
- package/docs/i18n/MULTI_INSTANCE.es.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.fr.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ja.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ko.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ru.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.zh.md +25 -0
- package/docs/i18n/README.es.md +20 -134
- package/docs/i18n/README.fr.md +20 -134
- package/docs/i18n/README.ja.md +20 -134
- package/docs/i18n/README.ko.md +20 -133
- package/docs/i18n/README.ru.md +20 -134
- package/docs/i18n/README.zh.md +20 -133
- package/docs/i18n/RELEASE.es.md +26 -1
- package/docs/i18n/RELEASE.fr.md +26 -1
- package/docs/i18n/RELEASE.ja.md +26 -1
- package/docs/i18n/RELEASE.ko.md +26 -1
- package/docs/i18n/RELEASE.ru.md +26 -1
- package/docs/i18n/RELEASE.zh.md +26 -1
- package/docs/i18n/TROUBLESHOOTING.es.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.fr.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ja.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ko.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ru.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.zh.md +39 -0
- package/docs/i18n/USAGE.es.md +25 -0
- package/docs/i18n/USAGE.fr.md +25 -0
- package/docs/i18n/USAGE.ja.md +25 -0
- package/docs/i18n/USAGE.ko.md +25 -0
- package/docs/i18n/USAGE.ru.md +25 -0
- package/docs/i18n/USAGE.zh.md +25 -0
- package/docs/superpowers/plans/2026-05-13-phase1-streaming-pipeline.md +122 -0
- package/docs/superpowers/plans/2026-05-13-phase10-push-notifications.md +152 -0
- package/docs/superpowers/plans/2026-05-13-phase2-agent-adapters.md +242 -0
- package/docs/superpowers/plans/2026-05-13-phase6-smart-progress.md +172 -0
- package/docs/superpowers/plans/2026-05-13-phase7-voice-plan-mode.md +108 -0
- package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
- package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
- package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
- package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
- package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
- package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
- package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
- package/integrations/fireredtts2/mlx_llm.py +183 -0
- package/integrations/fireredtts2/synth.py +156 -0
- package/integrations/fireredtts2/synth_mlx.py +196 -0
- package/integrations/mlxaudio/synth.py +74 -0
- package/integrations/neuttsair/synth.py +104 -0
- package/integrations/omnivoice/synth.py +110 -0
- package/package.json +7 -1
- package/scripts/cli.mjs +88 -3
- package/scripts/doctor.mjs +115 -4
- package/scripts/install.mjs +20 -2
- package/scripts/install_fireredtts2.sh +109 -0
- package/scripts/install_mlxaudio.sh +34 -0
- package/scripts/install_mossttsnano.sh +46 -0
- package/scripts/postinstall.mjs +34 -0
package/app-node/main.mjs
CHANGED
|
@@ -6,19 +6,8 @@ import { spawn, execFile } from 'node:child_process';
|
|
|
6
6
|
import { promisify } from 'node:util';
|
|
7
7
|
|
|
8
8
|
import { Client, GatewayIntentBits, Partials } from 'discord.js';
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
EndBehaviorType,
|
|
12
|
-
StreamType,
|
|
13
|
-
VoiceConnectionStatus,
|
|
14
|
-
createAudioPlayer,
|
|
15
|
-
createAudioResource,
|
|
16
|
-
entersState,
|
|
17
|
-
joinVoiceChannel,
|
|
18
|
-
} from '@discordjs/voice';
|
|
19
|
-
import prism from 'prism-media';
|
|
20
|
-
import wav from 'wav';
|
|
21
|
-
import { buildAgentSettings, createAgentAdapter, isPatchLikeOutput } from './agent_adapters.mjs';
|
|
9
|
+
import { createAudioPlayer } from '@discordjs/voice';
|
|
10
|
+
import { buildAgentSettings, createAgentAdapter, isPatchLikeOutput, shellSplit } from './agent_adapters.mjs';
|
|
22
11
|
import {
|
|
23
12
|
appendJsonl,
|
|
24
13
|
createLatencyTurn,
|
|
@@ -26,9 +15,28 @@ import {
|
|
|
26
15
|
readJsonlRecords,
|
|
27
16
|
summarizeLatencyRecords,
|
|
28
17
|
} from './latency_metrics.mjs';
|
|
29
|
-
import {
|
|
30
|
-
|
|
31
|
-
|
|
18
|
+
import {
|
|
19
|
+
isPlanEntryUtterance,
|
|
20
|
+
parsePlanOutput,
|
|
21
|
+
parseVoiceCommand as parsePlanVoiceCommand,
|
|
22
|
+
applyCommand as applyPlanCommand,
|
|
23
|
+
renderFinalPlan,
|
|
24
|
+
planModePreamble,
|
|
25
|
+
planExecutionPreamble,
|
|
26
|
+
parseDecisionAnswer,
|
|
27
|
+
renderDecisionPrompt,
|
|
28
|
+
renderResolvedDecisions,
|
|
29
|
+
} from './plan_mode.mjs';
|
|
30
|
+
import {
|
|
31
|
+
parseAgentRoutingCommand,
|
|
32
|
+
renderAgentPrefix,
|
|
33
|
+
buildCrossAgentPrompt,
|
|
34
|
+
isAgentRoutingDecision,
|
|
35
|
+
buildFallbackDecision,
|
|
36
|
+
isRoutingOnlyUtterance,
|
|
37
|
+
} from './agent_routing.mjs';
|
|
38
|
+
import { createSessionOntology } from './session_ontology.mjs';
|
|
39
|
+
import { parseResearchCommand, runResearchTurn } from './research_mode.mjs';
|
|
32
40
|
import { buildTtsSettings } from './tts_settings.mjs';
|
|
33
41
|
import { createTtsBackend } from './tts_backends.mjs';
|
|
34
42
|
import {
|
|
@@ -43,17 +51,30 @@ import {
|
|
|
43
51
|
} from './tts_voice_config.mjs';
|
|
44
52
|
import { createBridgeLogger, createTransientErrorReporter, isTransientNetworkError } from './bridge_logger.mjs';
|
|
45
53
|
import { createBridgeState } from './bridge_state.mjs';
|
|
54
|
+
import { createBridge } from './bridge_context.mjs';
|
|
55
|
+
import { createVoiceIO } from './voice_io.mjs';
|
|
56
|
+
import { createTtsPlayer } from './tts_player.mjs';
|
|
57
|
+
import { createUtteranceRouter } from './utterance_router.mjs';
|
|
58
|
+
import { createProgressHandler } from './progress_handler.mjs';
|
|
59
|
+
import { createNotificationHandler } from './notification_handler.mjs';
|
|
60
|
+
import { createTtsRuntime } from './tts_runtime.mjs';
|
|
61
|
+
import { createDiscordVoiceSetup } from './discord_voice_setup.mjs';
|
|
62
|
+
import { createAgentTurnLifecycle } from './agent_turn.mjs';
|
|
63
|
+
import { createDiscordCommandRouter } from './discord_command_router.mjs';
|
|
64
|
+
import { createVoiceTurnRunner } from './voice_turn_runner.mjs';
|
|
65
|
+
import { createPlanDispatcher } from './plan_dispatcher.mjs';
|
|
46
66
|
import { sendDiscordText, splitDiscordMessage } from './discord_text.mjs';
|
|
47
|
-
import { progressTtsCacheFileName } from './progress_cache.mjs';
|
|
48
67
|
import { shouldPassWhisperLanguage, voiceLanguageCommandFromTranscript, languagePreset } from './language_config.mjs';
|
|
49
|
-
import {
|
|
50
|
-
import {
|
|
68
|
+
import { whisperFailureMessage, whisperTimeoutMs } from './stt_whisper.mjs';
|
|
69
|
+
import { formatRestartCompleteNotice } from './restart_notice.mjs';
|
|
70
|
+
import {
|
|
71
|
+
formatRecentDiscordContext,
|
|
72
|
+
} from './text_routing.mjs';
|
|
51
73
|
import {
|
|
52
74
|
bindProjectSessionToChannel,
|
|
53
75
|
createProjectSession,
|
|
54
76
|
listProjectSessions,
|
|
55
77
|
loadProjectSessions,
|
|
56
|
-
parseProjectSessionCommand,
|
|
57
78
|
projectSessionContextText,
|
|
58
79
|
projectSessionForChannel,
|
|
59
80
|
saveProjectSessions,
|
|
@@ -141,13 +162,20 @@ function ensureTtsVoiceConfig() {
|
|
|
141
162
|
return readTtsVoiceConfig(TTS_VOICE_CONFIG_PATH);
|
|
142
163
|
}
|
|
143
164
|
function applyVoiceConfigToProcessEnv(config = ensureTtsVoiceConfig()) {
|
|
144
|
-
const selection = effectiveTtsVoiceSelection(config,
|
|
165
|
+
const selection = effectiveTtsVoiceSelection(config, process.env);
|
|
145
166
|
const configuredVoiceLanguage = process.env.VOICE_LANGUAGE;
|
|
146
167
|
const nextEnv = applyTtsVoiceSelectionToEnv(process.env, selection);
|
|
147
168
|
if (configuredVoiceLanguage) nextEnv.VOICE_LANGUAGE = configuredVoiceLanguage;
|
|
148
169
|
for (const [key, value] of Object.entries(nextEnv)) process.env[key] = value;
|
|
149
170
|
return { config, selection };
|
|
150
171
|
}
|
|
172
|
+
function rebuildTtsRuntimeSettings(selection = null) {
|
|
173
|
+
settings.tts = buildTtsSettings(process.env, ROOT);
|
|
174
|
+
if (selection?.backend === 'edge' && selection.voice?.voice) settings.tts.edge.voice = selection.voice.voice;
|
|
175
|
+
try { bridge.ttsBackend?.close?.(); } catch (e) { warn('tts backend close failed', e?.message || e); }
|
|
176
|
+
bridge.ttsBackend = createTtsBackend(settings.tts, { execFileAsync, spawn, log, warn, onFallback: ttsFallbackNotice, voiceProvider: () => settings.tts.edge.voice });
|
|
177
|
+
return settings.tts;
|
|
178
|
+
}
|
|
151
179
|
function reloadRuntimeLanguageFromEnv() {
|
|
152
180
|
const previousWhisperLanguage = settings?.whisperLanguage;
|
|
153
181
|
const previousVoiceLanguage = settings?.voiceLanguage;
|
|
@@ -170,6 +198,7 @@ const settings = {
|
|
|
170
198
|
whisperBin: process.env.WHISPER_CPP_BIN || 'whisper-cli',
|
|
171
199
|
whisperModel: process.env.WHISPER_CPP_MODEL || path.join(ROOT, 'models', 'ggml-small-q5_1.bin'),
|
|
172
200
|
whisperLanguage: process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko',
|
|
201
|
+
whisperTimeoutMs: whisperTimeoutMs(process.env),
|
|
173
202
|
voiceLanguage: process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko',
|
|
174
203
|
tts: buildTtsSettings(process.env, ROOT),
|
|
175
204
|
requireWakeWord: ['1', 'true', 'yes'].includes((process.env.REQUIRE_WAKE_WORD || '0').toLowerCase()),
|
|
@@ -187,20 +216,33 @@ const client = new Client({
|
|
|
187
216
|
intents: [GatewayIntentBits.Guilds, GatewayIntentBits.GuildVoiceStates, GatewayIntentBits.GuildMessages, GatewayIntentBits.MessageContent],
|
|
188
217
|
partials: [Partials.Channel],
|
|
189
218
|
});
|
|
190
|
-
|
|
219
|
+
const announcedTtsFallbacks = new Set();
|
|
220
|
+
const pendingFallbackNoticePromises = new Set();
|
|
221
|
+
function ttsFallbackNotice({ backend } = {}) {
|
|
222
|
+
if (!backend || backend === 'edge') return;
|
|
223
|
+
if (announcedTtsFallbacks.has(backend)) return;
|
|
224
|
+
announcedTtsFallbacks.add(backend);
|
|
225
|
+
const en = /^en/i.test(String(settings.voiceLanguage || ''));
|
|
226
|
+
const msg = en
|
|
227
|
+
? `${backend} synthesis failed; using Edge for the rest of this session.`
|
|
228
|
+
: `${backend} 음성 생성에 실패해서 이번 세션은 Edge로 진행할게.`;
|
|
229
|
+
const textPromise = sendText(`⚠️ ${msg}`)
|
|
230
|
+
.catch(e => warn('tts fallback notice send failed', e?.message || e));
|
|
231
|
+
pendingFallbackNoticePromises.add(textPromise);
|
|
232
|
+
textPromise.finally(() => pendingFallbackNoticePromises.delete(textPromise));
|
|
233
|
+
const speakPromise = new Promise(resolve => queueMicrotask(() => {
|
|
234
|
+
speakText(msg, null, null, { mirrorText: false })
|
|
235
|
+
.catch(e => warn('tts fallback notice speak failed', e?.message || e))
|
|
236
|
+
.finally(resolve);
|
|
237
|
+
}));
|
|
238
|
+
pendingFallbackNoticePromises.add(speakPromise);
|
|
239
|
+
speakPromise.finally(() => pendingFallbackNoticePromises.delete(speakPromise));
|
|
240
|
+
}
|
|
241
|
+
const bridge = createBridge();
|
|
242
|
+
bridge.ttsBackend = createTtsBackend(settings.tts, { execFileAsync, spawn, log, warn, onFallback: ttsFallbackNotice, voiceProvider: () => settings.tts.edge.voice });
|
|
191
243
|
const voiceCloneCapture = createVoiceCloneCaptureState({ defaultTargetPath: settings.tts.openvoice.refAudio });
|
|
192
244
|
|
|
193
|
-
|
|
194
|
-
let activeVoiceChannelId = '';
|
|
195
|
-
let activeTranscriptChannelId = '';
|
|
196
|
-
let player = createAudioPlayer();
|
|
197
|
-
let speaking = false;
|
|
198
|
-
let processing = false;
|
|
199
|
-
let activeTurnId = 0;
|
|
200
|
-
let currentAbortController = null;
|
|
201
|
-
const interruptedTurns = new Set();
|
|
202
|
-
const activeStreams = new Map();
|
|
203
|
-
let bridgeState = null;
|
|
245
|
+
bridge.player = createAudioPlayer();
|
|
204
246
|
const MAX_DEFERRED_PROCESSING_UTTERANCES = Number(process.env.MAX_DEFERRED_PROCESSING_UTTERANCES || '0');
|
|
205
247
|
const MIN_UTTERANCE_SECONDS = Number(process.env.MIN_UTTERANCE_SECONDS || '1.4');
|
|
206
248
|
const MIN_UTTERANCE_BYTES = 48000 * 2 * 2 * MIN_UTTERANCE_SECONDS;
|
|
@@ -233,7 +275,7 @@ const bridgeLogger = createBridgeLogger({
|
|
|
233
275
|
});
|
|
234
276
|
function log(...args) { bridgeLogger.log(...args); }
|
|
235
277
|
function warn(...args) { bridgeLogger.warn(...args); }
|
|
236
|
-
bridgeState = createBridgeState({ log, cleanupFile: file => fs.rm(file, { force: true }, () => {}) });
|
|
278
|
+
bridge.bridgeState = createBridgeState({ log, cleanupFile: file => fs.rm(file, { force: true }, () => {}) });
|
|
237
279
|
const reportTransientProcessError = createTransientErrorReporter({ warn });
|
|
238
280
|
function isBenignTransientNetworkError(error) {
|
|
239
281
|
return isTransientNetworkError(error);
|
|
@@ -252,27 +294,80 @@ function newLatencyTurn(userId, startedAtMs) {
|
|
|
252
294
|
}
|
|
253
295
|
|
|
254
296
|
function discardVoiceInputQueues(reason = 'config-change') {
|
|
255
|
-
return bridgeState?.discardQueues(reason) || 0;
|
|
297
|
+
return bridge.bridgeState?.discardQueues(reason) || 0;
|
|
256
298
|
}
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
let progressSpeechBatchTimer = null;
|
|
264
|
-
let progressSpeechBatchSignal = null;
|
|
265
|
-
let progressSpeechBatchStartedAt = 0;
|
|
266
|
-
let activeProgressLastEventAt = 0;
|
|
267
|
-
let lastVerboseProgressText = '';
|
|
268
|
-
let lastVerboseProgressTextAt = 0;
|
|
299
|
+
bridge.verboseProgress = Boolean(settings.agent.verboseProgress);
|
|
300
|
+
|
|
301
|
+
const STREAMING_TTS_ENABLED = ['1', 'true', 'yes', 'on'].includes(String(process.env.STREAMING_TTS || '1').toLowerCase());
|
|
302
|
+
|
|
303
|
+
bridge.smartProgressEnabled = Boolean(process.env.SMART_PROGRESS_API_KEY);
|
|
304
|
+
const VOICE_CONNECT_TIMEOUT_MS = Number(process.env.VOICE_CONNECT_TIMEOUT_MS || '60000');
|
|
269
305
|
const PROGRESS_IDLE_NOTICE_INITIAL_MS = Number(process.env.PROGRESS_IDLE_NOTICE_INITIAL_MS || process.env.PROGRESS_IDLE_NOTICE_MS || '10000');
|
|
270
306
|
const PROGRESS_IDLE_NOTICE_MAX_MS = Number(process.env.PROGRESS_IDLE_NOTICE_MAX_MS || '30000');
|
|
271
307
|
const PROGRESS_IDLE_NOTICE_MULTIPLIER = Number(process.env.PROGRESS_IDLE_NOTICE_MULTIPLIER || '1.8');
|
|
272
308
|
const PROGRESS_IDLE_CHECK_MS = Number(process.env.PROGRESS_IDLE_CHECK_MS || '5000');
|
|
273
309
|
const PROGRESS_IDLE_NOTICE_LIMIT = Number(process.env.PROGRESS_IDLE_NOTICE_LIMIT || '20');
|
|
274
310
|
const projectSessionsState = loadProjectSessions(settings.projectSessionsPath);
|
|
275
|
-
const
|
|
311
|
+
const ttsPlayer = createTtsPlayer({
|
|
312
|
+
bridge,
|
|
313
|
+
settings,
|
|
314
|
+
log,
|
|
315
|
+
warn,
|
|
316
|
+
sleep,
|
|
317
|
+
sendText,
|
|
318
|
+
refreshTtsRuntimeConfig,
|
|
319
|
+
waitEvent,
|
|
320
|
+
isAbortError,
|
|
321
|
+
STREAMING_TTS_ENABLED,
|
|
322
|
+
});
|
|
323
|
+
const { synthTTS, playAudio, speakText, beginStreamingTurn, endStreamingTurn, stopPlaybackForBargeIn } = ttsPlayer;
|
|
324
|
+
|
|
325
|
+
const progressHandler = createProgressHandler({
|
|
326
|
+
bridge,
|
|
327
|
+
settings,
|
|
328
|
+
log,
|
|
329
|
+
warn,
|
|
330
|
+
isAbortError,
|
|
331
|
+
playAudio,
|
|
332
|
+
sendText,
|
|
333
|
+
refreshTtsRuntimeConfig,
|
|
334
|
+
});
|
|
335
|
+
const {
|
|
336
|
+
ensureSmartProgressSummarizer,
|
|
337
|
+
smartProgressStatusText,
|
|
338
|
+
progressEmoji,
|
|
339
|
+
formatProgressText,
|
|
340
|
+
sendVerboseProgressText,
|
|
341
|
+
synthProgressTTS,
|
|
342
|
+
speakProgress,
|
|
343
|
+
speakImmediateNotice,
|
|
344
|
+
queueProgressSpeechText,
|
|
345
|
+
flushProgressSpeechBatch,
|
|
346
|
+
queueVerboseProgressSpeech,
|
|
347
|
+
clearProgressSpeechBatch,
|
|
348
|
+
stopProgressSpeech,
|
|
349
|
+
} = progressHandler;
|
|
350
|
+
|
|
351
|
+
const agentTurnLifecycle = createAgentTurnLifecycle({ bridge, warn });
|
|
352
|
+
|
|
353
|
+
const notificationHandler = createNotificationHandler({ bridge, client, log, warn });
|
|
354
|
+
const {
|
|
355
|
+
ensureNotifier,
|
|
356
|
+
notifyStatusText,
|
|
357
|
+
getVoiceChannelHumanCount,
|
|
358
|
+
maybeNotifyTaskComplete,
|
|
359
|
+
} = notificationHandler;
|
|
360
|
+
|
|
361
|
+
const ttsRuntime = createTtsRuntime({
|
|
362
|
+
bridge,
|
|
363
|
+
ROOT,
|
|
364
|
+
execFileAsync,
|
|
365
|
+
speakText,
|
|
366
|
+
warn,
|
|
367
|
+
persistEnvValues,
|
|
368
|
+
});
|
|
369
|
+
const { ensureSelectedTtsBackendInstalled, commandIsInstalled } = ttsRuntime;
|
|
370
|
+
|
|
276
371
|
function createBridgeAgentAdapter(agentSettings) {
|
|
277
372
|
return createAgentAdapter(agentSettings, {
|
|
278
373
|
execFileAsync,
|
|
@@ -280,43 +375,72 @@ function createBridgeAgentAdapter(agentSettings) {
|
|
|
280
375
|
log,
|
|
281
376
|
warn,
|
|
282
377
|
onProgress: event => {
|
|
283
|
-
if (!verboseProgress) return;
|
|
284
|
-
activeProgressLastEventAt = Date.now();
|
|
285
|
-
sendVerboseProgressText(event, activeProgressSignal);
|
|
286
|
-
|
|
378
|
+
if (!bridge.verboseProgress) return;
|
|
379
|
+
bridge.activeProgressLastEventAt = Date.now();
|
|
380
|
+
sendVerboseProgressText(event, bridge.activeProgressSignal);
|
|
381
|
+
if (bridge.smartProgressEnabled && process.env.SMART_PROGRESS_API_KEY) {
|
|
382
|
+
try { ensureSmartProgressSummarizer().ingest(event); }
|
|
383
|
+
catch (e) { warn('smart progress ingest failed', e?.stack || e); queueVerboseProgressSpeech(event, bridge.activeProgressSignal); }
|
|
384
|
+
} else {
|
|
385
|
+
queueVerboseProgressSpeech(event, bridge.activeProgressSignal);
|
|
386
|
+
}
|
|
387
|
+
},
|
|
388
|
+
onStdoutChunk: chunk => {
|
|
389
|
+
if (bridge.activeSentencer) {
|
|
390
|
+
try { bridge.activeSentencer.push(chunk); } catch (e) { warn('streaming sentencer push failed', e?.stack || e); }
|
|
391
|
+
}
|
|
287
392
|
},
|
|
288
393
|
});
|
|
289
394
|
}
|
|
290
395
|
const agentAdapter = createBridgeAgentAdapter(settings.agent);
|
|
291
|
-
function adapterForProjectSession(session) {
|
|
292
|
-
if (!session) return agentAdapter;
|
|
293
|
-
const key = session.slug || session.name;
|
|
294
|
-
if (!agentAdaptersBySession.has(key)) {
|
|
295
|
-
agentAdaptersBySession.set(key, createBridgeAgentAdapter({
|
|
296
|
-
...settings.agent,
|
|
297
|
-
label: `${settings.agent.label} · ${session.name}`,
|
|
298
|
-
sessionFile: session.sessionFile,
|
|
299
|
-
cwd: session.workdir,
|
|
300
|
-
projectContext: projectSessionContextText(session),
|
|
301
|
-
}));
|
|
302
|
-
}
|
|
303
|
-
return agentAdaptersBySession.get(key);
|
|
304
|
-
}
|
|
305
396
|
function resolveProjectSessionForChannel(channelId) {
|
|
306
397
|
return projectSessionForChannel(projectSessionsState, channelId) || null;
|
|
307
398
|
}
|
|
399
|
+
|
|
400
|
+
function ontologyStateFor(channelKey) {
|
|
401
|
+
const key = String(channelKey || 'default');
|
|
402
|
+
let store = bridge.ontologyByChannel.get(key);
|
|
403
|
+
if (!store) {
|
|
404
|
+
store = createSessionOntology({ channelKey: key });
|
|
405
|
+
try { store.load(); } catch {}
|
|
406
|
+
bridge.ontologyByChannel.set(key, store);
|
|
407
|
+
}
|
|
408
|
+
return store;
|
|
409
|
+
}
|
|
410
|
+
function captureOntologyFromTurn(channelKey, { prompt, answer, backend }) {
|
|
411
|
+
try {
|
|
412
|
+
const store = ontologyStateFor(channelKey);
|
|
413
|
+
const promptEntities = store.entitiesFromText(String(prompt || ''), { by: backend, kind: 'utterance' });
|
|
414
|
+
const answerEntities = store.entitiesFromText(String(answer || ''), { by: backend, kind: 'result' });
|
|
415
|
+
store.add(promptEntities);
|
|
416
|
+
store.add(answerEntities);
|
|
417
|
+
store.save();
|
|
418
|
+
} catch (e) {
|
|
419
|
+
warn('ontology capture failed', e?.message || e);
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
function resetRoutingState(channelKey) {
|
|
423
|
+
const state = routingStateFor(channelKey);
|
|
424
|
+
state.activeRouting = { backend: settings.agent.backend, sticky: false };
|
|
425
|
+
state.pendingFallbackPrompt = null;
|
|
426
|
+
}
|
|
427
|
+
function invalidateBackendAdaptersForSession(sessionSlug) {
|
|
428
|
+
if (!sessionSlug) return;
|
|
429
|
+
for (const key of Array.from(bridge.agentAdaptersByBackend.keys())) {
|
|
430
|
+
if (key.endsWith(`::${sessionSlug}`)) bridge.agentAdaptersByBackend.delete(key);
|
|
431
|
+
}
|
|
432
|
+
}
|
|
308
433
|
function saveProjectSessionsState() {
|
|
309
434
|
saveProjectSessions(settings.projectSessionsPath, projectSessionsState);
|
|
310
435
|
}
|
|
311
|
-
|
|
312
|
-
let sensitivityModeExpiresAt = 0;
|
|
436
|
+
bridge.sensitivityMode = SENSITIVITY_MODE_DEFAULT;
|
|
313
437
|
function currentBargeInThresholds() {
|
|
314
|
-
if (sensitivityModeExpiresAt && Date.now() > sensitivityModeExpiresAt) {
|
|
315
|
-
sensitivityMode = SENSITIVITY_MODE_DEFAULT;
|
|
316
|
-
sensitivityModeExpiresAt = 0;
|
|
317
|
-
log('barge-in sensitivity mode expired; restored', sensitivityMode);
|
|
438
|
+
if (bridge.sensitivityModeExpiresAt && Date.now() > bridge.sensitivityModeExpiresAt) {
|
|
439
|
+
bridge.sensitivityMode = SENSITIVITY_MODE_DEFAULT;
|
|
440
|
+
bridge.sensitivityModeExpiresAt = 0;
|
|
441
|
+
log('barge-in sensitivity mode expired; restored', bridge.sensitivityMode);
|
|
318
442
|
}
|
|
319
|
-
return bargeInThresholdsForMode(sensitivityMode, {
|
|
443
|
+
return bargeInThresholdsForMode(bridge.sensitivityMode, {
|
|
320
444
|
minSeconds: BARGE_IN_MIN_SECONDS,
|
|
321
445
|
minMeanDb: BARGE_IN_MIN_MEAN_VOLUME_DB,
|
|
322
446
|
minMaxDb: BARGE_IN_MIN_MAX_VOLUME_DB,
|
|
@@ -336,48 +460,28 @@ function currentPlaybackBargeInThresholds() {
|
|
|
336
460
|
};
|
|
337
461
|
}
|
|
338
462
|
function setSensitivityMode(mode, reason = 'manual') {
|
|
339
|
-
sensitivityMode = mode === 'conservative' ? 'conservative' : 'normal';
|
|
340
|
-
sensitivityModeExpiresAt = sensitivityMode === 'conservative' && SENSITIVITY_OUTDOOR_SECONDS > 0
|
|
463
|
+
bridge.sensitivityMode = mode === 'conservative' ? 'conservative' : 'normal';
|
|
464
|
+
bridge.sensitivityModeExpiresAt = bridge.sensitivityMode === 'conservative' && SENSITIVITY_OUTDOOR_SECONDS > 0
|
|
341
465
|
? Date.now() + SENSITIVITY_OUTDOOR_SECONDS * 1000
|
|
342
466
|
: 0;
|
|
343
467
|
const thresholds = currentBargeInThresholds();
|
|
344
|
-
log('barge-in sensitivity mode set', sensitivityMode, 'reason', reason, 'expiresAt', sensitivityModeExpiresAt || 'never', 'thresholds', thresholds);
|
|
468
|
+
log('barge-in sensitivity mode set', bridge.sensitivityMode, 'reason', reason, 'expiresAt', bridge.sensitivityModeExpiresAt || 'never', 'thresholds', thresholds);
|
|
345
469
|
return thresholds;
|
|
346
470
|
}
|
|
347
471
|
function sensitivityStatusText() {
|
|
348
472
|
const thresholds = currentBargeInThresholds();
|
|
349
|
-
const ttl = sensitivityModeExpiresAt ? Math.max(0, Math.round((sensitivityModeExpiresAt - Date.now()) / 1000)) : 0;
|
|
473
|
+
const ttl = bridge.sensitivityModeExpiresAt ? Math.max(0, Math.round((bridge.sensitivityModeExpiresAt - Date.now()) / 1000)) : 0;
|
|
350
474
|
return sensitivityStatusTextForLanguage(thresholds, ttl, settings.voiceLanguage);
|
|
351
475
|
}
|
|
352
476
|
|
|
353
477
|
function verboseStatusText() {
|
|
354
|
-
return verboseStatusTextForLanguage(verboseProgress, settings.voiceLanguage);
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
function progressEmoji(event) {
|
|
358
|
-
const category = progressCategory(event, { language: settings.voiceLanguage })?.key;
|
|
359
|
-
return {
|
|
360
|
-
test: '🧪',
|
|
361
|
-
edit: '✏️',
|
|
362
|
-
read: '📖',
|
|
363
|
-
search: '🔎',
|
|
364
|
-
terminal: '⌨️',
|
|
365
|
-
skill: '🧰',
|
|
366
|
-
browser: '🌐',
|
|
367
|
-
tool: '🛠️',
|
|
368
|
-
agent: '🤖',
|
|
369
|
-
work: '⚙️',
|
|
370
|
-
}[category] || '⚙️';
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
function formatProgressText(event) {
|
|
374
|
-
return formatProgressMessage(event, { language: settings.voiceLanguage });
|
|
478
|
+
return verboseStatusTextForLanguage(bridge.verboseProgress, settings.voiceLanguage);
|
|
375
479
|
}
|
|
376
480
|
|
|
377
481
|
function setVerboseProgress(enabled, reason = 'manual') {
|
|
378
|
-
verboseProgress = Boolean(enabled);
|
|
379
|
-
log('verbose progress mode set', verboseProgress, 'reason', reason);
|
|
380
|
-
return verboseProgress;
|
|
482
|
+
bridge.verboseProgress = Boolean(enabled);
|
|
483
|
+
log('verbose progress mode set', bridge.verboseProgress, 'reason', reason);
|
|
484
|
+
return bridge.verboseProgress;
|
|
381
485
|
}
|
|
382
486
|
|
|
383
487
|
function persistEnvValues(values) {
|
|
@@ -388,7 +492,7 @@ function persistEnvValues(values) {
|
|
|
388
492
|
} catch (e) {
|
|
389
493
|
warn('read .env for update failed', e?.stack || e);
|
|
390
494
|
}
|
|
391
|
-
const pending = new Map(Object.entries(values));
|
|
495
|
+
const pending = new Map(Object.entries(values).filter(([, value]) => value !== undefined));
|
|
392
496
|
const updated = lines.map(line => {
|
|
393
497
|
const match = line.match(/^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=.*$/);
|
|
394
498
|
if (!match || !pending.has(match[1])) return line;
|
|
@@ -410,8 +514,8 @@ function applyRuntimeLanguage(language) {
|
|
|
410
514
|
config = updateTtsVoiceConfig(config, { voiceType: preferredVoiceTypeForLanguage(config, preset.voiceLanguage) });
|
|
411
515
|
writeTtsVoiceConfig(TTS_VOICE_CONFIG_PATH, config);
|
|
412
516
|
const { selection } = applyVoiceConfigToProcessEnv(config);
|
|
413
|
-
|
|
414
|
-
|
|
517
|
+
rebuildTtsRuntimeSettings(selection);
|
|
518
|
+
if (selection.backend !== 'edge') settings.tts.edge.voice = preset.ttsVoice;
|
|
415
519
|
process.env.VOICE_LANGUAGE = preset.voiceLanguage;
|
|
416
520
|
process.env.WHISPER_CPP_LANGUAGE = preset.sttLanguage;
|
|
417
521
|
process.env.STT_LANGUAGE = preset.sttLanguage;
|
|
@@ -440,33 +544,23 @@ function voiceChangedText(selection) {
|
|
|
440
544
|
return `Voice changed to ${selection.voice?.label || selection.voiceType}.`;
|
|
441
545
|
}
|
|
442
546
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
if (!request) return false;
|
|
446
|
-
discardVoiceInputQueues('voice-change');
|
|
447
|
-
let config = ensureTtsVoiceConfig();
|
|
448
|
-
config = updateTtsVoiceConfig(config, request);
|
|
449
|
-
writeTtsVoiceConfig(TTS_VOICE_CONFIG_PATH, config);
|
|
450
|
-
const { selection } = applyVoiceConfigToProcessEnv(config);
|
|
451
|
-
settings.tts.backend = selection.backend;
|
|
452
|
-
if (selection.backend === 'edge') settings.tts.edge.voice = selection.voice.voice;
|
|
453
|
-
if (selection.voice?.language) settings.voiceLanguage = selection.voice.language;
|
|
454
|
-
persistEnvValues({
|
|
455
|
-
TTS_BACKEND: selection.backend,
|
|
456
|
-
TTS_VOICE_TYPE: selection.voiceType,
|
|
457
|
-
TTS_VOICE: selection.backend === 'edge' ? selection.voice.voice : process.env.TTS_VOICE,
|
|
458
|
-
VOICE_LANGUAGE: settings.voiceLanguage,
|
|
459
|
-
});
|
|
460
|
-
await speakText(voiceChangedText(selection), signal);
|
|
461
|
-
return true;
|
|
547
|
+
function isCloneVoiceType(voiceType) {
|
|
548
|
+
return /^(cloned_reference|prompt_reference|cosyvoice_reference)$/i.test(String(voiceType || ''));
|
|
462
549
|
}
|
|
463
550
|
|
|
464
|
-
async function
|
|
465
|
-
|
|
466
|
-
if (!
|
|
467
|
-
const
|
|
468
|
-
|
|
469
|
-
|
|
551
|
+
async function notifyVoiceCloneSampleGapIfNeeded(selection, signal) {
|
|
552
|
+
if (!selection || selection.backend === 'edge') return;
|
|
553
|
+
if (!isCloneVoiceType(selection.voiceType)) return;
|
|
554
|
+
const ref = String(selection.voice?.voice || '').trim();
|
|
555
|
+
if (!ref) return;
|
|
556
|
+
const candidatePath = path.isAbsolute(ref) ? ref : path.resolve(ROOT, ref);
|
|
557
|
+
if (fs.existsSync(candidatePath)) return;
|
|
558
|
+
const en = /^en/i.test(String(settings.voiceLanguage || ''));
|
|
559
|
+
const msg = en
|
|
560
|
+
? `${selection.backend} needs a voice clone sample at ${ref}. Say "voice clone capture" to record one, or pick a non-clone voice.`
|
|
561
|
+
: `${selection.backend} 백엔드는 음성 클론 샘플(${ref})이 필요해. "보이스 클로닝 캡처"라고 하거나 다른 보이스를 골라줘.`;
|
|
562
|
+
await sendText(`🎙️ ${msg}`);
|
|
563
|
+
await speakText(msg, signal, null);
|
|
470
564
|
}
|
|
471
565
|
|
|
472
566
|
function isAllowed(userId) { return settings.allowedUsers.size === 0 || settings.allowedUsers.has(String(userId)); }
|
|
@@ -512,13 +606,28 @@ function spokenResultOnly(userPrompt, answer, language = settings.voiceLanguage)
|
|
|
512
606
|
async function sendText(text) {
|
|
513
607
|
return sendDiscordText({
|
|
514
608
|
client,
|
|
515
|
-
channelId: activeTranscriptChannelId || settings.transcriptChannelId,
|
|
609
|
+
channelId: bridge.activeTranscriptChannelId || settings.transcriptChannelId,
|
|
516
610
|
text,
|
|
517
611
|
log,
|
|
518
612
|
warn,
|
|
519
613
|
});
|
|
520
614
|
}
|
|
521
615
|
|
|
616
|
+
async function sendEmbed(embed, { content = '' } = {}) {
|
|
617
|
+
if (!embed) return false;
|
|
618
|
+
try {
|
|
619
|
+
const channelId = bridge.activeTranscriptChannelId || settings.transcriptChannelId;
|
|
620
|
+
if (!channelId) return false;
|
|
621
|
+
const channel = await client.channels.fetch(channelId).catch(() => null);
|
|
622
|
+
if (!channel?.send) return false;
|
|
623
|
+
await channel.send(content ? { content, embeds: [embed] } : { embeds: [embed] });
|
|
624
|
+
return true;
|
|
625
|
+
} catch (e) {
|
|
626
|
+
warn('sendEmbed failed', e?.message || e);
|
|
627
|
+
return false;
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
522
631
|
async function sendChannelText(channel, text) {
|
|
523
632
|
const body = String(text || '');
|
|
524
633
|
const chunks = splitDiscordMessage(body);
|
|
@@ -526,18 +635,6 @@ async function sendChannelText(channel, text) {
|
|
|
526
635
|
return true;
|
|
527
636
|
}
|
|
528
637
|
|
|
529
|
-
function sendVerboseProgressText(event, signal) {
|
|
530
|
-
if (!verboseProgress || !signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
531
|
-
const formatted = formatProgressText(event).replace(/\s+/g, ' ').trim();
|
|
532
|
-
if (!formatted) return;
|
|
533
|
-
const message = formatted.slice(0, 1900);
|
|
534
|
-
const now = Date.now();
|
|
535
|
-
if (message === lastVerboseProgressText && now - lastVerboseProgressTextAt < 2000) return;
|
|
536
|
-
lastVerboseProgressText = message;
|
|
537
|
-
lastVerboseProgressTextAt = now;
|
|
538
|
-
void sendText(message).catch(e => warn('verbose progress text delivery failed', e?.stack || e));
|
|
539
|
-
}
|
|
540
|
-
|
|
541
638
|
function sleep(ms) {
|
|
542
639
|
return new Promise(resolve => setTimeout(resolve, ms));
|
|
543
640
|
}
|
|
@@ -553,83 +650,240 @@ function waitEvent(emitter, event, timeoutMs = 60000) {
|
|
|
553
650
|
});
|
|
554
651
|
}
|
|
555
652
|
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
653
|
+
// handleRecording lives inside utteranceRouter (extracted in Phase 4b) but
|
|
654
|
+
// voiceIO.flushUtterance needs to call it. Use a forward-declared `let` plus
|
|
655
|
+
// a thunk so the deps for createVoiceIO resolve before createUtteranceRouter
|
|
656
|
+
// is constructed.
|
|
657
|
+
let utteranceRouter;
|
|
658
|
+
let voiceTurnRunner;
|
|
659
|
+
const voiceIO = createVoiceIO({
|
|
660
|
+
bridge,
|
|
661
|
+
settings,
|
|
662
|
+
client,
|
|
663
|
+
execFileAsync,
|
|
664
|
+
log,
|
|
665
|
+
warn,
|
|
666
|
+
stamp,
|
|
667
|
+
sleep,
|
|
668
|
+
isAllowed,
|
|
669
|
+
UTTERANCE_IDLE_MS,
|
|
670
|
+
SUBSCRIBE_AFTER_SILENCE_MS,
|
|
671
|
+
MIN_UTTERANCE_BYTES,
|
|
672
|
+
MIN_MEAN_VOLUME_DB,
|
|
673
|
+
MIN_MAX_VOLUME_DB,
|
|
674
|
+
currentBargeInThresholds,
|
|
675
|
+
currentPlaybackBargeInThresholds,
|
|
676
|
+
createLiveBargeInMonitor,
|
|
677
|
+
shouldUseLivePlaybackBargeIn,
|
|
678
|
+
stopPlaybackForBargeIn,
|
|
679
|
+
analyzeAudio,
|
|
680
|
+
concatWavs,
|
|
681
|
+
saveCapturedVoiceCloneSample,
|
|
682
|
+
isBargeInCandidate,
|
|
683
|
+
validateProcessingBargeIn,
|
|
684
|
+
enqueueDeferredProcessingUtterance,
|
|
685
|
+
newLatencyTurn,
|
|
686
|
+
handleRecording: (...args) => voiceTurnRunner.handleRecording(...args),
|
|
687
|
+
});
|
|
688
|
+
const { transcribeOnce, transcribe, cleanTranscript, queueSegment, flushUtterance, subscribeUser } = voiceIO;
|
|
689
|
+
|
|
690
|
+
const discordVoiceSetup = createDiscordVoiceSetup({
|
|
691
|
+
bridge,
|
|
692
|
+
client,
|
|
693
|
+
settings,
|
|
694
|
+
ROOT,
|
|
695
|
+
log,
|
|
696
|
+
warn,
|
|
697
|
+
speakText,
|
|
698
|
+
waitEvent,
|
|
699
|
+
subscribeUser,
|
|
700
|
+
pendingFallbackNoticePromises,
|
|
701
|
+
bindProjectSessionToChannel,
|
|
702
|
+
createProjectSession,
|
|
703
|
+
resolveProjectSessionForChannel,
|
|
704
|
+
saveProjectSessionsState,
|
|
705
|
+
projectSessionsState,
|
|
706
|
+
invalidateBackendAdaptersForSession,
|
|
707
|
+
VOICE_CONNECT_TIMEOUT_MS,
|
|
708
|
+
});
|
|
709
|
+
const {
|
|
710
|
+
connectTo,
|
|
711
|
+
autoJoin,
|
|
712
|
+
findVoiceChannelBySelector,
|
|
713
|
+
voiceChannelLabel,
|
|
714
|
+
resolveVoiceChannelForAttach,
|
|
715
|
+
attachVoiceChannelToTextSession,
|
|
716
|
+
gracefulShutdown,
|
|
717
|
+
} = discordVoiceSetup;
|
|
718
|
+
utteranceRouter = createUtteranceRouter({
|
|
719
|
+
bridge,
|
|
720
|
+
agentTurnLifecycle,
|
|
721
|
+
log,
|
|
722
|
+
warn,
|
|
723
|
+
path,
|
|
724
|
+
fs,
|
|
725
|
+
ROOT,
|
|
726
|
+
TTS_VOICE_CONFIG_PATH,
|
|
727
|
+
agentAdapter,
|
|
728
|
+
settings,
|
|
729
|
+
isPlanEntryUtterance,
|
|
730
|
+
parsePlanOutput,
|
|
731
|
+
parsePlanVoiceCommand,
|
|
732
|
+
applyPlanCommand,
|
|
733
|
+
renderFinalPlan,
|
|
734
|
+
planModePreamble,
|
|
735
|
+
planExecutionPreamble,
|
|
736
|
+
parseDecisionAnswer,
|
|
737
|
+
renderDecisionPrompt,
|
|
738
|
+
renderResolvedDecisions,
|
|
739
|
+
isAgentRoutingDecision,
|
|
740
|
+
projectSessionContextText,
|
|
741
|
+
resolveProjectSessionForChannel,
|
|
742
|
+
createBridgeAgentAdapter,
|
|
743
|
+
buildAgentSettings,
|
|
744
|
+
commandIsInstalled,
|
|
745
|
+
shellSplit,
|
|
746
|
+
sendText,
|
|
747
|
+
speakText,
|
|
748
|
+
ensureTtsVoiceConfig,
|
|
749
|
+
updateTtsVoiceConfig,
|
|
750
|
+
writeTtsVoiceConfig,
|
|
751
|
+
applyVoiceConfigToProcessEnv,
|
|
752
|
+
ensureSelectedTtsBackendInstalled,
|
|
753
|
+
rebuildTtsRuntimeSettings,
|
|
754
|
+
voiceCommandFromTranscript,
|
|
755
|
+
voiceChangedText,
|
|
756
|
+
voiceLanguageCommandFromTranscript,
|
|
757
|
+
voiceCloneCommandFromText,
|
|
758
|
+
voiceCloneCapture,
|
|
759
|
+
notifyVoiceCloneSampleGapIfNeeded,
|
|
760
|
+
languageChangedText,
|
|
761
|
+
applyRuntimeLanguage,
|
|
762
|
+
persistEnvValues,
|
|
763
|
+
discardVoiceInputQueues,
|
|
764
|
+
// Phase 4b deps
|
|
765
|
+
transcribe,
|
|
766
|
+
beginStreamingTurn,
|
|
767
|
+
endStreamingTurn,
|
|
768
|
+
client,
|
|
769
|
+
isAllowed,
|
|
770
|
+
isAbortError,
|
|
771
|
+
sleep,
|
|
772
|
+
sendEmbed,
|
|
773
|
+
speakImmediateNotice,
|
|
774
|
+
reloadRuntimeLanguageFromEnv,
|
|
775
|
+
drainDeferredProcessingUtterances,
|
|
776
|
+
maybeNotifyTaskComplete,
|
|
777
|
+
ontologyStateFor,
|
|
778
|
+
captureOntologyFromTurn,
|
|
779
|
+
queueProgressSpeechText,
|
|
780
|
+
stopProgressSpeech,
|
|
781
|
+
agentAnswerHeader,
|
|
782
|
+
emptyAgentAnswer,
|
|
783
|
+
formatRecentDiscordContext,
|
|
784
|
+
formatSttResultMessage,
|
|
785
|
+
formatSttStartMessage,
|
|
786
|
+
formatVoiceErrorMessage,
|
|
787
|
+
formatWakeRejectedMessage,
|
|
788
|
+
spokenResultOnly,
|
|
789
|
+
stripWake,
|
|
790
|
+
acceptsWake,
|
|
791
|
+
sensitivityChangedSpeech,
|
|
792
|
+
sensitivityModeFromTranscript,
|
|
793
|
+
sensitivityStatusText,
|
|
794
|
+
setSensitivityMode,
|
|
795
|
+
isSensitivityOnlyRequest,
|
|
796
|
+
verboseChangedSpeech,
|
|
797
|
+
verboseModeFromTranscript,
|
|
798
|
+
verboseStatusText,
|
|
799
|
+
setVerboseProgress,
|
|
800
|
+
isVerboseOnlyRequest,
|
|
801
|
+
isRoutingOnlyUtterance,
|
|
802
|
+
parseAgentRoutingCommand,
|
|
803
|
+
renderAgentPrefix,
|
|
804
|
+
buildCrossAgentPrompt,
|
|
805
|
+
buildFallbackDecision,
|
|
806
|
+
parseResearchCommand,
|
|
807
|
+
runResearchTurn,
|
|
808
|
+
PROGRESS_IDLE_CHECK_MS,
|
|
809
|
+
PROGRESS_IDLE_NOTICE_INITIAL_MS,
|
|
810
|
+
PROGRESS_IDLE_NOTICE_LIMIT,
|
|
811
|
+
PROGRESS_IDLE_NOTICE_MAX_MS,
|
|
812
|
+
PROGRESS_IDLE_NOTICE_MULTIPLIER,
|
|
813
|
+
STT_START_VOICE_NOTICE,
|
|
814
|
+
});
|
|
815
|
+
const {
|
|
816
|
+
adapterForProjectSession,
|
|
817
|
+
routingStateFor,
|
|
818
|
+
recordUtterance,
|
|
819
|
+
clearTransientRouting,
|
|
820
|
+
adapterForBackend,
|
|
821
|
+
handleTtsVoiceCommand,
|
|
822
|
+
handleLanguageCommand,
|
|
823
|
+
handleVoiceCloneCommand,
|
|
824
|
+
interruptCurrentResponse,
|
|
825
|
+
} = utteranceRouter;
|
|
826
|
+
|
|
827
|
+
const planDispatcher = createPlanDispatcher({
|
|
828
|
+
bridge, settings,
|
|
829
|
+
sendText, speakText,
|
|
830
|
+
routingStateFor, adapterForBackend, adapterForProjectSession,
|
|
831
|
+
resolveProjectSessionForChannel,
|
|
832
|
+
isAgentRoutingDecision,
|
|
833
|
+
parseDecisionAnswer, parsePlanVoiceCommand: parsePlanVoiceCommand,
|
|
834
|
+
applyPlanCommand: applyPlanCommand,
|
|
835
|
+
parsePlanOutput,
|
|
836
|
+
renderDecisionPrompt, renderResolvedDecisions, renderFinalPlan,
|
|
837
|
+
planModePreamble, planExecutionPreamble, isPlanEntryUtterance,
|
|
838
|
+
});
|
|
839
|
+
const {
|
|
840
|
+
planChannelKey,
|
|
841
|
+
askNextDecision,
|
|
842
|
+
finalizePlanReady,
|
|
843
|
+
dispatchPlanModeUtterance,
|
|
844
|
+
planNarrationLines,
|
|
845
|
+
} = planDispatcher;
|
|
846
|
+
|
|
847
|
+
voiceTurnRunner = createVoiceTurnRunner({
|
|
848
|
+
bridge,
|
|
849
|
+
agentTurnLifecycle,
|
|
850
|
+
settings, client, log, warn, fs,
|
|
851
|
+
// From voice_io
|
|
852
|
+
transcribe,
|
|
853
|
+
// From tts_player
|
|
854
|
+
beginStreamingTurn, endStreamingTurn, speakText,
|
|
855
|
+
// From progress_handler
|
|
856
|
+
queueProgressSpeechText, stopProgressSpeech, speakImmediateNotice,
|
|
857
|
+
// From notification_handler
|
|
858
|
+
maybeNotifyTaskComplete,
|
|
859
|
+
// From utterance_router (sibling-module dispatch + adapter selection)
|
|
860
|
+
handleLanguageCommand, handleTtsVoiceCommand, handleVoiceCloneCommand,
|
|
861
|
+
dispatchPlanModeUtterance,
|
|
862
|
+
adapterForBackend, adapterForProjectSession,
|
|
863
|
+
planChannelKey, routingStateFor, recordUtterance, clearTransientRouting,
|
|
864
|
+
// Direct (imported in main or hoisted helpers)
|
|
865
|
+
isAllowed, isAbortError, sleep, sendText, sendEmbed,
|
|
866
|
+
reloadRuntimeLanguageFromEnv, drainDeferredProcessingUtterances,
|
|
867
|
+
resolveProjectSessionForChannel, projectSessionContextText,
|
|
868
|
+
ontologyStateFor, captureOntologyFromTurn,
|
|
869
|
+
formatRecentDiscordContext,
|
|
870
|
+
formatSttResultMessage, formatSttStartMessage,
|
|
871
|
+
formatVoiceErrorMessage, formatWakeRejectedMessage,
|
|
872
|
+
agentAnswerHeader, emptyAgentAnswer, spokenResultOnly,
|
|
873
|
+
stripWake, acceptsWake,
|
|
874
|
+
sensitivityChangedSpeech, sensitivityModeFromTranscript, sensitivityStatusText,
|
|
875
|
+
setSensitivityMode, isSensitivityOnlyRequest,
|
|
876
|
+
verboseChangedSpeech, verboseModeFromTranscript, verboseStatusText,
|
|
877
|
+
setVerboseProgress, isVerboseOnlyRequest,
|
|
878
|
+
isRoutingOnlyUtterance, parseAgentRoutingCommand, renderAgentPrefix,
|
|
879
|
+
buildCrossAgentPrompt, buildFallbackDecision,
|
|
880
|
+
parseDecisionAnswer,
|
|
881
|
+
parseResearchCommand, runResearchTurn,
|
|
882
|
+
PROGRESS_IDLE_CHECK_MS, PROGRESS_IDLE_NOTICE_INITIAL_MS,
|
|
883
|
+
PROGRESS_IDLE_NOTICE_LIMIT, PROGRESS_IDLE_NOTICE_MAX_MS,
|
|
884
|
+
PROGRESS_IDLE_NOTICE_MULTIPLIER, STT_START_VOICE_NOTICE,
|
|
885
|
+
});
|
|
886
|
+
const { handleRecording } = voiceTurnRunner;
|
|
633
887
|
|
|
634
888
|
function isAbortError(e) {
|
|
635
889
|
return e?.name === 'AbortError' || e?.code === 'ABORT_ERR';
|
|
@@ -669,237 +923,45 @@ async function refreshTtsRuntimeConfig() {
|
|
|
669
923
|
if (previousBackend !== settings.tts.backend) {
|
|
670
924
|
const rebuilt = buildTtsSettings(process.env, ROOT);
|
|
671
925
|
Object.assign(settings.tts, rebuilt);
|
|
672
|
-
ttsBackend
|
|
926
|
+
try { bridge.ttsBackend?.close?.(); } catch (e) { warn('tts backend close failed', e?.message || e); }
|
|
927
|
+
bridge.ttsBackend = createTtsBackend(settings.tts, { execFileAsync, spawn, log, warn, onFallback: ttsFallbackNotice, voiceProvider: () => settings.tts.edge.voice });
|
|
673
928
|
log('tts backend reloaded from voice config', settings.tts.backend, 'voiceType', selection.voiceType);
|
|
674
929
|
}
|
|
675
930
|
return selection;
|
|
676
931
|
}
|
|
677
932
|
|
|
678
|
-
async function synthTTS(text, signal) {
|
|
679
|
-
await refreshTtsRuntimeConfig();
|
|
680
|
-
let lastError = null;
|
|
681
|
-
for (let attempt = 1; attempt <= 3; attempt += 1) {
|
|
682
|
-
try {
|
|
683
|
-
log('final tts synth start', 'backend', ttsBackend.name, 'attempt', attempt, 'chars', String(text || '').length);
|
|
684
|
-
const out = await ttsBackend.synthesize(text, { signal, kind: 'final' });
|
|
685
|
-
log('final tts synth done', 'backend', ttsBackend.name, 'attempt', attempt, out, fs.statSync(out).size);
|
|
686
|
-
return out;
|
|
687
|
-
} catch (e) {
|
|
688
|
-
lastError = e;
|
|
689
|
-
if (isAbortError(e) || signal?.aborted) throw e;
|
|
690
|
-
warn('final tts synth failed', 'attempt', attempt, e?.stderr?.toString?.().slice(-500) || e?.message || e);
|
|
691
|
-
await sleep(1000 * attempt);
|
|
692
|
-
}
|
|
693
|
-
}
|
|
694
|
-
throw lastError;
|
|
695
|
-
}
|
|
696
|
-
|
|
697
|
-
async function synthProgressTTS(text, signal) {
|
|
698
|
-
await refreshTtsRuntimeConfig();
|
|
699
|
-
const ext = ttsBackend.outputExtension || 'mp3';
|
|
700
|
-
const cachePath = path.join(settings.tts.progressCacheDir, progressTtsCacheFileName({
|
|
701
|
-
backendKeyParts: ttsBackend.cacheKeyParts(),
|
|
702
|
-
text,
|
|
703
|
-
ext,
|
|
704
|
-
}));
|
|
705
|
-
if (fs.existsSync(cachePath) && fs.statSync(cachePath).size > 0) {
|
|
706
|
-
log('progress tts cache hit', text, cachePath);
|
|
707
|
-
return cachePath;
|
|
708
|
-
}
|
|
709
|
-
log('progress tts cache miss', text);
|
|
710
|
-
const tmp = await ttsBackend.synthesize(text, { signal, kind: 'progress' });
|
|
711
|
-
fs.renameSync(tmp, cachePath);
|
|
712
|
-
return cachePath;
|
|
713
|
-
}
|
|
714
|
-
|
|
715
|
-
async function playAudio(file, { deleteAfter = true } = {}) {
|
|
716
|
-
if (!connection) return;
|
|
717
|
-
speaking = true;
|
|
718
|
-
try {
|
|
719
|
-
const resource = createAudioResource(file, { inputType: StreamType.Arbitrary, inlineVolume: true });
|
|
720
|
-
resource.volume?.setVolume(settings.tts.volume);
|
|
721
|
-
player.play(resource);
|
|
722
|
-
connection.subscribe(player);
|
|
723
|
-
await waitEvent(player, AudioPlayerStatus.Idle, 120000).catch(() => {});
|
|
724
|
-
} finally {
|
|
725
|
-
speaking = false;
|
|
726
|
-
if (deleteAfter) fs.rm(file, { force: true }, () => {});
|
|
727
|
-
}
|
|
728
|
-
}
|
|
729
|
-
|
|
730
|
-
async function speakText(text, signal, metricsTurn = null, options = {}) {
|
|
731
|
-
const chunks = splitForTTS(text, settings.tts.maxChars);
|
|
732
|
-
if (!chunks.length) return;
|
|
733
|
-
if (options.mirrorText !== false) {
|
|
734
|
-
await sendText(`${options.mirrorPrefix || '🔊 음성으로 읽는 내용'}:\n${String(text || '')}`);
|
|
735
|
-
}
|
|
736
|
-
log('TTS chunks', chunks.length, 'maxChars', settings.tts.maxChars, 'backend', ttsBackend.name);
|
|
737
|
-
const playbackGeneration = speechPlaybackGeneration;
|
|
738
|
-
const playbackStopped = () => playbackGeneration !== speechPlaybackGeneration;
|
|
739
|
-
let synthMs = 0;
|
|
740
|
-
let playMs = 0;
|
|
741
|
-
const ttsStart = Date.now();
|
|
742
|
-
await playChunkedTTSWithPrefetch(chunks, {
|
|
743
|
-
signal,
|
|
744
|
-
log,
|
|
745
|
-
synth: async chunk => {
|
|
746
|
-
if (playbackStopped()) return null;
|
|
747
|
-
const start = Date.now();
|
|
748
|
-
try { return await synthTTS(chunk, signal); }
|
|
749
|
-
finally { synthMs += Date.now() - start; }
|
|
750
|
-
},
|
|
751
|
-
play: async file => {
|
|
752
|
-
if (playbackStopped()) {
|
|
753
|
-
await fs.promises.rm(file, { force: true }).catch(() => {});
|
|
754
|
-
return;
|
|
755
|
-
}
|
|
756
|
-
const start = Date.now();
|
|
757
|
-
try { return await playAudio(file); }
|
|
758
|
-
finally { playMs += Date.now() - start; }
|
|
759
|
-
},
|
|
760
|
-
cleanup: file => fs.promises.rm(file, { force: true }),
|
|
761
|
-
});
|
|
762
|
-
metricsTurn?.stage('tts_synth', synthMs, { ttsChunks: chunks.length, spokenChars: String(text || '').length });
|
|
763
|
-
metricsTurn?.stage('tts_play', playMs);
|
|
764
|
-
metricsTurn?.stage('tts_total', Date.now() - ttsStart);
|
|
765
|
-
}
|
|
766
|
-
|
|
767
|
-
async function speakProgress(text, signal) {
|
|
768
|
-
if (signal?.aborted) return;
|
|
769
|
-
try {
|
|
770
|
-
const mp3 = await synthProgressTTS(text, signal);
|
|
771
|
-
if (signal?.aborted) return;
|
|
772
|
-
await playAudio(mp3, { deleteAfter: false });
|
|
773
|
-
} catch (e) {
|
|
774
|
-
if (!isAbortError(e)) warn('progress tts failed', e?.stack || e);
|
|
775
|
-
}
|
|
776
|
-
}
|
|
777
|
-
|
|
778
|
-
async function speakImmediateNotice(text, signal, reason = 'notice') {
|
|
779
|
-
if (signal?.aborted) return;
|
|
780
|
-
try {
|
|
781
|
-
log('immediate notice speech', reason, 'text', String(text || '').slice(0, 80));
|
|
782
|
-
const mp3 = await synthProgressTTS(text, signal);
|
|
783
|
-
if (signal?.aborted) return;
|
|
784
|
-
await playAudio(mp3, { deleteAfter: false });
|
|
785
|
-
} catch (e) {
|
|
786
|
-
if (!isAbortError(e)) warn('immediate notice speech failed', reason, e?.stack || e);
|
|
787
|
-
}
|
|
788
|
-
}
|
|
789
|
-
|
|
790
|
-
function queueProgressSpeechText(text, signal, reason = 'status') {
|
|
791
|
-
const spoken = String(text || '').replace(/\s+/g, ' ').trim();
|
|
792
|
-
if (!spoken || !signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
793
|
-
verboseProgressSpeechQueue = verboseProgressSpeechQueue
|
|
794
|
-
.catch(() => {})
|
|
795
|
-
.then(async () => {
|
|
796
|
-
if (signal.aborted || activeProgressSignal !== signal || !processing) return;
|
|
797
|
-
log('progress speech queued', reason, 'text', spoken);
|
|
798
|
-
await speakProgress(spoken, signal);
|
|
799
|
-
});
|
|
800
|
-
}
|
|
801
|
-
|
|
802
|
-
function flushProgressSpeechBatch(signal, reason = 'timer') {
|
|
803
|
-
if (!signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
804
|
-
if (progressSpeechBatchTimer) {
|
|
805
|
-
clearTimeout(progressSpeechBatchTimer);
|
|
806
|
-
progressSpeechBatchTimer = null;
|
|
807
|
-
}
|
|
808
|
-
const events = progressSpeechBatch;
|
|
809
|
-
progressSpeechBatch = [];
|
|
810
|
-
progressSpeechBatchSignal = null;
|
|
811
|
-
progressSpeechBatchStartedAt = 0;
|
|
812
|
-
const text = summarizeProgressEvents(events, { maxCategories: 3, language: settings.voiceLanguage });
|
|
813
|
-
if (!text) return;
|
|
814
|
-
queueProgressSpeechText(text, signal, `batch-${reason}-${events.length}`);
|
|
815
|
-
}
|
|
816
|
-
|
|
817
|
-
function queueVerboseProgressSpeech(event, signal) {
|
|
818
|
-
if (!verboseProgress || !signal || signal.aborted || activeProgressSignal !== signal) return;
|
|
819
|
-
const text = String(event || '').replace(/\s+/g, ' ').trim().slice(0, 120);
|
|
820
|
-
if (!text) return;
|
|
821
|
-
if (progressSpeechBatchSignal && progressSpeechBatchSignal !== signal) {
|
|
822
|
-
progressSpeechBatch = [];
|
|
823
|
-
if (progressSpeechBatchTimer) clearTimeout(progressSpeechBatchTimer);
|
|
824
|
-
progressSpeechBatchTimer = null;
|
|
825
|
-
progressSpeechBatchStartedAt = 0;
|
|
826
|
-
}
|
|
827
|
-
progressSpeechBatchSignal = signal;
|
|
828
|
-
if (!progressSpeechBatchStartedAt) progressSpeechBatchStartedAt = Date.now();
|
|
829
|
-
progressSpeechBatch.push(text);
|
|
830
|
-
const elapsedMs = Date.now() - progressSpeechBatchStartedAt;
|
|
831
|
-
const ratePerSecond = progressSpeechBatch.length / Math.max(0.2, elapsedMs / 1000);
|
|
832
|
-
const maxBatchEvents = ratePerSecond >= 6 ? 5 : ratePerSecond >= 3 ? 4 : 3;
|
|
833
|
-
const batchDelayMs = ratePerSecond >= 6 ? 650 : ratePerSecond >= 3 ? 550 : 450;
|
|
834
|
-
if (progressSpeechBatch.length >= maxBatchEvents) {
|
|
835
|
-
flushProgressSpeechBatch(signal, 'full');
|
|
836
|
-
return;
|
|
837
|
-
}
|
|
838
|
-
if (progressSpeechBatchTimer) clearTimeout(progressSpeechBatchTimer);
|
|
839
|
-
progressSpeechBatchTimer = setTimeout(() => flushProgressSpeechBatch(signal, 'timer'), batchDelayMs);
|
|
840
|
-
}
|
|
841
|
-
|
|
842
|
-
function clearProgressSpeechBatch(signal = activeProgressSignal) {
|
|
843
|
-
if (progressSpeechBatchTimer) {
|
|
844
|
-
clearTimeout(progressSpeechBatchTimer);
|
|
845
|
-
progressSpeechBatchTimer = null;
|
|
846
|
-
}
|
|
847
|
-
if (!signal || progressSpeechBatchSignal === signal) {
|
|
848
|
-
progressSpeechBatch = [];
|
|
849
|
-
progressSpeechBatchSignal = null;
|
|
850
|
-
progressSpeechBatchStartedAt = 0;
|
|
851
|
-
}
|
|
852
|
-
}
|
|
853
|
-
|
|
854
|
-
function stopProgressSpeech(signal, reason = 'final-answer') {
|
|
855
|
-
if (activeProgressSignal !== signal) return;
|
|
856
|
-
clearProgressSpeechBatch(signal);
|
|
857
|
-
activeProgressSignal = null;
|
|
858
|
-
if (activeProgressAbortController && !activeProgressAbortController.signal.aborted) {
|
|
859
|
-
try { activeProgressAbortController.abort(); } catch (e) { warn('abort progress speech failed', e?.stack || e); }
|
|
860
|
-
}
|
|
861
|
-
if (speaking) {
|
|
862
|
-
log('stop progress speech before final answer', reason);
|
|
863
|
-
try { player.stop(true); } catch (e) { warn('stop progress speech failed', e?.stack || e); }
|
|
864
|
-
speaking = false;
|
|
865
|
-
}
|
|
866
|
-
}
|
|
867
|
-
|
|
868
933
|
async function handleTextAgentMessage(msg, text, { speakResponse = false } = {}) {
|
|
869
|
-
if (processing) {
|
|
934
|
+
if (bridge.processing) {
|
|
870
935
|
await msg.reply('지금 이전 작업을 처리 중이야. 끝나면 다시 보내줘.');
|
|
871
936
|
return;
|
|
872
937
|
}
|
|
873
|
-
|
|
874
|
-
const controller =
|
|
875
|
-
currentAbortController = controller;
|
|
876
|
-
const signal = controller.signal;
|
|
877
|
-
const progressController = new AbortController();
|
|
878
|
-
activeProgressAbortController = progressController;
|
|
879
|
-
activeProgressSignal = progressController.signal;
|
|
880
|
-
activeProgressLastEventAt = Date.now();
|
|
881
|
-
const previousTranscriptChannelId = activeTranscriptChannelId;
|
|
938
|
+
const turn = agentTurnLifecycle.start();
|
|
939
|
+
const { controller, signal, progressController } = turn;
|
|
882
940
|
const session = resolveProjectSessionForChannel(msg.channelId);
|
|
883
|
-
activeTranscriptChannelId = session?.transcriptChannelId || msg.channelId;
|
|
941
|
+
bridge.activeTranscriptChannelId = session?.transcriptChannelId || msg.channelId;
|
|
884
942
|
const selectedAgentAdapter = adapterForProjectSession(session);
|
|
885
943
|
const projectContext = projectSessionContextText(session);
|
|
944
|
+
const recentDiscordContext = formatRecentDiscordContext(bridge.recentDiscordTextByChannel, {
|
|
945
|
+
channelId: bridge.activeTranscriptChannelId,
|
|
946
|
+
});
|
|
886
947
|
const plan = {
|
|
887
948
|
task: true,
|
|
888
949
|
label: selectedAgentAdapter.label,
|
|
889
|
-
verboseProgress,
|
|
950
|
+
verboseProgress: bridge.verboseProgress,
|
|
890
951
|
language: settings.voiceLanguage,
|
|
891
952
|
cwd: session?.workdir,
|
|
892
953
|
projectContext,
|
|
954
|
+
recentDiscordContext,
|
|
893
955
|
};
|
|
894
956
|
const sessionBefore = selectedAgentAdapter.readSessionId?.();
|
|
895
|
-
log('text agent request start', selectedAgentAdapter.label, sessionBefore ? 'resume-existing-session' : 'new-session', 'verbose', verboseProgress, session ? `project=${session.slug}` : 'project=default');
|
|
957
|
+
log('text agent request start', selectedAgentAdapter.label, sessionBefore ? 'resume-existing-session' : 'new-session', 'verbose', bridge.verboseProgress, session ? `project=${session.slug}` : 'project=default');
|
|
896
958
|
try {
|
|
897
959
|
const result = await selectedAgentAdapter.run(text, signal, plan);
|
|
898
960
|
const answer = result.answer || emptyAgentAnswer(settings.voiceLanguage);
|
|
899
961
|
const fullAnswerText = `${agentAnswerHeader(settings.voiceLanguage, selectedAgentAdapter.label)}\n${answer}`;
|
|
900
962
|
await sendChannelText(msg.channel, fullAnswerText);
|
|
901
963
|
stopProgressSpeech(progressController.signal, 'text-agent-answer-ready');
|
|
902
|
-
if (speakResponse && connection) {
|
|
964
|
+
if (speakResponse && bridge.connection) {
|
|
903
965
|
const spokenAnswer = spokenResultOnly(text, answer, settings.voiceLanguage);
|
|
904
966
|
await speakText(spokenAnswer, signal, null, { mirrorText: false });
|
|
905
967
|
}
|
|
@@ -908,15 +970,11 @@ async function handleTextAgentMessage(msg, text, { speakResponse = false } = {})
|
|
|
908
970
|
warn('text agent request failed', e?.stack || e);
|
|
909
971
|
await sendChannelText(msg.channel, formatVoiceErrorMessage(settings.voiceLanguage, String(e?.message || e).slice(0, 800)));
|
|
910
972
|
} finally {
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
if (activeProgressSignal === progressController.signal) activeProgressSignal = null;
|
|
915
|
-
if (activeProgressAbortController?.signal === progressController.signal) activeProgressAbortController = null;
|
|
973
|
+
// Text-path-only behaviour pre-refactor: drain the verbose-progress batch
|
|
974
|
+
// before tearing the controllers down. Kept explicit so the lifecycle's
|
|
975
|
+
// finish() can stay path-agnostic.
|
|
916
976
|
clearProgressSpeechBatch(progressController.signal);
|
|
917
|
-
|
|
918
|
-
activeTranscriptChannelId = previousTranscriptChannelId;
|
|
919
|
-
processing = false;
|
|
977
|
+
agentTurnLifecycle.finish(turn);
|
|
920
978
|
}
|
|
921
979
|
}
|
|
922
980
|
|
|
@@ -940,53 +998,6 @@ async function saveCapturedVoiceCloneSample(userId, wavPath, pcmBytes, segments,
|
|
|
940
998
|
return true;
|
|
941
999
|
}
|
|
942
1000
|
|
|
943
|
-
async function handleVoiceCloneCommand(userId, prompt, signal = null) {
|
|
944
|
-
const command = voiceCloneCommandFromText(prompt);
|
|
945
|
-
if (!command) return false;
|
|
946
|
-
if (command.action === 'cancel') {
|
|
947
|
-
const cancelled = voiceCloneCapture.cancel(userId);
|
|
948
|
-
await sendText(cancelled ? '🎙️ 보이스 클로닝 샘플 캡처를 취소했어.' : '🎙️ 대기 중인 보이스 클로닝 샘플 캡처가 없어.');
|
|
949
|
-
await speakText(cancelled ? '목소리 샘플 녹음 대기를 취소했어.' : '대기 중인 목소리 샘플 녹음은 없어.', signal);
|
|
950
|
-
return true;
|
|
951
|
-
}
|
|
952
|
-
if (command.action === 'status') {
|
|
953
|
-
const current = voiceCloneCapture.current();
|
|
954
|
-
const status = current?.userId === String(userId)
|
|
955
|
-
? `🎙️ 다음 유효한 음성을 ${path.relative(ROOT, current.targetPath)}에 저장할게.`
|
|
956
|
-
: '🎙️ 지금 대기 중인 보이스 클로닝 샘플 캡처는 없어.';
|
|
957
|
-
await sendText(status);
|
|
958
|
-
await speakText(current?.userId === String(userId) ? '다음에 말하는 목소리를 샘플로 저장할게.' : '대기 중인 목소리 샘플 녹음은 없어.', signal);
|
|
959
|
-
return true;
|
|
960
|
-
}
|
|
961
|
-
const armed = voiceCloneCapture.arm({ userId, source: 'voice-command' });
|
|
962
|
-
await sendText(`🎙️ 보이스 클로닝 샘플 캡처 대기 중. 다음 10초에서 30초 정도 말하면 ${path.relative(ROOT, armed.targetPath)}에 저장할게.`);
|
|
963
|
-
await speakText('좋아. 다음에 10초에서 30초 정도 말하면 그 음성을 목소리 샘플로 저장할게.', signal);
|
|
964
|
-
return true;
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
function stopPlaybackForBargeIn(userId, reason = 'playback-barge-in') {
|
|
968
|
-
if (!speaking) return false;
|
|
969
|
-
log('stop playback for barge-in', 'byUser', userId, 'reason', reason, 'speaking', speaking, 'processing', processing, 'turn', activeTurnId);
|
|
970
|
-
speechPlaybackGeneration += 1;
|
|
971
|
-
try { player.stop(true); } catch (e) { warn('stop playback failed', e?.stack || e); }
|
|
972
|
-
speaking = false;
|
|
973
|
-
return true;
|
|
974
|
-
}
|
|
975
|
-
|
|
976
|
-
function interruptCurrentResponse(userId, reason = 'barge-in') {
|
|
977
|
-
if (!speaking && !processing) return false;
|
|
978
|
-
const turnId = activeTurnId;
|
|
979
|
-
if (turnId) interruptedTurns.add(turnId);
|
|
980
|
-
log('interrupt current response', 'byUser', userId, 'reason', reason, 'speaking', speaking, 'processing', processing, 'turn', turnId);
|
|
981
|
-
if (currentAbortController && !currentAbortController.signal.aborted) {
|
|
982
|
-
try { currentAbortController.abort(); } catch (e) { warn('abort current response failed', e?.stack || e); }
|
|
983
|
-
}
|
|
984
|
-
try { player.stop(true); } catch (e) { warn('stop playback failed', e?.stack || e); }
|
|
985
|
-
speaking = false;
|
|
986
|
-
processing = false;
|
|
987
|
-
return true;
|
|
988
|
-
}
|
|
989
|
-
|
|
990
1001
|
function acceptsWake(text) {
|
|
991
1002
|
if (!settings.requireWakeWord) return true;
|
|
992
1003
|
const low = text.toLowerCase();
|
|
@@ -1034,17 +1045,6 @@ async function concatWavs(files, output) {
|
|
|
1034
1045
|
}
|
|
1035
1046
|
}
|
|
1036
1047
|
|
|
1037
|
-
function queueSegment(userId, file, pcmBytes, startedAtMs = Date.now(), endedAtMs = Date.now()) {
|
|
1038
|
-
const pending = bridgeState.appendSegment(userId, {
|
|
1039
|
-
file,
|
|
1040
|
-
pcmBytes,
|
|
1041
|
-
startedAtMs,
|
|
1042
|
-
endedAtMs,
|
|
1043
|
-
timerFactory: () => setTimeout(() => flushUtterance(userId).catch(e => warn('flushUtterance failed', userId, e?.stack || e)), UTTERANCE_IDLE_MS),
|
|
1044
|
-
});
|
|
1045
|
-
log('queued segment', userId, 'segments', pending.files.length, 'totalPcmBytes', pending.pcmBytes, 'idleMs', UTTERANCE_IDLE_MS, 'epoch', pending.epoch);
|
|
1046
|
-
}
|
|
1047
|
-
|
|
1048
1048
|
function isBargeInCandidate(pcmBytes, levels) {
|
|
1049
1049
|
const thresholds = currentBargeInThresholds();
|
|
1050
1050
|
return isValidatedBargeInCandidate(pcmBytes, levels, thresholds);
|
|
@@ -1052,7 +1052,7 @@ function isBargeInCandidate(pcmBytes, levels) {
|
|
|
1052
1052
|
|
|
1053
1053
|
function enqueueDeferredProcessingUtterance({ userId, wavPath, pcmBytes, segments, startedAtMs = Date.now() }) {
|
|
1054
1054
|
const item = { userId, wavPath, pcmBytes, segments, startedAtMs };
|
|
1055
|
-
const result = bridgeState.enqueueDeferred(item, enqueueDeferredUtterance, MAX_DEFERRED_PROCESSING_UTTERANCES);
|
|
1055
|
+
const result = bridge.bridgeState.enqueueDeferred(item, enqueueDeferredUtterance, MAX_DEFERRED_PROCESSING_UTTERANCES);
|
|
1056
1056
|
if (!result.queued) {
|
|
1057
1057
|
log('drop deferred utterance because queue disabled', userId, wavPath, 'max', MAX_DEFERRED_PROCESSING_UTTERANCES);
|
|
1058
1058
|
return false;
|
|
@@ -1060,15 +1060,15 @@ function enqueueDeferredProcessingUtterance({ userId, wavPath, pcmBytes, segment
|
|
|
1060
1060
|
if (result.dropped) {
|
|
1061
1061
|
log('drop oldest deferred utterance because queue is full', result.dropped?.userId, result.dropped?.wavPath);
|
|
1062
1062
|
}
|
|
1063
|
-
log('queued deferred utterance while processing', userId, wavPath, 'queueSize', bridgeState.deferredSize(), 'epoch', bridgeState.currentEpoch());
|
|
1063
|
+
log('queued deferred utterance while processing', userId, wavPath, 'queueSize', bridge.bridgeState.deferredSize(), 'epoch', bridge.bridgeState.currentEpoch());
|
|
1064
1064
|
return true;
|
|
1065
1065
|
}
|
|
1066
1066
|
|
|
1067
1067
|
async function drainDeferredProcessingUtterances() {
|
|
1068
|
-
if (processing || bridgeState.deferredSize() === 0) return;
|
|
1069
|
-
const next = bridgeState.shiftDeferred();
|
|
1068
|
+
if (bridge.processing || bridge.bridgeState.deferredSize() === 0) return;
|
|
1069
|
+
const next = bridge.bridgeState.shiftDeferred();
|
|
1070
1070
|
if (!next) return;
|
|
1071
|
-
log('drain deferred utterance', next.userId, next.wavPath, 'remaining', bridgeState.deferredSize());
|
|
1071
|
+
log('drain deferred utterance', next.userId, next.wavPath, 'remaining', bridge.bridgeState.deferredSize());
|
|
1072
1072
|
const metricsTurn = newLatencyTurn(next.userId, next.startedAtMs || Date.now());
|
|
1073
1073
|
metricsTurn.mark('voice_first_packet', next.startedAtMs || Date.now());
|
|
1074
1074
|
metricsTurn.mark('utterance_flush');
|
|
@@ -1092,368 +1092,6 @@ async function validateProcessingBargeIn(userId, wavPath, pcmBytes, segments) {
|
|
|
1092
1092
|
return { action: 'interrupt', text };
|
|
1093
1093
|
}
|
|
1094
1094
|
|
|
1095
|
-
async function flushUtterance(userId) {
|
|
1096
|
-
const pending = bridgeState.deletePending(userId);
|
|
1097
|
-
if (!pending) return;
|
|
1098
|
-
if (pending.timer) clearTimeout(pending.timer);
|
|
1099
|
-
const files = pending.files;
|
|
1100
|
-
const pcmBytes = pending.pcmBytes;
|
|
1101
|
-
const metricsTurn = newLatencyTurn(userId, pending.firstPacketAt || Date.now());
|
|
1102
|
-
metricsTurn.mark('voice_first_packet', pending.firstPacketAt || Date.now());
|
|
1103
|
-
metricsTurn.mark('voice_segment_end', pending.lastSegmentEndAt || Date.now());
|
|
1104
|
-
metricsTurn.mark('utterance_flush');
|
|
1105
|
-
metricsTurn.addMeta({ segments: files.length, pcmBytes, epoch: pending.epoch });
|
|
1106
|
-
if (pending.epoch !== bridgeState.currentEpoch()) {
|
|
1107
|
-
log('drop stale utterance after voice input queue reset', userId, 'utteranceEpoch', pending.epoch, 'currentEpoch', bridgeState.currentEpoch());
|
|
1108
|
-
for (const file of files) fs.rm(file, { force: true }, () => {});
|
|
1109
|
-
metricsTurn.finish({ status: 'stale_after_config_change' });
|
|
1110
|
-
return;
|
|
1111
|
-
}
|
|
1112
|
-
if (pcmBytes < MIN_UTTERANCE_BYTES) {
|
|
1113
|
-
log('skip short utterance', userId, 'segments', files.length, 'pcmBytes', pcmBytes, 'minBytes', MIN_UTTERANCE_BYTES);
|
|
1114
|
-
metricsTurn.finish({ status: 'skip_short' });
|
|
1115
|
-
return;
|
|
1116
|
-
}
|
|
1117
|
-
const merged = path.join(settings.debugDir, `utterance-merged-${stamp()}-${userId}.wav`);
|
|
1118
|
-
await concatWavs(files, merged);
|
|
1119
|
-
const levels = await analyzeAudio(merged);
|
|
1120
|
-
log('utterance levels', userId, 'segments', files.length, 'pcmBytes', pcmBytes, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb);
|
|
1121
|
-
if (await saveCapturedVoiceCloneSample(userId, merged, pcmBytes, files.length)) {
|
|
1122
|
-
metricsTurn.addMeta({ meanDb: levels.meanDb, maxDb: levels.maxDb });
|
|
1123
|
-
metricsTurn.finish({ status: 'voice_clone_sample_saved' });
|
|
1124
|
-
return;
|
|
1125
|
-
}
|
|
1126
|
-
const candidate = isBargeInCandidate(pcmBytes, levels);
|
|
1127
|
-
if (speaking || processing) {
|
|
1128
|
-
const thresholds = currentBargeInThresholds();
|
|
1129
|
-
if (!candidate) {
|
|
1130
|
-
log('check weak barge-in for explicit stop transcript', userId, 'pcmBytes', pcmBytes, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb, 'thresholdBytes', thresholds.minBytes, 'thresholds', thresholds.minMeanDb, thresholds.minMaxDb, 'mode', thresholds.mode);
|
|
1131
|
-
}
|
|
1132
|
-
const validation = await validateProcessingBargeIn(userId, merged, pcmBytes, files.length);
|
|
1133
|
-
if (validation?.action === 'interrupt') {
|
|
1134
|
-
metricsTurn.finish({ status: processing ? 'barge_in_processing_interrupt' : 'barge_in_playback_interrupt' });
|
|
1135
|
-
return;
|
|
1136
|
-
}
|
|
1137
|
-
if (processing && validation?.action === 'defer') {
|
|
1138
|
-
const queued = enqueueDeferredProcessingUtterance({
|
|
1139
|
-
userId,
|
|
1140
|
-
wavPath: merged,
|
|
1141
|
-
pcmBytes,
|
|
1142
|
-
segments: files.length,
|
|
1143
|
-
startedAtMs: pending.firstPacketAt || Date.now(),
|
|
1144
|
-
});
|
|
1145
|
-
metricsTurn.finish({ status: queued ? 'deferred_during_processing' : 'drop_deferred_during_processing' });
|
|
1146
|
-
return;
|
|
1147
|
-
}
|
|
1148
|
-
metricsTurn.finish({ status: speaking ? 'barge_in_playback_ignored' : 'barge_in_processing_ignored' });
|
|
1149
|
-
return;
|
|
1150
|
-
}
|
|
1151
|
-
// Drop only when BOTH overall energy and peak are low. Real Discord speech from this
|
|
1152
|
-
// mic can have low mean volume while still carrying intelligible peaks; using OR here
|
|
1153
|
-
// caused valid Korean utterances to be discarded as "low-energy".
|
|
1154
|
-
if (levels.meanDb < MIN_MEAN_VOLUME_DB && levels.maxDb < MIN_MAX_VOLUME_DB) {
|
|
1155
|
-
log('skip low-energy utterance', userId, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb, 'thresholds', MIN_MEAN_VOLUME_DB, MIN_MAX_VOLUME_DB, 'mode', 'both-below');
|
|
1156
|
-
metricsTurn.addMeta({ meanDb: levels.meanDb, maxDb: levels.maxDb });
|
|
1157
|
-
metricsTurn.finish({ status: 'skip_low_energy' });
|
|
1158
|
-
return;
|
|
1159
|
-
}
|
|
1160
|
-
metricsTurn.addMeta({ meanDb: levels.meanDb, maxDb: levels.maxDb });
|
|
1161
|
-
await handleRecording(userId, merged, pcmBytes, files.length, metricsTurn);
|
|
1162
|
-
}
|
|
1163
|
-
|
|
1164
|
-
async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsTurn = null) {
|
|
1165
|
-
if (processing) { log('drop while processing', userId); metricsTurn?.finish({ status: 'drop_processing' }); return; }
|
|
1166
|
-
if (!isAllowed(userId)) { warn('ignore unauthorized', userId); metricsTurn?.finish({ status: 'unauthorized' }); return; }
|
|
1167
|
-
processing = true;
|
|
1168
|
-
const turnId = ++activeTurnId;
|
|
1169
|
-
const controller = new AbortController();
|
|
1170
|
-
currentAbortController = controller;
|
|
1171
|
-
const signal = controller.signal;
|
|
1172
|
-
const sessionForVoice = resolveProjectSessionForChannel(activeVoiceChannelId || settings.transcriptChannelId);
|
|
1173
|
-
const previousTranscriptChannelId = activeTranscriptChannelId;
|
|
1174
|
-
activeTranscriptChannelId = sessionForVoice?.transcriptChannelId || settings.transcriptChannelId;
|
|
1175
|
-
try {
|
|
1176
|
-
const runtimeLanguage = reloadRuntimeLanguageFromEnv();
|
|
1177
|
-
if (runtimeLanguage.changed) {
|
|
1178
|
-
log('drop current utterance because language changed before STT', userId, 'turn', turnId, 'language', runtimeLanguage.voiceLanguage);
|
|
1179
|
-
fs.rm(wavPath, { force: true }, () => {});
|
|
1180
|
-
metricsTurn?.finish({ status: 'drop_stale_language_change' });
|
|
1181
|
-
return;
|
|
1182
|
-
}
|
|
1183
|
-
const session = resolveProjectSessionForChannel(activeVoiceChannelId || settings.transcriptChannelId);
|
|
1184
|
-
activeTranscriptChannelId = session?.transcriptChannelId || settings.transcriptChannelId;
|
|
1185
|
-
log('voice turn text target', session ? `project=${session.slug}` : 'project=default', 'channel', activeTranscriptChannelId ? 'project-or-default' : 'none');
|
|
1186
|
-
log('transcribing', userId, wavPath, 'pcmBytes', pcmBytes, 'segments', segments, 'turn', turnId);
|
|
1187
|
-
const sttNotice = formatSttStartMessage(settings.voiceLanguage);
|
|
1188
|
-
await sendText(sttNotice);
|
|
1189
|
-
const sttNoticeSpeech = STT_START_VOICE_NOTICE
|
|
1190
|
-
? speakImmediateNotice(sttNotice.replace(/^🎧\s*/u, ''), signal, 'stt-start')
|
|
1191
|
-
: Promise.resolve();
|
|
1192
|
-
const sttStart = Date.now();
|
|
1193
|
-
const text = await transcribe(wavPath);
|
|
1194
|
-
await sttNoticeSpeech;
|
|
1195
|
-
metricsTurn?.stage('stt', Date.now() - sttStart, { transcriptChars: String(text || '').length });
|
|
1196
|
-
if (interruptedTurns.has(turnId) || signal.aborted) { metricsTurn?.finish({ status: 'aborted_after_stt' }); return; }
|
|
1197
|
-
if (!text) { log('empty transcript', userId, wavPath); metricsTurn?.finish({ status: 'empty_transcript' }); return; }
|
|
1198
|
-
log(`user ${userId} said: ${text}`);
|
|
1199
|
-
await sendText(formatSttResultMessage(settings.voiceLanguage, userId, text));
|
|
1200
|
-
if (!acceptsWake(text)) { await sendText(formatWakeRejectedMessage(settings.voiceLanguage)); metricsTurn?.finish({ status: 'wake_rejected' }); return; }
|
|
1201
|
-
|
|
1202
|
-
const prompt = stripWake(text);
|
|
1203
|
-
if (await handleLanguageCommand(prompt, signal)) {
|
|
1204
|
-
metricsTurn?.finish({ status: 'language_command' });
|
|
1205
|
-
return;
|
|
1206
|
-
}
|
|
1207
|
-
if (await handleTtsVoiceCommand(prompt, signal)) {
|
|
1208
|
-
metricsTurn?.finish({ status: 'voice_command' });
|
|
1209
|
-
return;
|
|
1210
|
-
}
|
|
1211
|
-
if (await handleVoiceCloneCommand(userId, prompt, signal)) {
|
|
1212
|
-
metricsTurn?.finish({ status: 'voice_clone_command' });
|
|
1213
|
-
return;
|
|
1214
|
-
}
|
|
1215
|
-
const sensitivityRequest = sensitivityModeFromTranscript(prompt);
|
|
1216
|
-
if (sensitivityRequest) {
|
|
1217
|
-
const thresholds = setSensitivityMode(sensitivityRequest.mode, sensitivityRequest.reason);
|
|
1218
|
-
await sendText(`🎚️ ${sensitivityStatusText()}`);
|
|
1219
|
-
if (isSensitivityOnlyRequest(prompt)) {
|
|
1220
|
-
await speakText(sensitivityChangedSpeech(thresholds.mode, settings.voiceLanguage), signal, metricsTurn);
|
|
1221
|
-
metricsTurn?.finish({ status: 'sensitivity_only' });
|
|
1222
|
-
return;
|
|
1223
|
-
}
|
|
1224
|
-
}
|
|
1225
|
-
const verboseRequest = verboseModeFromTranscript(prompt);
|
|
1226
|
-
if (verboseRequest !== null) {
|
|
1227
|
-
setVerboseProgress(verboseRequest, 'voice-command');
|
|
1228
|
-
await sendText(`🔎 ${verboseStatusText()}`);
|
|
1229
|
-
if (isVerboseOnlyRequest(prompt)) {
|
|
1230
|
-
await speakText(verboseChangedSpeech(verboseRequest, settings.voiceLanguage), signal, metricsTurn);
|
|
1231
|
-
metricsTurn?.finish({ status: 'verbose_only' });
|
|
1232
|
-
return;
|
|
1233
|
-
}
|
|
1234
|
-
}
|
|
1235
|
-
const selectedAgentAdapter = adapterForProjectSession(session);
|
|
1236
|
-
const projectContext = projectSessionContextText(session);
|
|
1237
|
-
const plan = {
|
|
1238
|
-
task: true,
|
|
1239
|
-
label: selectedAgentAdapter.label,
|
|
1240
|
-
verboseProgress,
|
|
1241
|
-
language: settings.voiceLanguage,
|
|
1242
|
-
cwd: session?.workdir,
|
|
1243
|
-
projectContext,
|
|
1244
|
-
};
|
|
1245
|
-
log('Agent plan', plan.label, 'backend', selectedAgentAdapter.backend, 'task', plan.task, 'language', plan.language, session ? `project=${session.slug}` : 'project=default');
|
|
1246
|
-
const agentStart = Date.now();
|
|
1247
|
-
const progressController = new AbortController();
|
|
1248
|
-
activeProgressAbortController = progressController;
|
|
1249
|
-
activeProgressSignal = progressController.signal;
|
|
1250
|
-
activeProgressLastEventAt = Date.now();
|
|
1251
|
-
const agentPromise = selectedAgentAdapter.ask(prompt, signal, plan);
|
|
1252
|
-
let done = false;
|
|
1253
|
-
// Status announcements share one queue with verbose progress so they never
|
|
1254
|
-
// talk over each other. In verbose mode, skip the generic initial prompt;
|
|
1255
|
-
// the detailed tool/file/test events are the initial progress voice.
|
|
1256
|
-
const progressLoop = (async () => {
|
|
1257
|
-
if (!verboseProgress) {
|
|
1258
|
-
await sleep(2500);
|
|
1259
|
-
if (!done && !signal.aborted && !interruptedTurns.has(turnId)) {
|
|
1260
|
-
const initial = /^en/i.test(String(settings.voiceLanguage || ''))
|
|
1261
|
-
? 'calling the agent.'
|
|
1262
|
-
: '에이전트 호출했어. 응답 기다리는 중.';
|
|
1263
|
-
queueProgressSpeechText(initial, progressController.signal, 'generic-initial');
|
|
1264
|
-
}
|
|
1265
|
-
}
|
|
1266
|
-
let idleNotices = 0;
|
|
1267
|
-
let nextIdleNoticeMs = PROGRESS_IDLE_NOTICE_INITIAL_MS;
|
|
1268
|
-
let lastObservedProgressAt = activeProgressLastEventAt;
|
|
1269
|
-
while (!done && !signal.aborted && !interruptedTurns.has(turnId) && idleNotices < PROGRESS_IDLE_NOTICE_LIMIT) {
|
|
1270
|
-
await sleep(Math.min(PROGRESS_IDLE_CHECK_MS, nextIdleNoticeMs));
|
|
1271
|
-
if (done || signal.aborted || interruptedTurns.has(turnId)) break;
|
|
1272
|
-
if (activeProgressLastEventAt !== lastObservedProgressAt) {
|
|
1273
|
-
lastObservedProgressAt = activeProgressLastEventAt;
|
|
1274
|
-
nextIdleNoticeMs = PROGRESS_IDLE_NOTICE_INITIAL_MS;
|
|
1275
|
-
continue;
|
|
1276
|
-
}
|
|
1277
|
-
const idleMs = Date.now() - activeProgressLastEventAt;
|
|
1278
|
-
if (idleMs < nextIdleNoticeMs) continue;
|
|
1279
|
-
idleNotices += 1;
|
|
1280
|
-
activeProgressLastEventAt = Date.now();
|
|
1281
|
-
lastObservedProgressAt = activeProgressLastEventAt;
|
|
1282
|
-
const idle = /^en/i.test(String(settings.voiceLanguage || ''))
|
|
1283
|
-
? 'still working on that.'
|
|
1284
|
-
: '아직 작업 중이야.';
|
|
1285
|
-
queueProgressSpeechText(idle, progressController.signal, `idle-${idleNotices}-${Math.round(nextIdleNoticeMs / 1000)}s`);
|
|
1286
|
-
nextIdleNoticeMs = Math.min(
|
|
1287
|
-
PROGRESS_IDLE_NOTICE_MAX_MS,
|
|
1288
|
-
Math.max(nextIdleNoticeMs + 1000, Math.round(nextIdleNoticeMs * PROGRESS_IDLE_NOTICE_MULTIPLIER)),
|
|
1289
|
-
);
|
|
1290
|
-
}
|
|
1291
|
-
})().catch(e => {
|
|
1292
|
-
if (!isAbortError(e)) warn('progress loop failed', e?.stack || e);
|
|
1293
|
-
});
|
|
1294
|
-
const answer = await agentPromise.finally(() => { done = true; });
|
|
1295
|
-
metricsTurn?.stage('agent', Date.now() - agentStart, { answerChars: String(answer || '').length, backend: selectedAgentAdapter.backend });
|
|
1296
|
-
void progressLoop;
|
|
1297
|
-
if (interruptedTurns.has(turnId) || signal.aborted) { metricsTurn?.finish({ status: 'aborted_after_agent' }); return; }
|
|
1298
|
-
|
|
1299
|
-
log('Agent answer', selectedAgentAdapter.label, answer.slice(0, 200));
|
|
1300
|
-
const spokenAnswer = spokenResultOnly(prompt, answer, settings.voiceLanguage);
|
|
1301
|
-
const fullAnswerText = `${agentAnswerHeader(settings.voiceLanguage, selectedAgentAdapter.label)}\n${answer || emptyAgentAnswer(settings.voiceLanguage)}`;
|
|
1302
|
-
log('send agent answer text', 'chars', fullAnswerText.length);
|
|
1303
|
-
const answerTextDelivered = await sendText(fullAnswerText);
|
|
1304
|
-
if (!answerTextDelivered) {
|
|
1305
|
-
warn('agent answer text delivery failed; still speaking answer');
|
|
1306
|
-
}
|
|
1307
|
-
log('spoken answer', spokenAnswer.slice(0, 200));
|
|
1308
|
-
stopProgressSpeech(progressController.signal, 'agent-answer-ready');
|
|
1309
|
-
await speakText(spokenAnswer, signal, metricsTurn, { mirrorText: !answerTextDelivered });
|
|
1310
|
-
metricsTurn?.finish({ status: 'ok' });
|
|
1311
|
-
} catch (e) {
|
|
1312
|
-
if (isAbortError(e) || interruptedTurns.has(turnId)) {
|
|
1313
|
-
log('turn aborted', userId, 'turn', turnId);
|
|
1314
|
-
metricsTurn?.finish({ status: 'aborted' });
|
|
1315
|
-
return;
|
|
1316
|
-
}
|
|
1317
|
-
warn('handleRecording failed', e?.stack || e);
|
|
1318
|
-
const shortMsg = String(e?.message || e).slice(0, 800);
|
|
1319
|
-
metricsTurn?.finish({ status: 'error', error: shortMsg });
|
|
1320
|
-
await sendText(formatVoiceErrorMessage(settings.voiceLanguage, shortMsg));
|
|
1321
|
-
} finally {
|
|
1322
|
-
if (activeProgressAbortController && !activeProgressAbortController.signal.aborted) {
|
|
1323
|
-
try { activeProgressAbortController.abort(); } catch (e) { warn('abort progress speech in cleanup failed', e?.stack || e); }
|
|
1324
|
-
}
|
|
1325
|
-
if (activeProgressSignal === activeProgressAbortController?.signal) activeProgressSignal = null;
|
|
1326
|
-
activeProgressAbortController = null;
|
|
1327
|
-
if (currentAbortController === controller) currentAbortController = null;
|
|
1328
|
-
activeTranscriptChannelId = previousTranscriptChannelId;
|
|
1329
|
-
interruptedTurns.delete(turnId);
|
|
1330
|
-
if (activeTurnId === turnId) activeTurnId = 0;
|
|
1331
|
-
processing = false;
|
|
1332
|
-
if (bridgeState.deferredSize() > 0) {
|
|
1333
|
-
setImmediate(() => drainDeferredProcessingUtterances().catch(e => warn('drain deferred utterance failed', e?.stack || e)));
|
|
1334
|
-
}
|
|
1335
|
-
}
|
|
1336
|
-
}
|
|
1337
|
-
|
|
1338
|
-
function subscribeUser(receiver, userId) {
|
|
1339
|
-
if (!isAllowed(userId)) return;
|
|
1340
|
-
if (String(userId) === client.user?.id) return;
|
|
1341
|
-
const wasSpeaking = speaking;
|
|
1342
|
-
const wasProcessing = processing;
|
|
1343
|
-
if ((wasSpeaking || wasProcessing) && !activeStreams.has(userId)) {
|
|
1344
|
-
// Speaking-start alone is too noisy in Discord voice. Record and validate a
|
|
1345
|
-
// real segment first; only confirmed playback barge-in stops the current
|
|
1346
|
-
// audio chunk, and only explicit stop transcripts abort active agent work.
|
|
1347
|
-
log('possible barge-in start; waiting for segment validation', userId, 'speaking', wasSpeaking, 'processing', wasProcessing);
|
|
1348
|
-
}
|
|
1349
|
-
if (activeStreams.has(userId)) return;
|
|
1350
|
-
const pending = bridgeState.getPending(userId);
|
|
1351
|
-
if (pending?.timer) {
|
|
1352
|
-
bridgeState.clearPendingTimer(userId);
|
|
1353
|
-
log('extend pending utterance because new segment started', userId, 'segments', pending.files.length, 'totalPcmBytes', pending.pcmBytes);
|
|
1354
|
-
}
|
|
1355
|
-
|
|
1356
|
-
const file = path.join(settings.debugDir, `segment-${stamp()}-${userId}.wav`);
|
|
1357
|
-
log('subscribe user', userId, file);
|
|
1358
|
-
const opusStream = receiver.subscribe(userId, { end: { behavior: EndBehaviorType.AfterSilence, duration: SUBSCRIBE_AFTER_SILENCE_MS } });
|
|
1359
|
-
const decoder = new prism.opus.Decoder({ rate: 48000, channels: 2, frameSize: 960 });
|
|
1360
|
-
const writer = new wav.FileWriter(file, { sampleRate: 48000, channels: 2, bitDepth: 16 });
|
|
1361
|
-
activeStreams.set(userId, { opusStream, decoder, writer, file, startedAtMs: Date.now() });
|
|
1362
|
-
let pcmBytes = 0;
|
|
1363
|
-
const liveThresholds = wasSpeaking && !wasProcessing ? currentPlaybackBargeInThresholds() : currentBargeInThresholds();
|
|
1364
|
-
const liveBargeIn = shouldUseLivePlaybackBargeIn({ speaking: wasSpeaking, processing: wasProcessing }) ? createLiveBargeInMonitor({
|
|
1365
|
-
minBytes: liveThresholds.minBytes,
|
|
1366
|
-
minMeanDb: liveThresholds.minMeanDb,
|
|
1367
|
-
minMaxDb: liveThresholds.minMaxDb,
|
|
1368
|
-
requireBoth: liveThresholds.requireBoth,
|
|
1369
|
-
log,
|
|
1370
|
-
onConfirm: ({ pcmBytes: confirmedBytes, levels }) => {
|
|
1371
|
-
log('confirmed live playback barge-in before segment end', userId, 'pcmBytes', confirmedBytes, 'meanDb', levels.meanDb, 'maxDb', levels.maxDb);
|
|
1372
|
-
stopPlaybackForBargeIn(userId, 'confirmed-live-playback-barge-in');
|
|
1373
|
-
},
|
|
1374
|
-
}) : null;
|
|
1375
|
-
decoder.on('data', chunk => {
|
|
1376
|
-
pcmBytes += chunk.length;
|
|
1377
|
-
liveBargeIn?.push(chunk);
|
|
1378
|
-
});
|
|
1379
|
-
opusStream.on('error', e => warn('opus stream error', userId, e?.stack || e));
|
|
1380
|
-
decoder.on('error', e => warn('opus decoder error', userId, e?.stack || e));
|
|
1381
|
-
writer.on('error', e => warn('wav writer error', userId, e?.stack || e));
|
|
1382
|
-
opusStream.on('end', () => log('opus end', userId, 'pcmBytes', pcmBytes));
|
|
1383
|
-
writer.on('finish', () => {
|
|
1384
|
-
const streamState = activeStreams.get(userId);
|
|
1385
|
-
activeStreams.delete(userId);
|
|
1386
|
-
const endedAtMs = Date.now();
|
|
1387
|
-
log('saved segment', userId, 'pcmBytes', pcmBytes, file);
|
|
1388
|
-
queueSegment(userId, file, pcmBytes, streamState?.startedAtMs || endedAtMs, endedAtMs);
|
|
1389
|
-
});
|
|
1390
|
-
opusStream.pipe(decoder).pipe(writer);
|
|
1391
|
-
}
|
|
1392
|
-
|
|
1393
|
-
async function connectTo(channel) {
|
|
1394
|
-
if (connection) {
|
|
1395
|
-
try { connection.destroy(); } catch {}
|
|
1396
|
-
}
|
|
1397
|
-
activeVoiceChannelId = channel.id;
|
|
1398
|
-
connection = joinVoiceChannel({
|
|
1399
|
-
channelId: channel.id,
|
|
1400
|
-
guildId: channel.guild.id,
|
|
1401
|
-
adapterCreator: channel.guild.voiceAdapterCreator,
|
|
1402
|
-
selfDeaf: false,
|
|
1403
|
-
selfMute: false,
|
|
1404
|
-
});
|
|
1405
|
-
const voiceConnection = connection;
|
|
1406
|
-
voiceConnection.subscribe(player);
|
|
1407
|
-
voiceConnection.on('error', e => warn('voice connection error', e?.stack || e));
|
|
1408
|
-
voiceConnection.on('stateChange', async (oldState, newState) => {
|
|
1409
|
-
log('voice connection state', oldState.status, '->', newState.status);
|
|
1410
|
-
if (connection !== voiceConnection) {
|
|
1411
|
-
log('ignore stale voice connection state', oldState.status, '->', newState.status);
|
|
1412
|
-
return;
|
|
1413
|
-
}
|
|
1414
|
-
if (newState.status === VoiceConnectionStatus.Disconnected) {
|
|
1415
|
-
try {
|
|
1416
|
-
await Promise.race([
|
|
1417
|
-
entersState(voiceConnection, VoiceConnectionStatus.Signalling, 5000),
|
|
1418
|
-
entersState(voiceConnection, VoiceConnectionStatus.Connecting, 5000),
|
|
1419
|
-
]);
|
|
1420
|
-
} catch (e) {
|
|
1421
|
-
if (connection !== voiceConnection) return;
|
|
1422
|
-
warn('voice connection disconnected; reconnecting to channel', channel.guild.name, channel.name, e?.message || e);
|
|
1423
|
-
try { voiceConnection.destroy(); } catch {}
|
|
1424
|
-
connection = null;
|
|
1425
|
-
setTimeout(() => connectTo(channel).catch(err => warn('voice reconnect failed', err?.stack || err)), 1500);
|
|
1426
|
-
}
|
|
1427
|
-
}
|
|
1428
|
-
});
|
|
1429
|
-
await entersState(voiceConnection, VoiceConnectionStatus.Ready, 30000);
|
|
1430
|
-
voiceConnection.receiver.speaking.on('start', userId => subscribeUser(voiceConnection.receiver, userId));
|
|
1431
|
-
log(`Listening in voice channel ${channel.guild.name} / ${channel.name}`);
|
|
1432
|
-
}
|
|
1433
|
-
|
|
1434
|
-
async function autoJoin() {
|
|
1435
|
-
const attempted = [];
|
|
1436
|
-
for (const preferredName of settings.autoJoinVoiceChannels) {
|
|
1437
|
-
for (const guild of client.guilds.cache.values()) {
|
|
1438
|
-
const channels = await guild.channels.fetch();
|
|
1439
|
-
for (const ch of channels.values()) {
|
|
1440
|
-
if (!ch?.isVoiceBased?.() || ch.name.toLowerCase() !== preferredName) continue;
|
|
1441
|
-
attempted.push(`${guild.name}/${ch.name}`);
|
|
1442
|
-
try {
|
|
1443
|
-
await connectTo(ch);
|
|
1444
|
-
return;
|
|
1445
|
-
} catch (e) {
|
|
1446
|
-
warn('auto-join failed; trying next configured voice channel', guild.name, ch.name, e?.stack || e);
|
|
1447
|
-
try { connection?.destroy(); } catch {}
|
|
1448
|
-
connection = null;
|
|
1449
|
-
activeVoiceChannelId = '';
|
|
1450
|
-
}
|
|
1451
|
-
}
|
|
1452
|
-
}
|
|
1453
|
-
}
|
|
1454
|
-
warn('No auto-join channel found or reachable', settings.autoJoinVoiceChannels, 'attempted', attempted);
|
|
1455
|
-
}
|
|
1456
|
-
|
|
1457
1095
|
function consumeRestartNotice() {
|
|
1458
1096
|
const noticePath = path.join(ROOT, '.cache', 'restart-notice.txt');
|
|
1459
1097
|
try {
|
|
@@ -1475,76 +1113,8 @@ async function announceRestartComplete() {
|
|
|
1475
1113
|
await speakText(speech, undefined, null, { mirrorText: false });
|
|
1476
1114
|
}
|
|
1477
1115
|
|
|
1478
|
-
async function findVoiceChannelBySelector(guild, selector) {
|
|
1479
|
-
const wanted = String(selector || '').trim();
|
|
1480
|
-
if (!wanted || !guild) return null;
|
|
1481
|
-
const id = wanted.replace(/^<#(\d+)>$/, '$1');
|
|
1482
|
-
const channels = await guild.channels.fetch();
|
|
1483
|
-
const voiceChannels = [...channels.values()].filter(ch => ch?.isVoiceBased?.());
|
|
1484
|
-
const byId = voiceChannels.find(ch => ch.id === id);
|
|
1485
|
-
if (byId) return byId;
|
|
1486
|
-
const matches = voiceChannels.filter(ch => String(ch.name || '').toLowerCase() === wanted.toLowerCase());
|
|
1487
|
-
if (matches.length === 1) return matches[0];
|
|
1488
|
-
if (matches.length > 1) throw new Error(`같은 이름의 음성 채널이 여러 개야. 채널 ID나 멘션으로 지정해줘: ${wanted}`);
|
|
1489
|
-
throw new Error(`음성 채널을 찾지 못했어: ${wanted}`);
|
|
1490
|
-
}
|
|
1491
|
-
|
|
1492
|
-
async function voiceChannelLabel(guild, channelId) {
|
|
1493
|
-
if (!channelId || !guild) return '없음';
|
|
1494
|
-
try {
|
|
1495
|
-
const ch = await guild.channels.fetch(channelId);
|
|
1496
|
-
return ch?.name || '지정됨';
|
|
1497
|
-
} catch {
|
|
1498
|
-
return '지정됨';
|
|
1499
|
-
}
|
|
1500
|
-
}
|
|
1501
|
-
|
|
1502
|
-
async function resolveVoiceChannelForAttach(msg, selector = '') {
|
|
1503
|
-
if (selector) return findVoiceChannelBySelector(msg.guild, selector);
|
|
1504
|
-
if (msg.member?.voice?.channel) return msg.member.voice.channel;
|
|
1505
|
-
if (activeVoiceChannelId && msg.guild) {
|
|
1506
|
-
try {
|
|
1507
|
-
const ch = await msg.guild.channels.fetch(activeVoiceChannelId);
|
|
1508
|
-
if (ch?.isVoiceBased?.()) return ch;
|
|
1509
|
-
} catch {}
|
|
1510
|
-
}
|
|
1511
|
-
throw new Error('붙일 음성 채널을 못 찾았어. 음성채널에 들어가서 `!session attach-voice`를 치거나 `--voice "채널명"`을 붙여줘.');
|
|
1512
|
-
}
|
|
1513
|
-
|
|
1514
|
-
async function attachVoiceChannelToTextSession(msg, command) {
|
|
1515
|
-
const voiceChannel = await resolveVoiceChannelForAttach(msg, command.voice);
|
|
1516
|
-
let session = null;
|
|
1517
|
-
if (command.name) {
|
|
1518
|
-
session = bindProjectSessionToChannel({ state: projectSessionsState, nameOrSlug: command.name, channelId: msg.channelId });
|
|
1519
|
-
} else {
|
|
1520
|
-
session = resolveProjectSessionForChannel(msg.channelId)
|
|
1521
|
-
|| resolveProjectSessionForChannel(voiceChannel.id);
|
|
1522
|
-
if (!session) {
|
|
1523
|
-
const fallbackName = String(msg.channel?.name || `channel-${msg.channelId}`).trim() || `channel-${msg.channelId}`;
|
|
1524
|
-
session = createProjectSession({
|
|
1525
|
-
root: ROOT,
|
|
1526
|
-
state: projectSessionsState,
|
|
1527
|
-
name: fallbackName,
|
|
1528
|
-
workdir: settings.agent.cwd || ROOT,
|
|
1529
|
-
channelId: msg.channelId,
|
|
1530
|
-
voiceChannelId: voiceChannel.id,
|
|
1531
|
-
transcriptChannelId: msg.channelId,
|
|
1532
|
-
mcpContext: 'Ad-hoc Discord text channel session',
|
|
1533
|
-
});
|
|
1534
|
-
}
|
|
1535
|
-
}
|
|
1536
|
-
session.transcriptChannelId = msg.channelId;
|
|
1537
|
-
session.voiceChannelId = voiceChannel.id;
|
|
1538
|
-
projectSessionsState.channelSessions[msg.channelId] = session.slug;
|
|
1539
|
-
projectSessionsState.channelSessions[voiceChannel.id] = session.slug;
|
|
1540
|
-
saveProjectSessionsState();
|
|
1541
|
-
agentAdaptersBySession.delete(session.slug);
|
|
1542
|
-
if (activeVoiceChannelId !== voiceChannel.id) await connectTo(voiceChannel);
|
|
1543
|
-
return msg.reply(`${session.name} 세션을 이 텍스트 채널과 음성 채널 ${voiceChannel.name}에 붙였어. 이제 그 음성채널 발화의 STT/답변 텍스트는 이 채널로 가.`);
|
|
1544
|
-
}
|
|
1545
|
-
|
|
1546
1116
|
async function handleProjectSessionCommand(msg, command) {
|
|
1547
|
-
const activeSession = resolveProjectSessionForChannel(msg.channelId) || resolveProjectSessionForChannel(activeVoiceChannelId);
|
|
1117
|
+
const activeSession = resolveProjectSessionForChannel(msg.channelId) || resolveProjectSessionForChannel(bridge.activeVoiceChannelId);
|
|
1548
1118
|
if (command.action === 'attach-voice') return void await attachVoiceChannelToTextSession(msg, command);
|
|
1549
1119
|
if (command.action === 'status') {
|
|
1550
1120
|
if (!activeSession) return void msg.reply(`${agentAdapter.label} 기본 세션: ${agentAdapter.readSessionId?.() || '아직 없음'}`);
|
|
@@ -1602,7 +1172,8 @@ async function handleProjectSessionCommand(msg, command) {
|
|
|
1602
1172
|
mcpContext: command.mcpContext,
|
|
1603
1173
|
});
|
|
1604
1174
|
saveProjectSessionsState();
|
|
1605
|
-
agentAdaptersBySession.delete(session.slug);
|
|
1175
|
+
bridge.agentAdaptersBySession.delete(session.slug);
|
|
1176
|
+
invalidateBackendAdaptersForSession(session.slug);
|
|
1606
1177
|
return void msg.reply(`${session.name} 프로젝트 세션 만들었어. 작업실은 ${session.workdir}이고, 이 텍스트 채널${voiceChannel ? `과 음성 채널 ${voiceChannel.name}` : ''} 입력은 별도 Hermes 세션 파일로 이어져.`);
|
|
1607
1178
|
}
|
|
1608
1179
|
}
|
|
@@ -1613,107 +1184,22 @@ client.once('ready', async () => {
|
|
|
1613
1184
|
await announceRestartComplete();
|
|
1614
1185
|
});
|
|
1615
1186
|
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
if (content === '!ping') return void msg.reply('pong');
|
|
1631
|
-
if (content === '!verbose') return void msg.reply(verboseStatusText());
|
|
1632
|
-
if (['!verbose on', '!verbose true', '!verbose 1', '!verbose 켜', '!verbose 켜줘'].includes(content.toLowerCase())) {
|
|
1633
|
-
setVerboseProgress(true, 'discord-command');
|
|
1634
|
-
return void msg.reply(verboseStatusText());
|
|
1635
|
-
}
|
|
1636
|
-
if (['!verbose off', '!verbose false', '!verbose 0', '!verbose 꺼', '!verbose 꺼줘'].includes(content.toLowerCase())) {
|
|
1637
|
-
setVerboseProgress(false, 'discord-command');
|
|
1638
|
-
return void msg.reply(verboseStatusText());
|
|
1639
|
-
}
|
|
1640
|
-
if (content === '!sensitivity') return void msg.reply(sensitivityStatusText());
|
|
1641
|
-
if (content === '!latency' || content === '!metrics') {
|
|
1642
|
-
const summary = summarizeLatencyRecords(readJsonlRecords(settings.latencyLogPath, { limit: 200 }));
|
|
1643
|
-
return void msg.reply(`최근 latency 요약 (${settings.latencyLogPath}):\n${formatLatencySummary(summary)}`.slice(0, 1900));
|
|
1644
|
-
}
|
|
1645
|
-
if (content === '!sensitivity conservative') {
|
|
1646
|
-
setSensitivityMode('conservative', 'discord-command');
|
|
1647
|
-
return void msg.reply(sensitivityStatusText());
|
|
1648
|
-
}
|
|
1649
|
-
if (content === '!sensitivity normal') {
|
|
1650
|
-
setSensitivityMode('normal', 'discord-command');
|
|
1651
|
-
return void msg.reply(sensitivityStatusText());
|
|
1652
|
-
}
|
|
1653
|
-
if (content === '!session') return void handleProjectSessionCommand(msg, { action: 'status' });
|
|
1654
|
-
if (content === '!reset-session') return void handleProjectSessionCommand(msg, { action: 'reset' });
|
|
1655
|
-
if (content === '!join') {
|
|
1656
|
-
const ch = msg.member?.voice?.channel;
|
|
1657
|
-
if (!ch) return void msg.reply('먼저 음성 채널에 들어가줘.');
|
|
1658
|
-
await connectTo(ch);
|
|
1659
|
-
return void msg.reply('들어왔어. Node receiver로 듣는 중.');
|
|
1660
|
-
}
|
|
1661
|
-
if (content === '!leave') {
|
|
1662
|
-
try { connection?.destroy(); } catch {}
|
|
1663
|
-
connection = null;
|
|
1664
|
-
activeVoiceChannelId = '';
|
|
1665
|
-
return void msg.reply('나갈게.');
|
|
1666
|
-
}
|
|
1667
|
-
if (content.startsWith('!say ')) {
|
|
1668
|
-
const text = content.slice(5).trim();
|
|
1669
|
-
const mp3 = await synthTTS(text);
|
|
1670
|
-
await playAudio(mp3);
|
|
1671
|
-
return;
|
|
1672
|
-
}
|
|
1673
|
-
if (content.startsWith('!voice-test ')) {
|
|
1674
|
-
const text = content.slice('!voice-test '.length).trim();
|
|
1675
|
-
if (!text) return void msg.reply('테스트할 문장을 붙여줘.');
|
|
1676
|
-
const started = Date.now();
|
|
1677
|
-
try {
|
|
1678
|
-
await msg.reply(`TTS 백엔드 ${ttsBackend.name}로 음성 테스트할게.`);
|
|
1679
|
-
await speakText(text);
|
|
1680
|
-
await msg.channel.send(`음성 테스트 완료: ${ttsBackend.name}, ${Date.now() - started}ms`);
|
|
1681
|
-
} catch (e) {
|
|
1682
|
-
warn('voice-test failed', e?.stack || e);
|
|
1683
|
-
await msg.channel.send(`음성 테스트 실패: ${String(e?.message || e).slice(0, 700)}`);
|
|
1684
|
-
}
|
|
1685
|
-
return;
|
|
1686
|
-
}
|
|
1687
|
-
if (content === '!voice-clone' || content === '!voice-clone status') {
|
|
1688
|
-
const current = voiceCloneCapture.current();
|
|
1689
|
-
if (current?.userId === String(msg.author.id)) {
|
|
1690
|
-
return void msg.reply(`다음 유효한 음성을 ${path.relative(ROOT, current.targetPath)}에 저장할게.`);
|
|
1691
|
-
}
|
|
1692
|
-
return void msg.reply('대기 중인 보이스 클로닝 샘플 캡처가 없어. `!voice-clone capture`로 시작해.');
|
|
1693
|
-
}
|
|
1694
|
-
if (content === '!voice-clone cancel') {
|
|
1695
|
-
const cancelled = voiceCloneCapture.cancel(msg.author.id);
|
|
1696
|
-
return void msg.reply(cancelled ? '보이스 클로닝 샘플 캡처를 취소했어.' : '대기 중인 캡처가 없어.');
|
|
1697
|
-
}
|
|
1698
|
-
if (content === '!voice-clone capture') {
|
|
1699
|
-
const armed = voiceCloneCapture.arm({ userId: msg.author.id, source: 'discord-command' });
|
|
1700
|
-
return void msg.reply(`다음 유효한 음성을 ${path.relative(ROOT, armed.targetPath)}에 저장할게. 음성 채널에서 10~30초 정도 말해줘.`);
|
|
1701
|
-
}
|
|
1702
|
-
if (content.startsWith('!ask ')) {
|
|
1703
|
-
const text = content.slice(5).trim();
|
|
1704
|
-
if (!text) return void msg.reply('물어볼 내용을 붙여줘.');
|
|
1705
|
-
await handleTextAgentMessage(msg, text, { speakResponse: true });
|
|
1706
|
-
return;
|
|
1707
|
-
}
|
|
1708
|
-
if (shouldRouteDiscordTextToAgent({
|
|
1709
|
-
content,
|
|
1710
|
-
channelId: msg.channelId,
|
|
1711
|
-
transcriptChannelId: settings.transcriptChannelId,
|
|
1712
|
-
}) || resolveProjectSessionForChannel(msg.channelId)) {
|
|
1713
|
-
await handleTextAgentMessage(msg, content, { speakResponse: false });
|
|
1714
|
-
return;
|
|
1715
|
-
}
|
|
1187
|
+
const discordCommandRouter = createDiscordCommandRouter({
|
|
1188
|
+
bridge, settings, warn, path, ROOT,
|
|
1189
|
+
isAllowed,
|
|
1190
|
+
handleProjectSessionCommand,
|
|
1191
|
+
handleTextAgentMessage,
|
|
1192
|
+
resolveProjectSessionForChannel,
|
|
1193
|
+
verboseStatusText, setVerboseProgress,
|
|
1194
|
+
notifyStatusText,
|
|
1195
|
+
smartProgressStatusText,
|
|
1196
|
+
sensitivityStatusText, setSensitivityMode,
|
|
1197
|
+
summarizeLatencyRecords, readJsonlRecords, formatLatencySummary,
|
|
1198
|
+
connectTo,
|
|
1199
|
+
synthTTS, playAudio, speakText,
|
|
1200
|
+
voiceCloneCapture,
|
|
1716
1201
|
});
|
|
1202
|
+
client.on('messageCreate', msg => discordCommandRouter.handleDiscordMessage(msg).catch(e => warn('discord command router failed', e?.stack || e)));
|
|
1717
1203
|
|
|
1718
1204
|
process.stdout?.on?.('error', error => {
|
|
1719
1205
|
if (isBenignTransientNetworkError(error)) {
|
|
@@ -1743,37 +1229,6 @@ process.on('uncaughtException', error => {
|
|
|
1743
1229
|
client.on('error', e => warn('discord client error', e?.stack || e));
|
|
1744
1230
|
client.on('shardError', e => warn('discord shard error', e?.stack || e));
|
|
1745
1231
|
|
|
1746
|
-
let shutdownStarted = false;
|
|
1747
|
-
async function gracefulShutdown(signalName) {
|
|
1748
|
-
if (shutdownStarted) return;
|
|
1749
|
-
shutdownStarted = true;
|
|
1750
|
-
log('graceful shutdown requested', signalName, 'connection', Boolean(connection));
|
|
1751
|
-
try {
|
|
1752
|
-
if (currentAbortController && !currentAbortController.signal.aborted) currentAbortController.abort();
|
|
1753
|
-
} catch (e) {
|
|
1754
|
-
warn('abort before shutdown failed', e?.stack || e);
|
|
1755
|
-
}
|
|
1756
|
-
try {
|
|
1757
|
-
if (connection) {
|
|
1758
|
-
let detail = '';
|
|
1759
|
-
const noticePath = path.join(ROOT, '.cache', 'restart-notice.txt');
|
|
1760
|
-
try {
|
|
1761
|
-
if (fs.existsSync(noticePath)) {
|
|
1762
|
-
detail = fs.readFileSync(noticePath, 'utf8').replace(/\s+/g, ' ').trim().slice(0, 120);
|
|
1763
|
-
}
|
|
1764
|
-
} catch (e) {
|
|
1765
|
-
warn('read restart notice failed', e?.stack || e);
|
|
1766
|
-
}
|
|
1767
|
-
await speakText(formatRestartShutdownNotice(detail, settings.tts.edge.voice));
|
|
1768
|
-
await waitEvent(player, AudioPlayerStatus.Idle, 30000).catch(() => {});
|
|
1769
|
-
}
|
|
1770
|
-
} catch (e) {
|
|
1771
|
-
warn('shutdown voice notice failed', e?.stack || e);
|
|
1772
|
-
}
|
|
1773
|
-
try { connection?.destroy(); } catch {}
|
|
1774
|
-
try { client.destroy(); } catch {}
|
|
1775
|
-
process.exit(0);
|
|
1776
|
-
}
|
|
1777
1232
|
process.on('SIGTERM', () => { void gracefulShutdown('SIGTERM'); });
|
|
1778
1233
|
process.on('SIGINT', () => { void gracefulShutdown('SIGINT'); });
|
|
1779
1234
|
|