verbalcoding 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/.env.example +27 -1
  2. package/README.es.md +132 -0
  3. package/README.fr.md +132 -0
  4. package/README.ja.md +132 -0
  5. package/README.ko.md +132 -0
  6. package/README.md +116 -74
  7. package/README.ru.md +132 -0
  8. package/README.zh.md +131 -0
  9. package/app-node/agent_adapters.mjs +37 -5
  10. package/app-node/agent_adapters.test.mjs +13 -1
  11. package/app-node/agent_detect.mjs +73 -0
  12. package/app-node/agent_detect.test.mjs +77 -0
  13. package/app-node/install_config.mjs +3 -0
  14. package/app-node/main.mjs +339 -4
  15. package/app-node/notify.mjs +73 -0
  16. package/app-node/notify.test.mjs +68 -0
  17. package/app-node/plan_mode.mjs +174 -0
  18. package/app-node/plan_mode.test.mjs +153 -0
  19. package/app-node/smart_progress.mjs +94 -0
  20. package/app-node/smart_progress.test.mjs +66 -0
  21. package/app-node/stream_sentencer.mjs +61 -0
  22. package/app-node/stream_sentencer.test.mjs +64 -0
  23. package/app-node/streaming_tts_queue.mjs +48 -0
  24. package/app-node/streaming_tts_queue.test.mjs +58 -0
  25. package/app-node/text_routing.mjs +20 -0
  26. package/app-node/text_routing.test.mjs +23 -1
  27. package/docs/CONFIGURATION.md +69 -96
  28. package/docs/FRESH_INSTALL.md +105 -63
  29. package/docs/HERMES_VOICE.md +65 -0
  30. package/docs/MULTI_INSTANCE.md +16 -0
  31. package/docs/README.md +49 -0
  32. package/docs/RELEASE.md +42 -19
  33. package/docs/ROADMAP.md +38 -0
  34. package/docs/TROUBLESHOOTING.md +126 -0
  35. package/docs/USAGE.md +72 -40
  36. package/docs/assets/figures/verbalcoding-flow.svg +1 -1
  37. package/docs/i18n/CONFIGURATION.es.md +25 -0
  38. package/docs/i18n/CONFIGURATION.fr.md +25 -0
  39. package/docs/i18n/CONFIGURATION.ja.md +25 -0
  40. package/docs/i18n/CONFIGURATION.ko.md +25 -0
  41. package/docs/i18n/CONFIGURATION.ru.md +25 -0
  42. package/docs/i18n/CONFIGURATION.zh.md +25 -0
  43. package/docs/i18n/FRESH_INSTALL.es.md +27 -2
  44. package/docs/i18n/FRESH_INSTALL.fr.md +27 -2
  45. package/docs/i18n/FRESH_INSTALL.ja.md +27 -2
  46. package/docs/i18n/FRESH_INSTALL.ko.md +27 -2
  47. package/docs/i18n/FRESH_INSTALL.ru.md +27 -2
  48. package/docs/i18n/FRESH_INSTALL.zh.md +27 -2
  49. package/docs/i18n/HERMES_VOICE.es.md +46 -0
  50. package/docs/i18n/HERMES_VOICE.fr.md +46 -0
  51. package/docs/i18n/HERMES_VOICE.ja.md +46 -0
  52. package/docs/i18n/HERMES_VOICE.ko.md +65 -0
  53. package/docs/i18n/HERMES_VOICE.ru.md +46 -0
  54. package/docs/i18n/HERMES_VOICE.zh.md +46 -0
  55. package/docs/i18n/MULTI_INSTANCE.es.md +25 -0
  56. package/docs/i18n/MULTI_INSTANCE.fr.md +25 -0
  57. package/docs/i18n/MULTI_INSTANCE.ja.md +25 -0
  58. package/docs/i18n/MULTI_INSTANCE.ko.md +25 -0
  59. package/docs/i18n/MULTI_INSTANCE.ru.md +25 -0
  60. package/docs/i18n/MULTI_INSTANCE.zh.md +25 -0
  61. package/docs/i18n/README.es.md +20 -134
  62. package/docs/i18n/README.fr.md +20 -134
  63. package/docs/i18n/README.ja.md +20 -134
  64. package/docs/i18n/README.ko.md +20 -133
  65. package/docs/i18n/README.ru.md +20 -134
  66. package/docs/i18n/README.zh.md +20 -133
  67. package/docs/i18n/RELEASE.es.md +26 -1
  68. package/docs/i18n/RELEASE.fr.md +26 -1
  69. package/docs/i18n/RELEASE.ja.md +26 -1
  70. package/docs/i18n/RELEASE.ko.md +26 -1
  71. package/docs/i18n/RELEASE.ru.md +26 -1
  72. package/docs/i18n/RELEASE.zh.md +26 -1
  73. package/docs/i18n/TROUBLESHOOTING.es.md +39 -0
  74. package/docs/i18n/TROUBLESHOOTING.fr.md +39 -0
  75. package/docs/i18n/TROUBLESHOOTING.ja.md +39 -0
  76. package/docs/i18n/TROUBLESHOOTING.ko.md +39 -0
  77. package/docs/i18n/TROUBLESHOOTING.ru.md +39 -0
  78. package/docs/i18n/TROUBLESHOOTING.zh.md +39 -0
  79. package/docs/i18n/USAGE.es.md +25 -0
  80. package/docs/i18n/USAGE.fr.md +25 -0
  81. package/docs/i18n/USAGE.ja.md +25 -0
  82. package/docs/i18n/USAGE.ko.md +25 -0
  83. package/docs/i18n/USAGE.ru.md +25 -0
  84. package/docs/i18n/USAGE.zh.md +25 -0
  85. package/docs/superpowers/plans/2026-05-13-phase1-streaming-pipeline.md +122 -0
  86. package/docs/superpowers/plans/2026-05-13-phase10-push-notifications.md +152 -0
  87. package/docs/superpowers/plans/2026-05-13-phase2-agent-adapters.md +242 -0
  88. package/docs/superpowers/plans/2026-05-13-phase6-smart-progress.md +172 -0
  89. package/docs/superpowers/plans/2026-05-13-phase7-voice-plan-mode.md +108 -0
  90. package/package.json +2 -1
  91. package/scripts/cli.mjs +4 -3
  92. package/scripts/doctor.mjs +11 -0
  93. package/scripts/install.mjs +15 -1
@@ -288,6 +288,8 @@ test('Claude, Codex, and Gemini adapters use backend-specific default commands w
288
288
  { backend: 'gemini', command: ['gemini', '-p'], label: 'Gemini' },
289
289
  { backend: 'opencode', command: ['opencode', 'run'], label: 'OpenCode' },
290
290
  { backend: 'openclaw', command: ['openclaw', 'run'], label: 'OpenClaw' },
291
+ { backend: 'aider', command: ['aider', '--no-pretty', '--yes-always', '--message'], label: 'Aider' },
292
+ { backend: 'cursor', command: ['cursor-agent', '--print', '--prompt'], label: 'Cursor CLI' },
291
293
  ];
292
294
 
293
295
  for (const item of cases) {
@@ -352,10 +354,20 @@ test('voiceBridgePrompt keeps voice-specific operating instructions with user te
352
354
  const prompt = voiceBridgePrompt('파일 수정해줘');
353
355
 
354
356
  assert.match(prompt, /Discord 음성 대화/);
355
- assert.match(prompt, /파일 수정, 실행, 로그 확인/);
356
357
  assert.match(prompt, /파일 수정해줘/);
357
358
  });
358
359
 
360
+ test('voiceBridgePrompt includes recent Discord text context when provided', () => {
361
+ const prompt = voiceBridgePrompt('왜 죽었어?', {
362
+ recentDiscordContext: '최근 텍스트 채널 메시지:\n- user: 음성채널에서만 나가줘',
363
+ });
364
+
365
+ assert.match(prompt, /음성 채널 발화와 텍스트 채널 메시지를 같은 대화 맥락으로 함께 고려/);
366
+ assert.match(prompt, /최근 텍스트 채널 메시지/);
367
+ assert.match(prompt, /음성채널에서만 나가줘/);
368
+ assert.match(prompt, /왜 죽었어\?/);
369
+ });
370
+
359
371
  test('voiceBridgePrompt adds optional verbose progress instructions only when enabled', () => {
360
372
  const normal = voiceBridgePrompt('파일 수정해줘');
361
373
  const verbose = voiceBridgePrompt('파일 수정해줘', { verboseProgress: true });
@@ -0,0 +1,73 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+
4
/**
 * Build one probe descriptor.
 *
 * Fields: `backend` (internal id), `bin` (executable looked up on PATH),
 * `defaultCommand` (command line used when no override is set),
 * `envCommand` (name of the environment variable that overrides the command),
 * `label` (human-readable name used in reports).
 */
const defineProbe = (backend, bin, defaultCommand, envCommand, label) => ({
  backend,
  bin,
  defaultCommand,
  envCommand,
  label,
});

// Known agent backends, in the order they are probed and reported.
const PROBES = [
  defineProbe('hermes', 'hermes', 'hermes chat -Q -q', 'HERMES_COMMAND', 'Hermes Agent'),
  defineProbe('claude', 'claude', 'claude -p', 'CLAUDE_COMMAND', 'Claude Code'),
  defineProbe('codex', 'codex', 'codex exec', 'CODEX_COMMAND', 'Codex'),
  defineProbe('gemini', 'gemini', 'gemini -p', 'GEMINI_COMMAND', 'Gemini'),
  defineProbe('opencode', 'opencode', 'opencode run', 'OPENCODE_COMMAND', 'OpenCode'),
  defineProbe('openclaw', 'openclaw', 'openclaw run', 'OPENCLAW_COMMAND', 'OpenClaw'),
  defineProbe('aider', 'aider', 'aider --no-pretty --yes-always --message', 'AIDER_COMMAND', 'Aider'),
  defineProbe('cursor', 'cursor-agent', 'cursor-agent --print --prompt', 'CURSOR_COMMAND', 'Cursor CLI'),
];
14
+
15
/**
 * Minimal `which`: scan the directories listed in `env.PATH` for an
 * executable regular file named `bin`.
 *
 * On Windows every extension from `PATHEXT` (default `.EXE;.CMD;.BAT`) is
 * tried; elsewhere the bare name is used.
 *
 * @param {string} bin - Executable name to look for.
 * @param {object} [options]
 * @param {object} [options.env=process.env] - Environment providing PATH / PATHEXT.
 * @param {Function} [options.accessSync=fs.accessSync] - Permission probe (injectable).
 * @param {Function} [options.statSync=fs.statSync] - Stat probe (injectable).
 * @returns {string|null} Full path of the first match, or null when not found.
 */
function defaultWhich(bin, { env = process.env, accessSync = fs.accessSync, statSync = fs.statSync } = {}) {
  const pathVar = env.PATH || '';
  const exts = process.platform === 'win32' ? (env.PATHEXT || '.EXE;.CMD;.BAT').split(';') : [''];
  // path.delimiter is ';' on Windows and ':' elsewhere.
  for (const dir of pathVar.split(path.delimiter)) {
    if (!dir) continue;
    for (const ext of exts) {
      const candidate = path.join(dir, bin + ext);
      try {
        accessSync(candidate, fs.constants.X_OK);
        // Directories also pass an X_OK check (search permission); a folder
        // that merely shares the binary's name must not count as a hit.
        if (!statSync(candidate).isFile()) continue;
        return candidate;
      } catch { /* not here */ }
    }
  }
  return null;
}
31
+
32
/**
 * Probe every known backend and report whether its executable was located.
 *
 * @param {object} [env=process.env] - Environment used for PATH lookup and for
 *   the per-backend `*_COMMAND` overrides.
 * @param {object} [options]
 * @param {Function} [options.which] - Optional lookup `(bin) => path | null`
 *   (may be async); defaults to the PATH scan in `defaultWhich`.
 * @returns {Promise<Array<{backend: string, label: string, bin: string, path: (string|null), present: boolean, command: string}>>}
 */
export async function detectInstalledAgents(env = process.env, { which } = {}) {
  const locate = which ? which : (name) => defaultWhich(name, { env });

  const describe = async ({ backend, label, bin, envCommand, defaultCommand }) => {
    const found = await locate(bin);
    return {
      backend,
      label,
      bin,
      path: found ? found : null,
      present: Boolean(found),
      // An explicit environment override wins over the built-in command.
      command: env[envCommand] || defaultCommand,
    };
  };

  return Promise.all(PROBES.map((entry) => describe(entry)));
}
46
+
47
/**
 * List the known backends without touching the filesystem.
 *
 * @returns {Array<{backend: string, label: string, bin: string}>}
 */
export function listKnownBackends() {
  const summaries = [];
  for (const { backend, label, bin } of PROBES) {
    summaries.push({ backend, label, bin });
  }
  return summaries;
}
50
+
51
/**
 * Choose the backend to use by default from a detection result.
 *
 * Order of preference: the `preferred` backend when it was detected, then the
 * first detected backend in list order, then `'hermes'` as a last resort.
 *
 * @param {Array<{backend: string, present: boolean}>} detection - Rows as
 *   produced by `detectInstalledAgents`; non-arrays and nullish rows are ignored.
 * @param {string} [preferred=''] - Preferred backend id; compared
 *   case-insensitively and with surrounding whitespace removed.
 * @returns {string} Backend id.
 */
export function pickDefaultBackend(detection, preferred = '') {
  const list = Array.isArray(detection) ? detection : [];
  // Skip nullish rows instead of crashing on `r.present`.
  const present = list.filter((r) => r?.present);
  // Values read from .env files often carry stray whitespace.
  const pref = String(preferred || '').trim().toLowerCase();
  if (pref) {
    const match = present.find((r) => r.backend === pref);
    if (match) return match.backend;
  }
  if (present.length) return present[0].backend;
  return 'hermes';
}
62
+
63
/**
 * Render a detection result as a plain-text report, one backend per row.
 *
 * @param {Array<{backend?: string, label?: string, bin?: string, present?: boolean, path?: (string|null)}>} detection
 *   Rows as produced by `detectInstalledAgents`. Non-arrays and non-object
 *   rows are ignored; a missing `label` falls back to `backend`.
 * @returns {string} Multi-line report, or a fixed message when there are no rows.
 */
export function formatAgentDetectionReport(detection) {
  const list = (Array.isArray(detection) ? detection : []).filter((r) => r && typeof r === 'object');
  if (!list.length) return 'No agent backends probed.';
  const rows = list.map((r) => {
    const marker = r.present ? '✓' : '·';
    // Partial rows must not crash on `.padEnd`.
    const label = String(r.label ?? r.backend ?? '');
    const bin = String(r.bin ?? '');
    const pathPart = r.present ? String(r.path ?? '') : 'not found';
    return ` ${marker} ${label.padEnd(14)} ${bin.padEnd(14)} ${pathPart}`;
  });
  const presentCount = list.filter((r) => r.present).length;
  return `Agent backends (${presentCount}/${list.length} present):\n${rows.join('\n')}`;
}
@@ -0,0 +1,77 @@
1
+ import { test } from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { detectInstalledAgents, listKnownBackends, pickDefaultBackend, formatAgentDetectionReport } from './agent_detect.mjs';
4
+
5
// Shared helpers for the agent_detect test cases.
const findBackend = (rows, name) => rows.find((row) => row.backend === name);
const nothingInstalled = async () => null;

// --- detectInstalledAgents -------------------------------------------------

test('detectInstalledAgents marks present when which resolves', async () => {
  const onlyHermes = async (bin) => (bin === 'hermes' ? '/usr/local/bin/hermes' : null);
  const rows = await detectInstalledAgents({}, { which: onlyHermes });

  const hermes = findBackend(rows, 'hermes');
  assert.equal(hermes.present, true);
  assert.equal(hermes.path, '/usr/local/bin/hermes');

  const claude = findBackend(rows, 'claude');
  assert.equal(claude.present, false);
});

test('detectInstalledAgents includes aider and cursor', async () => {
  const rows = await detectInstalledAgents({}, { which: nothingInstalled });
  const names = rows.map((row) => row.backend);
  for (const expected of ['aider', 'cursor']) {
    assert.ok(names.includes(expected));
  }
});

test('detectInstalledAgents honors env overrides for command', async () => {
  const onlyAider = async (bin) => (bin === 'aider' ? '/opt/aider' : null);
  const env = { AIDER_COMMAND: 'aider --foo' };
  const rows = await detectInstalledAgents(env, { which: onlyAider });

  const aider = findBackend(rows, 'aider');
  assert.equal(aider.command, 'aider --foo');
  assert.equal(aider.present, true);
});

// --- listKnownBackends -----------------------------------------------------

test('listKnownBackends returns 8 entries', () => {
  const known = listKnownBackends();
  assert.equal(known.length, 8);
  assert.ok(known.some((entry) => entry.backend === 'hermes'));
  assert.ok(known.some((entry) => entry.backend === 'cursor'));
});

test('detectInstalledAgents default which uses PATH iteration', async () => {
  // No `which` override: exercises the built-in PATH scan against a
  // directory that does not exist.
  const rows = await detectInstalledAgents({ PATH: '/nonexistent/dir' }, {});
  assert.ok(Array.isArray(rows));
  for (const row of rows) {
    assert.equal(row.present, false);
  }
});

// --- pickDefaultBackend ----------------------------------------------------

test('pickDefaultBackend respects preferred when present', () => {
  const rows = [
    { backend: 'hermes', present: false },
    { backend: 'claude', present: true },
    { backend: 'aider', present: true },
  ];
  assert.equal(pickDefaultBackend(rows, 'aider'), 'aider');
});

test('pickDefaultBackend falls back to first present when preferred missing', () => {
  const rows = [
    { backend: 'hermes', present: false },
    { backend: 'claude', present: true },
    { backend: 'aider', present: true },
  ];
  assert.equal(pickDefaultBackend(rows, 'codex'), 'claude');
});

test('pickDefaultBackend returns hermes when nothing detected', () => {
  const rows = [
    { backend: 'hermes', present: false },
    { backend: 'claude', present: false },
  ];
  assert.equal(pickDefaultBackend(rows, ''), 'hermes');
});

// --- formatAgentDetectionReport --------------------------------------------

test('formatAgentDetectionReport marks present and missing', () => {
  const report = formatAgentDetectionReport([
    { backend: 'hermes', label: 'Hermes Agent', bin: 'hermes', present: true, path: '/usr/local/bin/hermes' },
    { backend: 'claude', label: 'Claude Code', bin: 'claude', present: false, path: null },
  ]);
  const expectations = [/1\/2 present/, /✓ Hermes Agent/, /· Claude Code/, /not found/];
  for (const pattern of expectations) {
    assert.match(report, pattern);
  }
});
@@ -8,6 +8,9 @@ export const SUPPORTED_HARNESSES = [
8
8
  'gemini',
9
9
  'opencode',
10
10
  'openclaw',
11
+ 'aider',
12
+ 'cursor',
13
+ 'cursor-cli',
11
14
  'custom',
12
15
  ];
13
16
 
package/app-node/main.mjs CHANGED
@@ -28,6 +28,22 @@ import {
28
28
  } from './latency_metrics.mjs';
29
29
  import { splitForTTS } from './tts_chunks.mjs';
30
30
  import { playChunkedTTSWithPrefetch } from './tts_prefetch.mjs';
31
+ import { createSentencer } from './stream_sentencer.mjs';
32
+ import { createStreamingTTSQueue } from './streaming_tts_queue.mjs';
33
+ import { createSmartProgressSummarizer } from './smart_progress.mjs';
34
+ import {
35
+ isPlanEntryUtterance,
36
+ parsePlanOutput,
37
+ parseVoiceCommand as parsePlanVoiceCommand,
38
+ applyCommand as applyPlanCommand,
39
+ renderFinalPlan,
40
+ planModePreamble,
41
+ planExecutionPreamble,
42
+ parseDecisionAnswer,
43
+ renderDecisionPrompt,
44
+ renderResolvedDecisions,
45
+ } from './plan_mode.mjs';
46
+ import { createNotifier, buildDiscordDeepLink } from './notify.mjs';
31
47
  import { progressCategory, summarizeProgressEvents, formatProgressMessage } from './progress_speech.mjs';
32
48
  import { buildTtsSettings } from './tts_settings.mjs';
33
49
  import { createTtsBackend } from './tts_backends.mjs';
@@ -47,7 +63,11 @@ import { sendDiscordText, splitDiscordMessage } from './discord_text.mjs';
47
63
  import { progressTtsCacheFileName } from './progress_cache.mjs';
48
64
  import { shouldPassWhisperLanguage, voiceLanguageCommandFromTranscript, languagePreset } from './language_config.mjs';
49
65
  import { formatRestartCompleteNotice, formatRestartShutdownNotice } from './restart_notice.mjs';
50
- import { shouldRouteDiscordTextToAgent } from './text_routing.mjs';
66
+ import {
67
+ appendRecentDiscordText,
68
+ formatRecentDiscordContext,
69
+ shouldRouteDiscordTextToAgent,
70
+ } from './text_routing.mjs';
51
71
  import {
52
72
  bindProjectSessionToChannel,
53
73
  createProjectSession,
@@ -193,6 +213,7 @@ const voiceCloneCapture = createVoiceCloneCaptureState({ defaultTargetPath: sett
193
213
  let connection = null;
194
214
  let activeVoiceChannelId = '';
195
215
  let activeTranscriptChannelId = '';
216
+ const recentDiscordTextByChannel = new Map();
196
217
  let player = createAudioPlayer();
197
218
  let speaking = false;
198
219
  let processing = false;
@@ -263,6 +284,220 @@ let progressSpeechBatch = [];
263
284
  let progressSpeechBatchTimer = null;
264
285
  let progressSpeechBatchSignal = null;
265
286
  let progressSpeechBatchStartedAt = 0;
287
+
288
// Streaming TTS is opt-in: STREAMING_TTS must be one of 1/true/yes/on
// (case-insensitive); anything else, including unset, leaves it disabled.
const STREAMING_TTS_ENABLED = new Set(['1', 'true', 'yes', 'on'])
  .has(String(process.env.STREAMING_TTS || '0').toLowerCase());

// Per-turn streaming state; the sentencer and queue are null while no
// streaming turn is active.
let activeSentencer = null;
let activeStreamingQueue = null;
let streamingSpeechDelivered = false;
292
+
293
// When true, completion notifications are requested regardless of who is in
// the voice channel (toggled by the `!notify` text commands).
let notifyUserOptIn = false;
// Lazily created notifier, shared for the lifetime of the process.
let notifierInstance = null;

/**
 * Return the process-wide notifier, creating it from the environment on first
 * use. The environment is read only once; later changes are not picked up.
 *
 * @returns {object} The notifier returned by `createNotifier`.
 */
function ensureNotifier() {
  if (!notifierInstance) {
    const { NOTIFY_PROVIDER, NTFY_TOPIC, PUSHOVER_USER, PUSHOVER_TOKEN } = process.env;
    notifierInstance = createNotifier({
      provider: (NOTIFY_PROVIDER || 'ntfy').toLowerCase(),
      topic: NTFY_TOPIC || '',
      pushoverUser: PUSHOVER_USER || '',
      pushoverToken: PUSHOVER_TOKEN || '',
    });
  }
  return notifierInstance;
}
305
/**
 * Build the one-line status shown in reply to the `!notify` commands.
 *
 * Completion notifications are only attempted when NOTIFY_PROVIDER is set to
 * something other than "noop", so that case is reported as off rather than as
 * a configured ntfy setup.
 *
 * @returns {string} Human-readable notification status.
 */
function notifyStatusText() {
  const provider = (process.env.NOTIFY_PROVIDER || '').toLowerCase();
  const threshold = process.env.NOTIFY_MIN_TASK_MS || '60000';
  if (!provider || provider === 'noop') {
    return `notify: off (NOTIFY_PROVIDER is unset or "noop"). Threshold: ${threshold}ms.`;
  }

  // Providers other than ntfy/pushover have no credentials to check here.
  let isConfigured = true;
  if (provider === 'ntfy') {
    isConfigured = Boolean(process.env.NTFY_TOPIC);
  } else if (provider === 'pushover') {
    isConfigured = Boolean(process.env.PUSHOVER_USER && process.env.PUSHOVER_TOKEN);
  }

  const mode = notifyUserOptIn ? 'always' : 'empty-channel only';
  const config = isConfigured ? 'configured' : 'NOT configured';
  return `notify: ${mode} via ${provider} (${config}). Threshold: ${threshold}ms.`;
}
312
/**
 * Count the non-bot members of the active voice channel.
 *
 * Best effort: returns 0 when no voice channel is active, when the channel
 * cannot be fetched or exposes no member list, or when anything throws.
 *
 * @returns {Promise<number>} Number of members whose `user.bot` is not truthy.
 */
async function getVoiceChannelHumanCount() {
  if (!activeVoiceChannelId) return 0;
  try {
    const channel = await client.channels.fetch(activeVoiceChannelId).catch(() => null);
    const members = channel?.members;
    if (!members) return 0;

    let humans = 0;
    // `members` iterates as [id, member] pairs.
    for (const [, member] of members) {
      if (member.user?.bot) continue;
      humans += 1;
    }
    return humans;
  } catch (e) {
    warn('humanCount failed', e?.message || e);
    return 0;
  }
}
325
/**
 * Send a "task finished" push notification when the notifier's policy allows it.
 *
 * Does nothing unless NOTIFY_PROVIDER is set to something other than "noop".
 * The notification body is the last sentence of the answer, capped at 200
 * characters. Send failures are logged and never propagate.
 *
 * @param {object} params
 * @param {string} params.answer - Final answer text for the turn.
 * @param {string} params.label - Agent label used in the title.
 * @param {number} params.elapsedMs - How long the task took.
 * @param {string} params.guildId - Guild id used for the Discord deep link.
 * @returns {Promise<void>}
 */
async function maybeNotifyTaskComplete({ answer, label, elapsedMs, guildId }) {
  const provider = (process.env.NOTIFY_PROVIDER || '').toLowerCase();
  if (!provider || provider === 'noop') return;

  // A malformed NOTIFY_MIN_TASK_MS would otherwise reach the policy check as
  // NaN; fall back to the documented default instead.
  const configuredMinMs = Number(process.env.NOTIFY_MIN_TASK_MS || '60000');
  const minTaskMs = Number.isFinite(configuredMinMs) && configuredMinMs >= 0 ? configuredMinMs : 60000;

  const humanCount = await getVoiceChannelHumanCount();
  const notifier = ensureNotifier();
  if (!notifier.shouldNotify({ humanCount, taskMs: elapsedMs, minTaskMs, userOptIn: notifyUserOptIn })) return;

  const text = String(answer || '').trim();
  const lastSentence = text.split(/(?<=[.!?。!?])\s+/).filter(Boolean).pop() || text;
  const body = lastSentence.slice(0, 200);
  const title = label ? `${label} finished` : 'VerbalCoding finished';
  const deepLink = buildDiscordDeepLink({ guildId, channelId: activeVoiceChannelId });
  try {
    const result = await notifier.send({ title, body, deepLink });
    log('notify sent', 'provider', provider, 'status', result?.status || result?.ok, 'skipped', result?.skipped || false);
  } catch (e) {
    warn('notify send failed', e?.message || e);
  }
}
344
+
345
// Plan-mode state per channel key:
// key -> { steps, decisions, resolvedDecisions, pendingDecisionIndex, originalPrompt, language }
const planStates = new Map();

/**
 * Key under which plan-mode state is stored: the active voice channel when
 * there is one, else the configured transcript channel, else 'default'.
 *
 * @returns {string}
 */
function planChannelKey() {
  if (activeVoiceChannelId) return activeVoiceChannelId;
  if (settings.transcriptChannelId) return settings.transcriptChannelId;
  return 'default';
}
350
+
351
/**
 * Ask the user the currently pending plan decision, as text and then as speech.
 * Does nothing when there is no decision at `state.pendingDecisionIndex`.
 *
 * @param {object} state - Plan state (`decisions`, `pendingDecisionIndex`, `language`).
 * @param {AbortSignal} signal - Forwarded to `speakText`.
 * @returns {Promise<void>}
 */
async function askNextDecision(state, signal) {
  const { decisions, pendingDecisionIndex, language } = state;
  const pending = decisions[pendingDecisionIndex];
  if (!pending) return;

  const question = renderDecisionPrompt(pending, language);
  await sendText(`❓ ${question}`);
  await speakText(question, signal, null);
}
358
+
359
/**
 * Present the plan as ready for approval: the narrated steps, the resolved
 * decisions line and a closing instruction, sent as text and then spoken.
 *
 * @param {object} state - Plan state (`steps`, `resolvedDecisions`, `language`).
 * @param {AbortSignal} signal - Forwarded to `speakText`.
 * @returns {Promise<void>}
 */
async function finalizePlanReady(state, signal) {
  const { language } = state;
  const resolvedLine = renderResolvedDecisions(state.resolvedDecisions, language);
  const plan = planNarrationLines(state.steps, language);

  let closing;
  if (/^en/i.test(String(language || ''))) {
    closing = 'Say "approve" to run, or edit with skip/insert.';
  } else {
    closing = '"실행"이라고 하면 시작할게. skip/insert로 수정도 돼.';
  }

  const tail = [plan, resolvedLine, closing].join('\n');
  await sendText(`📝 ${tail}`);
  await speakText(tail, signal, null);
}
369
+
370
/**
 * Route one voice utterance through plan mode.
 *
 * Three situations are handled, in this order:
 *  1. A plan exists and a decision is pending: the utterance is parsed as the
 *     answer to that decision.
 *  2. A plan exists with all decisions resolved: the utterance is parsed as a
 *     plan command (skip / insert / cancel / approve). Anything else discards
 *     the plan and lets the utterance through as a normal turn.
 *  3. No plan exists and the utterance asks for one: the agent is asked to
 *     draft a plan, which is stored and narrated.
 *
 * The channel key is captured once on entry, so a plan is always stored under
 * the channel the utterance came from, even if the active voice channel
 * changes while the agent is drafting.
 *
 * @param {string} prompt - Transcribed user utterance.
 * @param {AbortSignal} signal - Abort signal for the turn.
 * @returns {Promise<{handled: boolean, prompt?: string}>} `handled: true` when
 *   plan mode consumed the utterance; otherwise `prompt` is what should be sent
 *   to the agent (the original utterance, or the approved-plan prompt).
 */
async function dispatchPlanModeUtterance(prompt, signal) {
  const language = settings.voiceLanguage;
  const isEnglish = /^en/i.test(String(language || ''));
  const key = planChannelKey();
  const existing = planStates.get(key);

  // 1. Answer to the pending decision.
  if (existing && existing.pendingDecisionIndex < existing.decisions.length) {
    const decision = existing.decisions[existing.pendingDecisionIndex];
    const answer = parseDecisionAnswer(prompt, decision, language);
    if (answer.type === 'unknown') {
      await sendText(isEnglish
        ? '⚠️ I did not catch that. Please pick an option.'
        : '⚠️ 못 알아들었어. 옵션 중에 하나 골라줘.');
      await askNextDecision(existing, signal);
      return { handled: true };
    }
    const next = {
      ...existing,
      resolvedDecisions: { ...existing.resolvedDecisions, [decision.slot]: answer.choice },
      pendingDecisionIndex: existing.pendingDecisionIndex + 1,
    };
    planStates.set(key, next);
    if (next.pendingDecisionIndex < next.decisions.length) {
      await askNextDecision(next, signal);
    } else {
      await finalizePlanReady(next, signal);
    }
    return { handled: true };
  }

  // 2. Command against a fully-resolved plan.
  if (existing) {
    const cmd = parsePlanVoiceCommand(prompt, language);
    if (cmd.type === 'skip' || cmd.type === 'insert') {
      const updated = { ...existing, steps: applyPlanCommand(existing.steps, cmd) };
      planStates.set(key, updated);
      await finalizePlanReady(updated, signal);
      return { handled: true };
    }
    if (cmd.type === 'cancel') {
      planStates.delete(key);
      const msg = isEnglish ? 'Plan cancelled.' : '계획을 취소했어.';
      await sendText(`❎ ${msg}`);
      await speakText(msg, signal, null);
      return { handled: true };
    }
    if (cmd.type === 'approve') {
      const finalPlan = renderFinalPlan(existing.steps);
      const resolvedLine = renderResolvedDecisions(existing.resolvedDecisions, language);
      // filter(Boolean) drops an empty resolved-decisions line; it also drops
      // the two '' separators.
      const promptToRun = [
        planExecutionPreamble(language),
        '',
        finalPlan,
        resolvedLine,
        '',
        `Original user request: ${existing.originalPrompt}`,
      ].filter(Boolean).join('\n');
      planStates.delete(key);
      const note = isEnglish ? 'Running the plan now.' : '계획대로 실행할게.';
      await sendText(`▶ ${note}`);
      await speakText(note, signal, null);
      return { handled: false, prompt: promptToRun };
    }
    // Unrecognized command: drop the plan and treat this as a normal turn.
    planStates.delete(key);
    return { handled: false, prompt };
  }

  // 3. Request for a new plan.
  if (isPlanEntryUtterance(prompt, language)) {
    const planPrompt = `${planModePreamble(language)}\n\nUser request: ${prompt}`;
    const adapter = adapterForProjectSession(resolveProjectSessionForChannel(key));
    const plan = { task: false, label: adapter.label, verboseProgress: false, language, projectContext: '' };
    const result = await adapter.run(planPrompt, signal, plan).catch(e => ({ answer: '', error: e }));
    // The failure still degrades to a regular turn below, but leave a trace.
    if (result?.error) warn('plan mode agent run failed', result.error?.stack || result.error);
    const { steps, decisions } = parsePlanOutput(result?.answer || '');
    if (!steps.length) {
      const failMsg = isEnglish
        ? 'I could not produce a plan. Continuing as a regular turn.'
        : '계획을 만들지 못했어. 일반 작업으로 진행할게.';
      await sendText(`⚠️ ${failMsg}`);
      return { handled: false, prompt };
    }
    const state = {
      steps,
      decisions,
      resolvedDecisions: {},
      pendingDecisionIndex: 0,
      originalPrompt: prompt,
      language,
    };
    // Use the key captured on entry: the active channel may have changed
    // while the agent was running.
    planStates.set(key, state);
    const narration = planNarrationLines(steps, language);
    await sendText(`📝 ${narration}`);
    await speakText(narration, signal, null);
    if (decisions.length) {
      await askNextDecision(state, signal);
    } else {
      await finalizePlanReady(state, signal);
    }
    return { handled: true };
  }
  return { handled: false, prompt };
}
469
+
470
/**
 * Render the plan as a spoken/text narration: a header with the step count
 * and the available voice commands, followed by the numbered steps.
 *
 * Steps whose status is 'skipped' are omitted and the remaining steps are
 * renumbered from 1.
 *
 * @param {Array<{text: string, status?: string}>} steps - Plan steps;
 *   non-arrays and nullish entries are treated as absent.
 * @param {string} language - Voice language; values starting with "en"
 *   (case-insensitive) select English, anything else Korean.
 * @returns {string} Header line followed by one line per visible step.
 */
function planNarrationLines(steps, language) {
  const visible = (Array.isArray(steps) ? steps : []).filter(s => s && s.status !== 'skipped');
  const count = visible.length;
  // Spoken aloud, so "1 steps" is audibly wrong; pluralize properly.
  const noun = count === 1 ? 'step' : 'steps';
  const header = /^en/i.test(String(language || ''))
    ? `Plan with ${count} ${noun}. Say "skip step N", "add X after step N", or "approve" to run.`
    : `${count}단계 계획. "step N 건너뛰어", "step N 다음에 X 추가", "실행"이라고 말해줘.`;
  const body = visible.map((s, i) => `${i + 1}. ${s.text}`).join('\n');
  return `${header}\n${body}`;
}
478
+
479
// Smart progress starts enabled only when an API key is configured; the
// `!smart-progress` text commands toggle it at runtime.
let smartProgressEnabled = Boolean(process.env.SMART_PROGRESS_API_KEY);
// Lazily created summarizer, shared for the lifetime of the process.
let smartProgressSummarizer = null;

/**
 * Return the shared smart-progress summarizer, creating and wiring it on
 * first use. Its 'summary' events are forwarded to the verbose progress
 * speech queue while a progress signal is active.
 *
 * Configuration (environment and voice language) is read once, at creation.
 *
 * @returns {object} The summarizer returned by `createSmartProgressSummarizer`.
 */
function ensureSmartProgressSummarizer() {
  if (!smartProgressSummarizer) {
    const { SMART_PROGRESS_API_KEY, SMART_PROGRESS_BASE_URL, SMART_PROGRESS_MODEL } = process.env;
    smartProgressSummarizer = createSmartProgressSummarizer({
      apiKey: SMART_PROGRESS_API_KEY || '',
      baseUrl: SMART_PROGRESS_BASE_URL || 'https://api.groq.com/openai/v1',
      model: SMART_PROGRESS_MODEL || 'llama-3.1-8b-instant',
      language: settings.voiceLanguage,
    });

    const speakSummary = (summary) => {
      // Ignore empty summaries and summaries arriving outside an active turn.
      if (summary && activeProgressSignal) {
        queueVerboseProgressSpeech(summary, activeProgressSignal);
      }
    };
    smartProgressSummarizer.on('summary', speakSummary);
  }
  return smartProgressSummarizer;
}
495
/**
 * Build the one-line status shown in reply to the `!smart-progress` commands.
 * Without an API key the feature is always reported as off, with the reason.
 *
 * @returns {string}
 */
function smartProgressStatusText() {
  const keyPresent = Boolean(process.env.SMART_PROGRESS_API_KEY);
  if (!keyPresent) {
    return 'smart-progress: off (no SMART_PROGRESS_API_KEY set)';
  }
  return `smart-progress: ${smartProgressEnabled ? 'on' : 'off'}`;
}
266
501
  let activeProgressLastEventAt = 0;
267
502
  let lastVerboseProgressText = '';
268
503
  let lastVerboseProgressTextAt = 0;
@@ -283,7 +518,17 @@ function createBridgeAgentAdapter(agentSettings) {
283
518
  if (!verboseProgress) return;
284
519
  activeProgressLastEventAt = Date.now();
285
520
  sendVerboseProgressText(event, activeProgressSignal);
286
- queueVerboseProgressSpeech(event, activeProgressSignal);
521
+ if (smartProgressEnabled && process.env.SMART_PROGRESS_API_KEY) {
522
+ try { ensureSmartProgressSummarizer().ingest(event); }
523
+ catch (e) { warn('smart progress ingest failed', e?.stack || e); queueVerboseProgressSpeech(event, activeProgressSignal); }
524
+ } else {
525
+ queueVerboseProgressSpeech(event, activeProgressSignal);
526
+ }
527
+ },
528
+ onStdoutChunk: chunk => {
529
+ if (activeSentencer) {
530
+ try { activeSentencer.push(chunk); } catch (e) { warn('streaming sentencer push failed', e?.stack || e); }
531
+ }
287
532
  },
288
533
  });
289
534
  }
@@ -764,6 +1009,39 @@ async function speakText(text, signal, metricsTurn = null, options = {}) {
764
1009
  metricsTurn?.stage('tts_total', Date.now() - ttsStart);
765
1010
  }
766
1011
 
1012
// Counters for the streaming turn currently in flight (null when none).
// Kept per turn so a late sentence from an earlier turn cannot be counted
// against the current one.
let activeStreamingStats = null;

/**
 * Start streaming TTS for a turn: agent stdout chunks pushed into the active
 * sentencer are split into sentences, and each sentence is queued for
 * synthesis and playback.
 *
 * @param {AbortSignal} signal - Turn abort signal; sentences emitted after
 *   abort are dropped.
 * @returns {boolean} true when a streaming turn was started, false when
 *   streaming is disabled or there is no voice connection.
 */
function beginStreamingTurn(signal) {
  if (!STREAMING_TTS_ENABLED || !connection) return false;
  streamingSpeechDelivered = false;
  const stats = { queuedSentences: 0 };
  const sentencer = createSentencer({ minChars: 40, maxLatencyMs: 800 });
  const queue = createStreamingTTSQueue({
    synth: async text => synthTTS(text, signal),
    play: async file => playAudio(file, { deleteAfter: false }),
    // Best-effort removal of the synthesized file; failures are ignored.
    cleanup: async file => { try { await fs.promises.rm(file, { force: true }); } catch {} },
    signal,
    log,
  });
  sentencer.on('sentence', text => {
    if (signal?.aborted) return;
    stats.queuedSentences += 1;
    queue.enqueue(text);
  });
  activeSentencer = sentencer;
  activeStreamingQueue = queue;
  activeStreamingStats = stats;
  log('streaming turn begin');
  return true;
}

/**
 * Finish the active streaming turn: flush the sentencer, wait for the queue
 * to drain, and record whether streaming actually delivered the answer.
 *
 * `streamingSpeechDelivered` becomes true only when at least one sentence was
 * queued AND the queue is empty afterwards. A turn that streamed nothing must
 * not count as delivered, otherwise the caller skips its fallback `speakText`
 * and the user hears no answer at all.
 *
 * @returns {Promise<void>}
 */
async function endStreamingTurn() {
  const sentencer = activeSentencer;
  const queue = activeStreamingQueue;
  const stats = activeStreamingStats;
  activeSentencer = null;
  activeStreamingQueue = null;
  activeStreamingStats = null;
  if (!sentencer || !queue) return;
  try { sentencer.flush(); } catch (e) { warn('streaming sentencer flush failed', e?.stack || e); }
  try { await queue.drain(); } catch (e) { warn('streaming queue drain failed', e?.stack || e); }
  const queuedSentences = stats ? stats.queuedSentences : 0;
  streamingSpeechDelivered = queuedSentences > 0 && queue.size === 0;
  log('streaming turn end');
}
1044
+
767
1045
  async function speakProgress(text, signal) {
768
1046
  if (signal?.aborted) return;
769
1047
  try {
@@ -883,6 +1161,9 @@ async function handleTextAgentMessage(msg, text, { speakResponse = false } = {})
883
1161
  activeTranscriptChannelId = session?.transcriptChannelId || msg.channelId;
884
1162
  const selectedAgentAdapter = adapterForProjectSession(session);
885
1163
  const projectContext = projectSessionContextText(session);
1164
+ const recentDiscordContext = formatRecentDiscordContext(recentDiscordTextByChannel, {
1165
+ channelId: activeTranscriptChannelId,
1166
+ });
886
1167
  const plan = {
887
1168
  task: true,
888
1169
  label: selectedAgentAdapter.label,
@@ -890,6 +1171,7 @@ async function handleTextAgentMessage(msg, text, { speakResponse = false } = {})
890
1171
  language: settings.voiceLanguage,
891
1172
  cwd: session?.workdir,
892
1173
  projectContext,
1174
+ recentDiscordContext,
893
1175
  };
894
1176
  const sessionBefore = selectedAgentAdapter.readSessionId?.();
895
1177
  log('text agent request start', selectedAgentAdapter.label, sessionBefore ? 'resume-existing-session' : 'new-session', 'verbose', verboseProgress, session ? `project=${session.slug}` : 'project=default');
@@ -1232,8 +1514,22 @@ async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsT
1232
1514
  return;
1233
1515
  }
1234
1516
  }
1517
+ let promptForAgent = prompt;
1518
+ try {
1519
+ const planOutcome = await dispatchPlanModeUtterance(prompt, signal);
1520
+ if (planOutcome.handled) {
1521
+ metricsTurn?.finish({ status: 'plan_mode' });
1522
+ return;
1523
+ }
1524
+ if (planOutcome.prompt) promptForAgent = planOutcome.prompt;
1525
+ } catch (e) {
1526
+ warn('plan mode dispatch failed', e?.stack || e);
1527
+ }
1235
1528
  const selectedAgentAdapter = adapterForProjectSession(session);
1236
1529
  const projectContext = projectSessionContextText(session);
1530
+ const recentDiscordContext = formatRecentDiscordContext(recentDiscordTextByChannel, {
1531
+ channelId: activeTranscriptChannelId,
1532
+ });
1237
1533
  const plan = {
1238
1534
  task: true,
1239
1535
  label: selectedAgentAdapter.label,
@@ -1241,6 +1537,7 @@ async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsT
1241
1537
  language: settings.voiceLanguage,
1242
1538
  cwd: session?.workdir,
1243
1539
  projectContext,
1540
+ recentDiscordContext,
1244
1541
  };
1245
1542
  log('Agent plan', plan.label, 'backend', selectedAgentAdapter.backend, 'task', plan.task, 'language', plan.language, session ? `project=${session.slug}` : 'project=default');
1246
1543
  const agentStart = Date.now();
@@ -1248,7 +1545,8 @@ async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsT
1248
1545
  activeProgressAbortController = progressController;
1249
1546
  activeProgressSignal = progressController.signal;
1250
1547
  activeProgressLastEventAt = Date.now();
1251
- const agentPromise = selectedAgentAdapter.ask(prompt, signal, plan);
1548
+ const streamingTurnActive = beginStreamingTurn(signal);
1549
+ const agentPromise = selectedAgentAdapter.ask(promptForAgent, signal, plan);
1252
1550
  let done = false;
1253
1551
  // Status announcements share one queue with verbose progress so they never
1254
1552
  // talk over each other. In verbose mode, skip the generic initial prompt;
@@ -1292,6 +1590,7 @@ async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsT
1292
1590
  if (!isAbortError(e)) warn('progress loop failed', e?.stack || e);
1293
1591
  });
1294
1592
  const answer = await agentPromise.finally(() => { done = true; });
1593
+ if (streamingTurnActive) await endStreamingTurn();
1295
1594
  metricsTurn?.stage('agent', Date.now() - agentStart, { answerChars: String(answer || '').length, backend: selectedAgentAdapter.backend });
1296
1595
  void progressLoop;
1297
1596
  if (interruptedTurns.has(turnId) || signal.aborted) { metricsTurn?.finish({ status: 'aborted_after_agent' }); return; }
@@ -1306,7 +1605,20 @@ async function handleRecording(userId, wavPath, pcmBytes, segments = 1, metricsT
1306
1605
  }
1307
1606
  log('spoken answer', spokenAnswer.slice(0, 200));
1308
1607
  stopProgressSpeech(progressController.signal, 'agent-answer-ready');
1309
- await speakText(spokenAnswer, signal, metricsTurn, { mirrorText: !answerTextDelivered });
1608
+ if (streamingTurnActive && streamingSpeechDelivered) {
1609
+ log('skipping post-run speakText; streaming already delivered audio');
1610
+ } else {
1611
+ await speakText(spokenAnswer, signal, metricsTurn, { mirrorText: !answerTextDelivered });
1612
+ }
1613
+ try {
1614
+ const guildId = client.channels.cache.get(activeVoiceChannelId)?.guild?.id || '';
1615
+ await maybeNotifyTaskComplete({
1616
+ answer: spokenAnswer || answer,
1617
+ label: selectedAgentAdapter.label,
1618
+ elapsedMs: Date.now() - agentStart,
1619
+ guildId,
1620
+ });
1621
+ } catch (e) { warn('maybeNotifyTaskComplete failed', e?.message || e); }
1310
1622
  metricsTurn?.finish({ status: 'ok' });
1311
1623
  } catch (e) {
1312
1624
  if (isAbortError(e) || interruptedTurns.has(turnId)) {
@@ -1617,6 +1929,11 @@ client.on('messageCreate', async msg => {
1617
1929
  if (msg.author.bot) return;
1618
1930
  if (!isAllowed(msg.author.id)) return;
1619
1931
  const content = msg.content.trim();
1932
+ appendRecentDiscordText(recentDiscordTextByChannel, {
1933
+ channelId: msg.channelId,
1934
+ authorLabel: msg.member?.displayName || msg.author?.username || 'user',
1935
+ content,
1936
+ });
1620
1937
  const projectSessionCommand = parseProjectSessionCommand(content);
1621
1938
  if (projectSessionCommand) {
1622
1939
  try {
@@ -1637,6 +1954,24 @@ client.on('messageCreate', async msg => {
1637
1954
  setVerboseProgress(false, 'discord-command');
1638
1955
  return void msg.reply(verboseStatusText());
1639
1956
  }
1957
+ if (content === '!notify') return void msg.reply(notifyStatusText());
1958
+ if (['!notify on', '!notify always', '!notify 1'].includes(content.toLowerCase())) {
1959
+ notifyUserOptIn = true;
1960
+ return void msg.reply(notifyStatusText());
1961
+ }
1962
+ if (['!notify off', '!notify auto', '!notify 0'].includes(content.toLowerCase())) {
1963
+ notifyUserOptIn = false;
1964
+ return void msg.reply(notifyStatusText());
1965
+ }
1966
+ if (content === '!smart-progress' || content === '!smart_progress') return void msg.reply(smartProgressStatusText());
1967
+ if (['!smart-progress on', '!smart-progress true', '!smart-progress 1', '!smart_progress on'].includes(content.toLowerCase())) {
1968
+ smartProgressEnabled = true;
1969
+ return void msg.reply(smartProgressStatusText());
1970
+ }
1971
+ if (['!smart-progress off', '!smart-progress false', '!smart-progress 0', '!smart_progress off'].includes(content.toLowerCase())) {
1972
+ smartProgressEnabled = false;
1973
+ return void msg.reply(smartProgressStatusText());
1974
+ }
1640
1975
  if (content === '!sensitivity') return void msg.reply(sensitivityStatusText());
1641
1976
  if (content === '!latency' || content === '!metrics') {
1642
1977
  const summary = summarizeLatencyRecords(readJsonlRecords(settings.latencyLogPath, { limit: 200 }));