npm - verbalcoding - Versions diffs - 0.2.12 → 0.2.13 - Mend

verbalcoding 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169)

package/.env.example +74 -4
package/README.es.md +3 -1
package/README.fr.md +3 -1
package/README.ja.md +3 -1
package/README.ko.md +4 -2
package/README.md +4 -2
package/README.ru.md +3 -1
package/README.zh.md +3 -1
package/app-node/agent_adapters.test.mjs +14 -0
package/app-node/agent_routing.mjs +148 -0
package/app-node/agent_routing.test.mjs +138 -0
package/app-node/agent_turn.mjs +86 -0
package/app-node/agent_turn.test.mjs +109 -0
package/app-node/bridge_context.mjs +73 -0
package/app-node/bridge_context.test.mjs +54 -0
package/app-node/bridge_state.mjs +4 -0
package/app-node/bridge_wireup.test.mjs +462 -0
package/app-node/cli_install.test.mjs +31 -0
package/app-node/cross_agent_routing.test.mjs +78 -0
package/app-node/discord_command_router.mjs +204 -0
package/app-node/discord_command_router.test.mjs +311 -0
package/app-node/discord_voice_setup.mjs +251 -0
package/app-node/discord_voice_setup.test.mjs +86 -0
package/app-node/hermes_profiles.test.mjs +12 -1
package/app-node/install_config.mjs +110 -3
package/app-node/install_config.test.mjs +8 -0
package/app-node/instance_doctor.test.mjs +9 -0
package/app-node/instances.test.mjs +8 -1
package/app-node/main.mjs +488 -1368
package/app-node/mcp_tools.test.mjs +7 -0
package/app-node/notification_handler.mjs +89 -0
package/app-node/notification_handler.test.mjs +187 -0
package/app-node/plan_dispatcher.mjs +215 -0
package/app-node/plan_dispatcher.test.mjs +101 -0
package/app-node/plan_mode.mjs +36 -7
package/app-node/plan_mode.test.mjs +78 -0
package/app-node/progress_handler.mjs +220 -0
package/app-node/progress_handler.test.mjs +193 -0
package/app-node/progress_speech.mjs +54 -32
package/app-node/progress_speech.test.mjs +12 -3
package/app-node/project_sessions.mjs +5 -2
package/app-node/project_sessions.test.mjs +7 -0
package/app-node/research_mode.mjs +282 -0
package/app-node/research_mode.test.mjs +264 -0
package/app-node/restart_notice.mjs +3 -0
package/app-node/restart_notice.test.mjs +11 -0
package/app-node/session_ontology.mjs +271 -0
package/app-node/session_ontology.test.mjs +130 -0
package/app-node/smart_progress.mjs +1 -1
package/app-node/stream_sentencer.mjs +32 -2
package/app-node/stream_sentencer.test.mjs +65 -0
package/app-node/streaming_tts_queue.mjs +5 -1
package/app-node/streaming_tts_queue.test.mjs +7 -1
package/app-node/stt_whisper.mjs +24 -0
package/app-node/stt_whisper.test.mjs +32 -0
package/app-node/text_routing.mjs +4 -2
package/app-node/tts_backends.mjs +537 -3
package/app-node/tts_backends.test.mjs +454 -0
package/app-node/tts_player.mjs +164 -0
package/app-node/tts_player.test.mjs +202 -0
package/app-node/tts_runtime.mjs +134 -0
package/app-node/tts_runtime.test.mjs +89 -0
package/app-node/tts_settings.mjs +150 -3
package/app-node/tts_settings.test.mjs +204 -0
package/app-node/tts_voice_config.mjs +136 -2
package/app-node/tts_voice_config.test.mjs +94 -0
package/app-node/utterance_router.mjs +216 -0
package/app-node/utterance_router.test.mjs +236 -0
package/app-node/voice_autojoin.mjs +37 -0
package/app-node/voice_autojoin.test.mjs +59 -0
package/app-node/voice_io.mjs +272 -0
package/app-node/voice_io.test.mjs +102 -0
package/app-node/voice_turn_runner.mjs +449 -0
package/app-node/voice_turn_runner.test.mjs +289 -0
package/docs/CONFIGURATION.md +12 -2
package/docs/HARNESSES.md +58 -0
package/docs/HARNESS_AIDER.md +50 -0
package/docs/HARNESS_CLAUDE.md +56 -0
package/docs/HARNESS_CODEX.md +56 -0
package/docs/HARNESS_CURSOR.md +45 -0
package/docs/HARNESS_GEMINI.md +45 -0
package/docs/HARNESS_HERMES.md +57 -0
package/docs/HARNESS_OPENCLAW.md +44 -0
package/docs/HARNESS_OPENCODE.md +44 -0
package/docs/README.md +1 -0
package/docs/ROADMAP.md +20 -5
package/docs/TTS_BACKENDS.md +227 -0
package/docs/USAGE.md +22 -0
package/docs/i18n/AGENTS.es.md +34 -0
package/docs/i18n/AGENTS.fr.md +34 -0
package/docs/i18n/AGENTS.ja.md +34 -0
package/docs/i18n/AGENTS.ko.md +34 -0
package/docs/i18n/AGENTS.ru.md +34 -0
package/docs/i18n/AGENTS.zh.md +34 -0
package/docs/i18n/HARNESSES.es.md +58 -0
package/docs/i18n/HARNESSES.fr.md +58 -0
package/docs/i18n/HARNESSES.ja.md +58 -0
package/docs/i18n/HARNESSES.ko.md +58 -0
package/docs/i18n/HARNESSES.ru.md +58 -0
package/docs/i18n/HARNESSES.zh.md +58 -0
package/docs/i18n/HARNESS_AIDER.es.md +48 -0
package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
package/docs/i18n/HARNESS_CODEX.es.md +55 -0
package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
package/docs/i18n/HARNESS_HERMES.es.md +54 -0
package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
package/integrations/fireredtts2/mlx_llm.py +183 -0
package/integrations/fireredtts2/synth.py +156 -0
package/integrations/fireredtts2/synth_mlx.py +196 -0
package/integrations/mlxaudio/synth.py +74 -0
package/integrations/neuttsair/synth.py +104 -0
package/integrations/omnivoice/synth.py +110 -0
package/package.json +6 -1
package/scripts/cli.mjs +84 -0
package/scripts/doctor.mjs +104 -4
package/scripts/install.mjs +5 -1
package/scripts/install_fireredtts2.sh +109 -0
package/scripts/install_mlxaudio.sh +34 -0
package/scripts/install_mossttsnano.sh +46 -0
package/scripts/postinstall.mjs +34 -0

package/app-node/tts_backends.test.mjs (changed)

@@ -47,6 +47,93 @@ function baseSettings() {
       intraOpThreads: '',
       interOpThreads: '',
     },
+    omnivoice: {
+      python: '/project/.venv-omnivoice/bin/python',
+      model: 'k2-fsa/OmniVoice',
+      device: 'mps',
+      dtype: 'float16',
+      refAudio: '/project/voice-samples/me.wav',
+      refText: '테스트 기준 음성입니다.',
+      language: 'ko',
+      speaker: 'warm korean male voice',
+      timeoutMs: 180000,
+      useForProgress: false,
+    },
+    qwen3tts: {
+      command: 'audio',
+      mode: 'custom',
+      model: '',
+      language: 'korean',
+      speaker: 'sohee',
+      instruct: 'calm conversational Korean',
+      refAudio: '/project/voice-samples/me.wav',
+      refText: '테스트 기준 음성입니다.',
+      stream: true,
+      timeoutMs: 120000,
+      useForProgress: false,
+    },
+    fireredtts2: {
+      command: 'fireredtts2',
+      pretrainedDir: '/project/models/FireRedTTS2',
+      device: 'mps',
+      genType: 'monologue',
+      speaker: 'S1',
+      promptAudio: '/project/voice-samples/me.wav',
+      promptText: '테스트 기준 음성입니다.',
+      useBf16: true,
+      timeoutMs: 180000,
+      useForProgress: false,
+    },
+    mossttsnano: {
+      command: 'python3',
+      script: '/project/vendor/MOSS-TTS-Nano/infer.py',
+      checkpoint: 'OpenMOSS-Team/MOSS-TTS-Nano',
+      audioTokenizer: 'OpenMOSS-Team/MOSS-Audio-Tokenizer-Nano',
+      mode: 'voice_clone',
+      language: 'ko',
+      device: 'cpu',
+      dtype: 'float32',
+      promptAudio: '/project/voice-samples/me.wav',
+      promptText: '테스트 기준 음성입니다.',
+      maxNewFrames: 256,
+      seed: '7',
+      timeoutMs: 120000,
+      useForProgress: false,
+    },
+    mossttsnano_mlx: {
+      python: 'python3',
+      script: '/project/integrations/mossttsnano_mlx/synth.py',
+      workerScript: '/project/integrations/mossttsnano_mlx/worker.py',
+      workerEnabled: false,
+      workerStartupTimeoutMs: 120000,
+      torchInferScript: '/project/vendor/MOSS-TTS-Nano/infer.py',
+      checkpoint: 'OpenMOSS-Team/MOSS-TTS-Nano',
+      audioTokenizer: 'OpenMOSS-Team/MOSS-Audio-Tokenizer-Nano',
+      mode: 'voice_clone',
+      language: 'ko',
+      torchDevice: 'cpu',
+      torchDtype: 'float32',
+      promptAudio: '/project/voice-samples/me.wav',
+      promptText: '테스트 기준 음성입니다.',
+      maxNewFrames: 120,
+      seed: '7',
+      timeoutMs: 180000,
+      useForProgress: false,
+    },
+    neuttsair: {
+      python: '/project/.venv-neuttsair/bin/python',
+      script: '/project/integrations/neuttsair/synth.py',
+      backboneRepo: 'neuphonic/neutts-air-q4-gguf',
+      backboneDevice: 'mps',
+      codecRepo: 'neuphonic/neucodec',
+      codecDevice: 'mps',
+      refAudio: '/project/voice-samples/me.wav',
+      refText: 'Reference voice text.',
+      language: 'en',
+      sampleRate: 24000,
+      timeoutMs: 120000,
+      useForProgress: false,
+    },
   };
 }
@@ -146,6 +233,29 @@ test('OpenVoice progress uses Edge fallback unless explicitly enabled', async ()
   assert.equal(calls[0].cmd, 'edge-tts');
 });
+test('createTtsBackend forwards backend label to onFallback when non-edge backend errors', async () => {
+  const settings = { ...baseSettings(), backend: 'openvoice' };
+  const events = [];
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    warn: () => {},
+    onFallback: payload => events.push(payload),
+    execFileAsync: async cmd => {
+      if (cmd.includes('.venv-openvoice')) throw new Error('openvoice missing');
+    },
+  });
+  await backend.synthesize('first', { kind: 'final' });
+  await backend.synthesize('second', { kind: 'final' });
+  assert.equal(events.length, 2);
+  assert.equal(events[0].backend, 'openvoice');
+  assert.equal(events[0].kind, 'final');
+  assert.ok(events[0].error instanceof Error);
+});
 test('OpenVoice final synthesis falls back to Edge when wrapper fails', async () => {
   const calls = [];
   const settings = { ...baseSettings(), backend: 'openvoice' };
@@ -384,6 +494,350 @@ test('Supertonic falls back to Edge when local CLI fails', async () => {
   assert.ok(calls.some(call => /supertonic failed; falling back to edge/i.test(call.warn || '')));
 });
+test('OmniVoice backend calls Python wrapper with model, reference sample, and output path', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'omnivoice' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 999 }),
+    execFileAsync: async (cmd, args, options) => calls.push({ cmd, args, options }),
+  });
+  const out = await backend.synthesize('옴니보이스 테스트', { kind: 'final' });
+  assert.equal(calls[0].cmd, '/project/.venv-omnivoice/bin/python');
+  assert.ok(calls[0].args.some(arg => String(arg).endsWith('integrations/omnivoice/synth.py')));
+  assert.ok(calls[0].args.includes('--model'));
+  assert.ok(calls[0].args.includes('k2-fsa/OmniVoice'));
+  assert.ok(calls[0].args.includes('--ref-audio'));
+  assert.ok(calls[0].args.includes('/project/voice-samples/me.wav'));
+  assert.ok(calls[0].args.includes('--ref-text'));
+  assert.ok(calls[0].args.includes('테스트 기준 음성입니다.'));
+  assert.ok(calls[0].args.includes('--speaker'));
+  assert.ok(calls[0].args.includes('warm korean male voice'));
+  assert.ok(calls[0].args.includes('--text'));
+  assert.ok(calls[0].args.includes('옴니보이스 테스트'));
+  assert.equal(calls[0].options.timeout, 180000);
+  assert.match(out, /^\/tmp\/verbalcoding-omnivoice-/);
+  assert.deepEqual(backend.cacheKeyParts(), ['omnivoice', 'k2-fsa/OmniVoice', 'mps', 'float16', '/project/voice-samples/me.wav', '테스트 기준 음성입니다.', 'ko', 'warm korean male voice']);
+});
+test('OmniVoice progress uses Edge fallback unless explicitly enabled', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'omnivoice' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    execFileAsync: async (cmd, args) => calls.push({ cmd, args }),
+  });
+  await backend.synthesize('진행 안내', { kind: 'progress' });
+  assert.equal(calls[0].cmd, 'edge-tts');
+});
+test('OmniVoice falls back to Edge when Python wrapper fails', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'omnivoice' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    warn: (...args) => calls.push({ warn: args.join(' ') }),
+    execFileAsync: async (cmd, args) => {
+      calls.push({ cmd, args });
+      if (cmd.includes('.venv-omnivoice')) throw new Error('omnivoice missing');
+    },
+  });
+  await backend.synthesize('fallback', { kind: 'final' });
+  assert.ok(calls.some(call => call.cmd?.includes('.venv-omnivoice')));
+  assert.ok(calls.some(call => call.cmd === 'edge-tts'));
+  assert.ok(calls.some(call => /omnivoice failed; falling back to edge/i.test(call.warn || '')));
+});
+test('Qwen3 TTS backend calls audio CLI with qwen3 engine, speaker, language, and output path', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'qwen3tts' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 999 }),
+    execFileAsync: async (cmd, args, options) => calls.push({ cmd, args, options }),
+  });
+  const out = await backend.synthesize('큐웬 티티에스 테스트', { kind: 'final' });
+  assert.equal(calls[0].cmd, 'audio');
+  assert.deepEqual(calls[0].args.slice(0, 5), ['speak', '큐웬 티티에스 테스트', '--engine', 'qwen3', '--output']);
+  assert.ok(calls[0].args.includes('--language'));
+  assert.ok(calls[0].args.includes('korean'));
+  assert.ok(calls[0].args.includes('--stream'));
+  assert.ok(calls[0].args.includes('--model'));
+  assert.ok(calls[0].args.includes('customVoice'));
+  assert.ok(calls[0].args.includes('--speaker'));
+  assert.ok(calls[0].args.includes('sohee'));
+  assert.ok(calls[0].args.includes('--instruct'));
+  assert.ok(calls[0].args.includes('calm conversational Korean'));
+  assert.equal(calls[0].options.timeout, 120000);
+  assert.match(out, /^\/tmp\/verbalcoding-qwen3tts-/);
+  assert.deepEqual(backend.cacheKeyParts(), ['qwen3tts', 'audio', 'custom', 'korean', 'sohee', 'calm conversational Korean', '/project/voice-samples/me.wav', '테스트 기준 음성입니다.']);
+});
+test('Qwen3 TTS clone mode passes reference audio', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'qwen3tts', qwen3tts: { ...baseSettings().qwen3tts, mode: 'clone' } };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 999 }),
+    execFileAsync: async (cmd, args) => calls.push({ cmd, args }),
+  });
+  await backend.synthesize('복제 음성 테스트', { kind: 'final' });
+  assert.ok(calls[0].args.includes('--model'));
+  assert.ok(calls[0].args.includes('base'));
+  assert.ok(calls[0].args.includes('--voice-sample'));
+  assert.ok(calls[0].args.includes('/project/voice-samples/me.wav'));
+  assert.equal(calls[0].args.includes('--speaker'), false);
+});
+test('Qwen3 TTS progress uses Edge fallback unless explicitly enabled', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'qwen3tts' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    execFileAsync: async (cmd, args) => calls.push({ cmd, args }),
+  });
+  await backend.synthesize('진행 안내', { kind: 'progress' });
+  assert.equal(calls[0].cmd, 'edge-tts');
+});
+test('Qwen3 TTS falls back to Edge when local CLI fails', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'qwen3tts' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    warn: (...args) => calls.push({ warn: args.join(' ') }),
+    execFileAsync: async (cmd, args) => {
+      calls.push({ cmd, args });
+      if (cmd === 'audio') throw new Error('qwen3 tts missing');
+    },
+  });
+  await backend.synthesize('fallback', { kind: 'final' });
+  assert.ok(calls.some(call => call.cmd === 'audio'));
+  assert.ok(calls.some(call => call.cmd === 'edge-tts'));
+  assert.ok(calls.some(call => /qwen3tts failed; falling back to edge/i.test(call.warn || '')));
+});
+test('FireRedTTS-2 backend calls configured CLI with model, prompt, and output path', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'fireredtts2' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 999 }),
+    execFileAsync: async (cmd, args, options) => calls.push({ cmd, args, options }),
+  });
+  const out = await backend.synthesize('파이어레드 테스트', { kind: 'final' });
+  assert.equal(calls[0].cmd, 'fireredtts2');
+  assert.deepEqual(calls[0].args.slice(0, 4), ['--text', '파이어레드 테스트', '--output', calls[0].args[3]]);
+  assert.ok(calls[0].args.includes('--pretrained-dir'));
+  assert.ok(calls[0].args.includes('/project/models/FireRedTTS2'));
+  assert.ok(calls[0].args.includes('--prompt-audio'));
+  assert.ok(calls[0].args.includes('/project/voice-samples/me.wav'));
+  assert.ok(calls[0].args.includes('--bf16'));
+  assert.equal(calls[0].options.timeout, 180000);
+  assert.match(out, /^\/tmp\/verbalcoding-fireredtts2-/);
+  assert.deepEqual(backend.cacheKeyParts(), ['fireredtts2', 'fireredtts2', '/project/models/FireRedTTS2', 'mps', 'monologue', 'S1', '/project/voice-samples/me.wav', '테스트 기준 음성입니다.', true]);
+});
+test('FireRedTTS-2 progress uses Edge fallback unless explicitly enabled', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'fireredtts2' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    execFileAsync: async (cmd, args) => calls.push({ cmd, args }),
+  });
+  await backend.synthesize('진행 안내', { kind: 'progress' });
+  assert.equal(calls[0].cmd, 'edge-tts');
+});
+test('MOSS-TTS-Nano backend calls infer.py with checkpoint, prompt, and output path', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'mossttsnano' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 999 }),
+    execFileAsync: async (cmd, args, options) => calls.push({ cmd, args, options }),
+  });
+  const out = await backend.synthesize('모스 나노 테스트', { kind: 'final' });
+  assert.equal(calls[0].cmd, 'python3');
+  assert.deepEqual(calls[0].args.slice(0, 5), ['/project/vendor/MOSS-TTS-Nano/infer.py', '--text', '모스 나노 테스트', '--output-audio-path', calls[0].args[4]]);
+  assert.ok(calls[0].args.includes('--checkpoint'));
+  assert.ok(calls[0].args.includes('OpenMOSS-Team/MOSS-TTS-Nano'));
+  assert.ok(calls[0].args.includes('--audio-tokenizer-pretrained-name-or-path'));
+  assert.ok(calls[0].args.includes('--prompt-audio-path'));
+  assert.ok(calls[0].args.includes('/project/voice-samples/me.wav'));
+  assert.ok(calls[0].args.includes('--max-new-frames'));
+  assert.ok(calls[0].args.includes('256'));
+  assert.equal(calls[0].options.timeout, 120000);
+  assert.match(out, /^\/tmp\/verbalcoding-mossttsnano-/);
+  assert.deepEqual(backend.cacheKeyParts(), ['mossttsnano', 'python3', '/project/vendor/MOSS-TTS-Nano/infer.py', 'OpenMOSS-Team/MOSS-TTS-Nano', 'OpenMOSS-Team/MOSS-Audio-Tokenizer-Nano', 'voice_clone', 'ko', 'cpu', 'float32', '/project/voice-samples/me.wav', '테스트 기준 음성입니다.', 256, '7']);
+});
+test('MOSS-TTS-Nano falls back to Edge when local CLI fails', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'mossttsnano' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    warn: (...args) => calls.push({ warn: args.join(' ') }),
+    execFileAsync: async (cmd, args) => {
+      calls.push({ cmd, args });
+      if (cmd === 'python3') throw new Error('moss missing');
+    },
+  });
+  await backend.synthesize('fallback', { kind: 'final' });
+  assert.ok(calls.some(call => call.cmd === 'python3'));
+  assert.ok(calls.some(call => call.cmd === 'edge-tts'));
+  assert.ok(calls.some(call => /mossttsnano failed; falling back to edge/i.test(call.warn || '')));
+});
+test('MOSS-TTS-Nano MLX hybrid backend calls experimental synth wrapper', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'mossttsnano_mlx' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 999 }),
+    execFileAsync: async (cmd, args, options) => calls.push({ cmd, args, options }),
+  });
+  const out = await backend.synthesize('모스 엠엘엑스 테스트', { kind: 'final' });
+  assert.equal(calls[0].cmd, 'python3');
+  assert.deepEqual(calls[0].args.slice(0, 5), ['/project/integrations/mossttsnano_mlx/synth.py', '--text', '모스 엠엘엑스 테스트', '--output-audio-path', calls[0].args[4]]);
+  assert.ok(calls[0].args.includes('--torch-infer-script'));
+  assert.ok(calls[0].args.includes('/project/vendor/MOSS-TTS-Nano/infer.py'));
+  assert.ok(calls[0].args.includes('--torch-device'));
+  assert.ok(calls[0].args.includes('cpu'));
+  assert.ok(calls[0].args.includes('--torch-dtype'));
+  assert.ok(calls[0].args.includes('float32'));
+  assert.ok(calls[0].args.includes('--prompt-audio-path'));
+  assert.ok(calls[0].args.includes('/project/voice-samples/me.wav'));
+  assert.equal(calls[0].options.timeout, 180000);
+  assert.match(out, /^\/tmp\/verbalcoding-mossttsnano-mlx-/);
+  assert.deepEqual(backend.cacheKeyParts(), ['mossttsnano_mlx', 'subprocess', 'python3', '/project/integrations/mossttsnano_mlx/synth.py', '/project/integrations/mossttsnano_mlx/worker.py', '/project/vendor/MOSS-TTS-Nano/infer.py', 'OpenMOSS-Team/MOSS-TTS-Nano', 'OpenMOSS-Team/MOSS-Audio-Tokenizer-Nano', 'voice_clone', 'ko', 'cpu', 'float32', '/project/voice-samples/me.wav', '테스트 기준 음성입니다.', 120, '7']);
+});
+test('MOSS-TTS-Nano MLX progress uses Edge fallback unless explicitly enabled', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'mossttsnano_mlx' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    execFileAsync: async (cmd, args) => calls.push({ cmd, args }),
+  });
+  await backend.synthesize('진행 안내', { kind: 'progress' });
+  assert.equal(calls[0].cmd, 'edge-tts');
+});
+test('NeuTTS Air backend calls Python wrapper with GGUF backbone, reference sample, and output path', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'neuttsair' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 999 }),
+    execFileAsync: async (cmd, args, options) => calls.push({ cmd, args, options }),
+  });
+  const out = await backend.synthesize('NeuTTS Air test', { kind: 'final' });
+  assert.equal(calls[0].cmd, '/project/.venv-neuttsair/bin/python');
+  assert.deepEqual(calls[0].args.slice(0, 5), ['/project/integrations/neuttsair/synth.py', '--text', 'NeuTTS Air test', '--output', calls[0].args[4]]);
+  assert.ok(calls[0].args.includes('--backbone-repo'));
+  assert.ok(calls[0].args.includes('neuphonic/neutts-air-q4-gguf'));
+  assert.ok(calls[0].args.includes('--backbone-device'));
+  assert.ok(calls[0].args.includes('mps'));
+  assert.ok(calls[0].args.includes('--codec-repo'));
+  assert.ok(calls[0].args.includes('neuphonic/neucodec'));
+  assert.ok(calls[0].args.includes('--codec-device'));
+  assert.ok(calls[0].args.includes('--ref-audio'));
+  assert.ok(calls[0].args.includes('/project/voice-samples/me.wav'));
+  assert.ok(calls[0].args.includes('--ref-text'));
+  assert.ok(calls[0].args.includes('Reference voice text.'));
+  assert.ok(calls[0].args.includes('--language'));
+  assert.ok(calls[0].args.includes('en'));
+  assert.equal(calls[0].options.timeout, 120000);
+  assert.match(out, /^\/tmp\/verbalcoding-neuttsair-/);
+  assert.deepEqual(backend.cacheKeyParts(), ['neuttsair', '/project/.venv-neuttsair/bin/python', '/project/integrations/neuttsair/synth.py', 'neuphonic/neutts-air-q4-gguf', 'mps', 'neuphonic/neucodec', 'mps', '/project/voice-samples/me.wav', 'Reference voice text.', 'en', 24000]);
+});
+test('NeuTTS Air progress uses Edge fallback unless explicitly enabled', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'neuttsair' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    execFileAsync: async (cmd, args) => calls.push({ cmd, args }),
+  });
+  await backend.synthesize('진행 안내', { kind: 'progress' });
+  assert.equal(calls[0].cmd, 'edge-tts');
+});
+test('NeuTTS Air falls back to Edge when Python wrapper fails', async () => {
+  const calls = [];
+  const settings = { ...baseSettings(), backend: 'neuttsair' };
+  const backend = createTtsBackend(settings, {
+    tmpdir: '/tmp',
+    existsSync: () => true,
+    statSync: () => ({ size: 123 }),
+    warn: (...args) => calls.push({ warn: args.join(' ') }),
+    execFileAsync: async (cmd, args) => {
+      calls.push({ cmd, args });
+      if (cmd.includes('.venv-neuttsair')) throw new Error('neutts missing');
+    },
+  });
+  await backend.synthesize('fallback', { kind: 'final' });
+  assert.ok(calls.some(call => call.cmd?.includes('.venv-neuttsair')));
+  assert.ok(calls.some(call => call.cmd === 'edge-tts'));
+  assert.ok(calls.some(call => /neuttsair failed; falling back to edge/i.test(call.warn || '')));
+});
 test('TTS backends omit signal option when no AbortSignal is provided', async () => {
   const calls = [];
   const backend = createTtsBackend(baseSettings(), {

package/app-node/tts_player.mjs (added)

@@ -0,0 +1,164 @@
+// Text-to-speech playback pipeline: chunk text -> synth -> play through the
+// shared @discordjs/voice AudioPlayer, with optional streaming (sentence-by-
+// sentence) playback and barge-in cancellation.
+//
+// Phase 3 extraction from main.mjs. Reads/writes shared bridge state
+// (connection, player, speaking, speechPlaybackGeneration, activeSentencer,
+// activeStreamingQueue, streamingSpeechDelivered, ttsBackend) and calls back
+// into helpers that still live in main.mjs (refreshTtsRuntimeConfig,
+// waitEvent, sendText). Module-level imports keep the heavy dependencies
+// (@discordjs/voice helpers, streaming utilities) out of main.mjs.
+import fs from 'node:fs';
+import { AudioPlayerStatus, StreamType, createAudioResource } from '@discordjs/voice';
+import { splitForTTS } from './tts_chunks.mjs';
+import { playChunkedTTSWithPrefetch } from './tts_prefetch.mjs';
+import { createSentencer } from './stream_sentencer.mjs';
+import { createStreamingTTSQueue } from './streaming_tts_queue.mjs';
+export function createTtsPlayer(deps) {
+  const {
+    bridge,
+    settings,
+    log,
+    warn,
+    sleep,
+    sendText,
+    refreshTtsRuntimeConfig,
+    waitEvent,
+    isAbortError,
+    STREAMING_TTS_ENABLED,
+  } = deps;
+  async function synthTTS(text, signal) {
+    await refreshTtsRuntimeConfig();
+    let lastError = null;
+    for (let attempt = 1; attempt <= 3; attempt += 1) {
+      try {
+        log('final tts synth start', 'backend', bridge.ttsBackend.name, 'attempt', attempt, 'chars', String(text || '').length);
+        const out = await bridge.ttsBackend.synthesize(text, { signal, kind: 'final' });
+        log('final tts synth done', 'backend', bridge.ttsBackend.name, 'attempt', attempt, out, fs.statSync(out).size);
+        return out;
+      } catch (e) {
+        lastError = e;
+        if (isAbortError(e) || signal?.aborted) throw e;
+        warn('final tts synth failed', 'attempt', attempt, e?.stderr?.toString?.().slice(-500) || e?.message || e);
+        await sleep(1000 * attempt);
+      }
+    }
+    throw lastError;
+  }
+  async function playAudio(file, { deleteAfter = true } = {}) {
+    if (!bridge.connection) return;
+    bridge.speaking = true;
+    try {
+      const resource = createAudioResource(file, { inputType: StreamType.Arbitrary, inlineVolume: true });
+      resource.volume?.setVolume(settings.tts.volume);
+      bridge.player.play(resource);
+      bridge.connection.subscribe(bridge.player);
+      await waitEvent(bridge.player, AudioPlayerStatus.Idle, 120000).catch(() => {});
+    } finally {
+      bridge.speaking = false;
+      if (deleteAfter) fs.rm(file, { force: true }, () => {});
+    }
+  }
+  async function speakText(text, signal, metricsTurn = null, options = {}) {
+    const chunks = splitForTTS(text, settings.tts.maxChars);
+    if (!chunks.length) return;
+    if (options.mirrorText !== false) {
+      await sendText(`${options.mirrorPrefix || '🔊 음성으로 읽는 내용'}:\n${String(text || '')}`);
+    }
+    log('TTS chunks', chunks.length, 'maxChars', settings.tts.maxChars, 'backend', bridge.ttsBackend.name);
+    const playbackGeneration = bridge.speechPlaybackGeneration;
+    const playbackStopped = () => playbackGeneration !== bridge.speechPlaybackGeneration;
+    let synthMs = 0;
+    let playMs = 0;
+    const ttsStart = Date.now();
+    await playChunkedTTSWithPrefetch(chunks, {
+      signal,
+      log,
+      synth: async chunk => {
+        if (playbackStopped()) return null;
+        const start = Date.now();
+        try { return await synthTTS(chunk, signal); }
+        finally { synthMs += Date.now() - start; }
+      },
+      play: async file => {
+        if (playbackStopped()) {
+          await fs.promises.rm(file, { force: true }).catch(() => {});
+          return;
+        }
+        const start = Date.now();
+        try { return await playAudio(file); }
+        finally { playMs += Date.now() - start; }
+      },
+      cleanup: file => fs.promises.rm(file, { force: true }),
+    });
+    metricsTurn?.stage('tts_synth', synthMs, { ttsChunks: chunks.length, spokenChars: String(text || '').length });
+    metricsTurn?.stage('tts_play', playMs);
+    metricsTurn?.stage('tts_total', Date.now() - ttsStart);
+  }
+  function beginStreamingTurn(signal) {
+    if (!STREAMING_TTS_ENABLED || !bridge.connection) return false;
+    bridge.streamingSpeechDelivered = false;
+    const sentencer = createSentencer({ minChars: 40, maxLatencyMs: 800 });
+    let streamingDropAnnounced = false;
+    const queue = createStreamingTTSQueue({
+      synth: async text => synthTTS(text, signal),
+      play: async file => playAudio(file, { deleteAfter: false }),
+      cleanup: async file => { try { await fs.promises.rm(file, { force: true }); } catch {} },
+      signal,
+      log,
+      onSynthError: () => {
+        if (streamingDropAnnounced || signal?.aborted) return;
+        streamingDropAnnounced = true;
+        const en = /^en/i.test(String(settings.voiceLanguage || ''));
+        const msg = en
+          ? 'Some sentences could not be spoken; check the text channel for the full answer.'
+          : '일부 문장 음성 합성에 실패했어. 전체 답변은 텍스트 채널을 확인해줘.';
+        void sendText(`⚠️ ${msg}`).catch(e => warn('streaming synth notice send failed', e?.message || e));
+      },
+    });
+    sentencer.on('sentence', text => {
+      if (signal?.aborted) return;
+      queue.enqueue(text);
+    });
+    bridge.activeSentencer = sentencer;
+    bridge.activeStreamingQueue = queue;
+    log('streaming turn begin');
+    return true;
+  }
+  async function endStreamingTurn() {
+    const sentencer = bridge.activeSentencer;
+    const queue = bridge.activeStreamingQueue;
+    bridge.activeSentencer = null;
+    bridge.activeStreamingQueue = null;
+    if (!sentencer || !queue) return;
+    try { sentencer.flush(); } catch (e) { warn('streaming sentencer flush failed', e?.stack || e); }
+    try { await queue.drain(); } catch (e) { warn('streaming queue drain failed', e?.stack || e); }
+    bridge.streamingSpeechDelivered = queue.size === 0;
+    log('streaming turn end');
+  }
+  function stopPlaybackForBargeIn(userId, reason = 'playback-barge-in') {
+    if (!bridge.speaking) return false;
+    log('stop playback for barge-in', 'byUser', userId, 'reason', reason, 'speaking', bridge.speaking, 'processing', bridge.processing, 'turn', bridge.activeTurnId);
+    bridge.speechPlaybackGeneration += 1;
+    try { bridge.player.stop(true); } catch (e) { warn('stop playback failed', e?.stack || e); }
+    bridge.speaking = false;
+    return true;
+  }
+  return {
+    synthTTS,
+    playAudio,
+    speakText,
+    beginStreamingTurn,
+    endStreamingTurn,
+    stopPlaybackForBargeIn,
+  };
+}