npm - verbalcoding - Versions diffs - 0.2.12 → 0.2.13 - Mend

verbalcoding 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

package/.env.example +74 -4
package/README.es.md +3 -1
package/README.fr.md +3 -1
package/README.ja.md +3 -1
package/README.ko.md +4 -2
package/README.md +4 -2
package/README.ru.md +3 -1
package/README.zh.md +3 -1
package/app-node/agent_adapters.test.mjs +14 -0
package/app-node/agent_routing.mjs +148 -0
package/app-node/agent_routing.test.mjs +138 -0
package/app-node/agent_turn.mjs +86 -0
package/app-node/agent_turn.test.mjs +109 -0
package/app-node/bridge_context.mjs +73 -0
package/app-node/bridge_context.test.mjs +54 -0
package/app-node/bridge_state.mjs +4 -0
package/app-node/bridge_wireup.test.mjs +462 -0
package/app-node/cli_install.test.mjs +31 -0
package/app-node/cross_agent_routing.test.mjs +78 -0
package/app-node/discord_command_router.mjs +204 -0
package/app-node/discord_command_router.test.mjs +311 -0
package/app-node/discord_voice_setup.mjs +251 -0
package/app-node/discord_voice_setup.test.mjs +86 -0
package/app-node/hermes_profiles.test.mjs +12 -1
package/app-node/install_config.mjs +110 -3
package/app-node/install_config.test.mjs +8 -0
package/app-node/instance_doctor.test.mjs +9 -0
package/app-node/instances.test.mjs +8 -1
package/app-node/main.mjs +488 -1368
package/app-node/mcp_tools.test.mjs +7 -0
package/app-node/notification_handler.mjs +89 -0
package/app-node/notification_handler.test.mjs +187 -0
package/app-node/plan_dispatcher.mjs +215 -0
package/app-node/plan_dispatcher.test.mjs +101 -0
package/app-node/plan_mode.mjs +36 -7
package/app-node/plan_mode.test.mjs +78 -0
package/app-node/progress_handler.mjs +220 -0
package/app-node/progress_handler.test.mjs +193 -0
package/app-node/progress_speech.mjs +54 -32
package/app-node/progress_speech.test.mjs +12 -3
package/app-node/project_sessions.mjs +5 -2
package/app-node/project_sessions.test.mjs +7 -0
package/app-node/research_mode.mjs +282 -0
package/app-node/research_mode.test.mjs +264 -0
package/app-node/restart_notice.mjs +3 -0
package/app-node/restart_notice.test.mjs +11 -0
package/app-node/session_ontology.mjs +271 -0
package/app-node/session_ontology.test.mjs +130 -0
package/app-node/smart_progress.mjs +1 -1
package/app-node/stream_sentencer.mjs +32 -2
package/app-node/stream_sentencer.test.mjs +65 -0
package/app-node/streaming_tts_queue.mjs +5 -1
package/app-node/streaming_tts_queue.test.mjs +7 -1
package/app-node/stt_whisper.mjs +24 -0
package/app-node/stt_whisper.test.mjs +32 -0
package/app-node/text_routing.mjs +4 -2
package/app-node/tts_backends.mjs +537 -3
package/app-node/tts_backends.test.mjs +454 -0
package/app-node/tts_player.mjs +164 -0
package/app-node/tts_player.test.mjs +202 -0
package/app-node/tts_runtime.mjs +134 -0
package/app-node/tts_runtime.test.mjs +89 -0
package/app-node/tts_settings.mjs +150 -3
package/app-node/tts_settings.test.mjs +204 -0
package/app-node/tts_voice_config.mjs +136 -2
package/app-node/tts_voice_config.test.mjs +94 -0
package/app-node/utterance_router.mjs +216 -0
package/app-node/utterance_router.test.mjs +236 -0
package/app-node/voice_autojoin.mjs +37 -0
package/app-node/voice_autojoin.test.mjs +59 -0
package/app-node/voice_io.mjs +272 -0
package/app-node/voice_io.test.mjs +102 -0
package/app-node/voice_turn_runner.mjs +449 -0
package/app-node/voice_turn_runner.test.mjs +289 -0
package/docs/CONFIGURATION.md +12 -2
package/docs/HARNESSES.md +58 -0
package/docs/HARNESS_AIDER.md +50 -0
package/docs/HARNESS_CLAUDE.md +56 -0
package/docs/HARNESS_CODEX.md +56 -0
package/docs/HARNESS_CURSOR.md +45 -0
package/docs/HARNESS_GEMINI.md +45 -0
package/docs/HARNESS_HERMES.md +57 -0
package/docs/HARNESS_OPENCLAW.md +44 -0
package/docs/HARNESS_OPENCODE.md +44 -0
package/docs/README.md +1 -0
package/docs/ROADMAP.md +20 -5
package/docs/TTS_BACKENDS.md +227 -0
package/docs/USAGE.md +22 -0
package/docs/i18n/AGENTS.es.md +34 -0
package/docs/i18n/AGENTS.fr.md +34 -0
package/docs/i18n/AGENTS.ja.md +34 -0
package/docs/i18n/AGENTS.ko.md +34 -0
package/docs/i18n/AGENTS.ru.md +34 -0
package/docs/i18n/AGENTS.zh.md +34 -0
package/docs/i18n/HARNESSES.es.md +58 -0
package/docs/i18n/HARNESSES.fr.md +58 -0
package/docs/i18n/HARNESSES.ja.md +58 -0
package/docs/i18n/HARNESSES.ko.md +58 -0
package/docs/i18n/HARNESSES.ru.md +58 -0
package/docs/i18n/HARNESSES.zh.md +58 -0
package/docs/i18n/HARNESS_AIDER.es.md +48 -0
package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
package/docs/i18n/HARNESS_CODEX.es.md +55 -0
package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
package/docs/i18n/HARNESS_HERMES.es.md +54 -0
package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
package/integrations/fireredtts2/mlx_llm.py +183 -0
package/integrations/fireredtts2/synth.py +156 -0
package/integrations/fireredtts2/synth_mlx.py +196 -0
package/integrations/mlxaudio/synth.py +74 -0
package/integrations/neuttsair/synth.py +104 -0
package/integrations/omnivoice/synth.py +110 -0
package/package.json +6 -1
package/scripts/cli.mjs +84 -0
package/scripts/doctor.mjs +104 -4
package/scripts/install.mjs +5 -1
package/scripts/install_fireredtts2.sh +109 -0
package/scripts/install_mlxaudio.sh +34 -0
package/scripts/install_mossttsnano.sh +46 -0
package/scripts/postinstall.mjs +34 -0

package/app-node/tts_settings.test.mjs CHANGED Viewed

@@ -121,6 +121,210 @@ test('buildTtsSettings normalizes Supertonic local backend settings', () => {
   assert.equal(settings.supertonic.interOpThreads, '1');
 });
+test('buildTtsSettings normalizes OmniVoice local backend settings', () => {
   // Passes a full set of OMNIVOICE_* env values through buildTtsSettings with a fixed project root.
+  const root = '/project';
+  const settings = buildTtsSettings({
+    TTS_BACKEND: 'omnivoice',
+    OMNIVOICE_PYTHON: './.venv-omnivoice/bin/python',
+    OMNIVOICE_MODEL: 'k2-fsa/OmniVoice',
+    OMNIVOICE_DEVICE: 'mps',
+    OMNIVOICE_DTYPE: 'float16',
+    OMNIVOICE_REF_AUDIO: './voice-samples/me.wav',
+    OMNIVOICE_REF_TEXT: '테스트 기준 음성입니다.',
+    OMNIVOICE_LANGUAGE: 'ko',
+    OMNIVOICE_SPEAKER: 'warm korean male voice',
+    OMNIVOICE_TIMEOUT_MS: '180000',
+    OMNIVOICE_PROGRESS: '1',
+  }, root);
   // Expected: the relative python and reference-audio paths come back joined onto root,
   // the timeout comes back as a number and the progress flag as a boolean.
   // All other values are expected unchanged.
+  assert.equal(settings.backend, 'omnivoice');
+  assert.equal(settings.omnivoice.python, path.join(root, '.venv-omnivoice', 'bin', 'python'));
+  assert.equal(settings.omnivoice.model, 'k2-fsa/OmniVoice');
+  assert.equal(settings.omnivoice.device, 'mps');
+  assert.equal(settings.omnivoice.dtype, 'float16');
+  assert.equal(settings.omnivoice.refAudio, path.join(root, 'voice-samples', 'me.wav'));
+  assert.equal(settings.omnivoice.refText, '테스트 기준 음성입니다.');
+  assert.equal(settings.omnivoice.language, 'ko');
+  assert.equal(settings.omnivoice.speaker, 'warm korean male voice');
+  assert.equal(settings.omnivoice.timeoutMs, 180000);
+  assert.equal(settings.omnivoice.useForProgress, true);
+});
+test('buildTtsSettings normalizes Qwen3 TTS CLI settings and aliases qwen3', () => {
   // Uses the short backend name qwen3 to check that it resolves to the canonical qwen3tts id.
+  const root = '/project';
+  const settings = buildTtsSettings({
+    TTS_BACKEND: 'qwen3',
+    QWEN3TTS_COMMAND: 'audio',
+    QWEN3TTS_MODE: 'clone',
+    QWEN3TTS_MODEL: 'base-8bit',
+    QWEN3TTS_LANGUAGE: 'korean',
+    QWEN3TTS_SPEAKER: 'sohee',
+    QWEN3TTS_INSTRUCT: 'calm conversational Korean',
+    QWEN3TTS_REF_AUDIO: './voice-samples/me.wav',
+    QWEN3TTS_REF_TEXT: '테스트 기준 음성입니다.',
+    QWEN3TTS_STREAM: '0',
+    QWEN3TTS_TIMEOUT_MS: '90000',
+    QWEN3TTS_PROGRESS: '1',
+  }, root);
   // Expected: the reference audio path is joined onto root, the stream flag 0 becomes false,
   // the progress flag 1 becomes true and the timeout becomes a number.
+  assert.equal(settings.backend, 'qwen3tts');
+  assert.equal(settings.qwen3tts.command, 'audio');
+  assert.equal(settings.qwen3tts.mode, 'clone');
+  assert.equal(settings.qwen3tts.model, 'base-8bit');
+  assert.equal(settings.qwen3tts.language, 'korean');
+  assert.equal(settings.qwen3tts.speaker, 'sohee');
+  assert.equal(settings.qwen3tts.instruct, 'calm conversational Korean');
+  assert.equal(settings.qwen3tts.refAudio, path.join(root, 'voice-samples', 'me.wav'));
+  assert.equal(settings.qwen3tts.refText, '테스트 기준 음성입니다.');
+  assert.equal(settings.qwen3tts.stream, false);
+  assert.equal(settings.qwen3tts.timeoutMs, 90000);
+  assert.equal(settings.qwen3tts.useForProgress, true);
+});
+test('buildTtsSettings normalizes FireRedTTS-2 settings', () => {
   // Uses the short backend name firered; the expected canonical id is fireredtts2.
+  const root = '/project';
+  const settings = buildTtsSettings({
+    TTS_BACKEND: 'firered',
+    FIREREDTTS2_COMMAND: './bin/fireredtts2',
+    FIREREDTTS2_PRETRAINED_DIR: './models/FireRedTTS2',
+    FIREREDTTS2_DEVICE: 'mps',
+    FIREREDTTS2_GEN_TYPE: 'monologue',
+    FIREREDTTS2_SPEAKER: 'S1',
+    FIREREDTTS2_PROMPT_AUDIO: './voice-samples/me.wav',
+    FIREREDTTS2_PROMPT_TEXT: '테스트 기준 음성입니다.',
+    FIREREDTTS2_BF16: '1',
+    FIREREDTTS2_TIMEOUT_MS: '240000',
+    FIREREDTTS2_PROGRESS: '1',
+  }, root);
   // Expected: the command string is returned exactly as given, while the pretrained
   // directory and prompt audio paths are joined onto root. The BF16 and progress flags
   // become booleans and the timeout becomes a number.
+  assert.equal(settings.backend, 'fireredtts2');
+  assert.equal(settings.fireredtts2.command, './bin/fireredtts2');
+  assert.equal(settings.fireredtts2.pretrainedDir, path.join(root, 'models', 'FireRedTTS2'));
+  assert.equal(settings.fireredtts2.device, 'mps');
+  assert.equal(settings.fireredtts2.genType, 'monologue');
+  assert.equal(settings.fireredtts2.speaker, 'S1');
+  assert.equal(settings.fireredtts2.promptAudio, path.join(root, 'voice-samples', 'me.wav'));
+  assert.equal(settings.fireredtts2.promptText, '테스트 기준 음성입니다.');
+  assert.equal(settings.fireredtts2.useBf16, true);
+  assert.equal(settings.fireredtts2.timeoutMs, 240000);
+  assert.equal(settings.fireredtts2.useForProgress, true);
+});
+test('buildTtsSettings normalizes MOSS-TTS-Nano settings', () => {
   // Uses the hyphenated backend name moss-tts-nano; the expected canonical id is mossttsnano.
+  const root = '/project';
+  const settings = buildTtsSettings({
+    TTS_BACKEND: 'moss-tts-nano',
+    MOSSTTSNANO_COMMAND: 'python3',
+    MOSSTTSNANO_SCRIPT: './vendor/MOSS-TTS-Nano/infer.py',
+    MOSSTTSNANO_CHECKPOINT: './models/MOSS-TTS-Nano',
+    MOSSTTSNANO_AUDIO_TOKENIZER: './models/MOSS-Audio-Tokenizer-Nano',
+    MOSSTTSNANO_MODE: 'voice_clone',
+    MOSSTTSNANO_LANGUAGE: 'ko',
+    MOSSTTSNANO_DEVICE: 'cpu',
+    MOSSTTSNANO_DTYPE: 'float32',
+    MOSSTTSNANO_PROMPT_AUDIO: './voice-samples/me.wav',
+    MOSSTTSNANO_PROMPT_TEXT: '테스트 기준 음성입니다.',
+    MOSSTTSNANO_MAX_NEW_FRAMES: '256',
+    MOSSTTSNANO_SEED: '7',
+    MOSSTTSNANO_TIMEOUT_MS: '90000',
+    MOSSTTSNANO_PROGRESS: '1',
+  }, root);
   // Expected: the script and prompt audio paths are joined onto root, whereas the
   // checkpoint and audio tokenizer values are returned verbatim. maxNewFrames and the
   // timeout become numbers; the seed is expected to stay a string.
+  assert.equal(settings.backend, 'mossttsnano');
+  assert.equal(settings.mossttsnano.command, 'python3');
+  assert.equal(settings.mossttsnano.script, path.join(root, 'vendor', 'MOSS-TTS-Nano', 'infer.py'));
+  assert.equal(settings.mossttsnano.checkpoint, './models/MOSS-TTS-Nano');
+  assert.equal(settings.mossttsnano.audioTokenizer, './models/MOSS-Audio-Tokenizer-Nano');
+  assert.equal(settings.mossttsnano.mode, 'voice_clone');
+  assert.equal(settings.mossttsnano.language, 'ko');
+  assert.equal(settings.mossttsnano.device, 'cpu');
+  assert.equal(settings.mossttsnano.dtype, 'float32');
+  assert.equal(settings.mossttsnano.promptAudio, path.join(root, 'voice-samples', 'me.wav'));
+  assert.equal(settings.mossttsnano.promptText, '테스트 기준 음성입니다.');
+  assert.equal(settings.mossttsnano.maxNewFrames, 256);
+  assert.equal(settings.mossttsnano.seed, '7');
+  assert.equal(settings.mossttsnano.timeoutMs, 90000);
+  assert.equal(settings.mossttsnano.useForProgress, true);
+});
+test('buildTtsSettings normalizes MOSS-TTS-Nano MLX hybrid settings', () => {
   // Uses the short backend name moss-mlx; the expected canonical id is mossttsnano_mlx.
   // The env mixes MLX-specific MOSSTTSNANO_MLX_* keys with the shared MOSSTTSNANO_* keys.
+  const root = '/project';
+  const settings = buildTtsSettings({
+    TTS_BACKEND: 'moss-mlx',
+    MOSSTTSNANO_COMMAND: 'python3',
+    MOSSTTSNANO_MLX_SCRIPT: './integrations/mossttsnano_mlx/synth.py',
+    MOSSTTSNANO_MLX_WORKER_SCRIPT: './integrations/mossttsnano_mlx/worker.py',
+    MOSSTTSNANO_MLX_WORKER: '1',
+    MOSSTTSNANO_MLX_WORKER_STARTUP_TIMEOUT_MS: '240000',
+    MOSSTTSNANO_SCRIPT: './vendor/MOSS-TTS-Nano/infer.py',
+    MOSSTTSNANO_CHECKPOINT: './models/MOSS-TTS-Nano',
+    MOSSTTSNANO_AUDIO_TOKENIZER: './models/MOSS-Audio-Tokenizer-Nano',
+    MOSSTTSNANO_MODE: 'voice_clone',
+    MOSSTTSNANO_LANGUAGE: 'ko',
+    MOSSTTSNANO_TORCH_DEVICE: 'cpu',
+    MOSSTTSNANO_TORCH_DTYPE: 'float32',
+    MOSSTTSNANO_PROMPT_AUDIO: './voice-samples/me.wav',
+    MOSSTTSNANO_PROMPT_TEXT: '테스트 기준 음성입니다.',
+    MOSSTTSNANO_MAX_NEW_FRAMES: '120',
+    MOSSTTSNANO_SEED: '7',
+    MOSSTTSNANO_MLX_TIMEOUT_MS: '180000',
+    MOSSTTSNANO_MLX_PROGRESS: '0',
+  }, root);
   // Expected: MOSSTTSNANO_COMMAND surfaces as the python field and MOSSTTSNANO_SCRIPT as
   // torchInferScript. The MLX script, worker script, torch infer script and prompt audio
   // paths are joined onto root; checkpoint and audio tokenizer are returned verbatim.
   // The progress flag 0 is expected to yield false.
+  assert.equal(settings.backend, 'mossttsnano_mlx');
+  assert.equal(settings.mossttsnano_mlx.python, 'python3');
+  assert.equal(settings.mossttsnano_mlx.script, path.join(root, 'integrations', 'mossttsnano_mlx', 'synth.py'));
+  assert.equal(settings.mossttsnano_mlx.workerScript, path.join(root, 'integrations', 'mossttsnano_mlx', 'worker.py'));
+  assert.equal(settings.mossttsnano_mlx.workerEnabled, true);
+  assert.equal(settings.mossttsnano_mlx.workerStartupTimeoutMs, 240000);
+  assert.equal(settings.mossttsnano_mlx.torchInferScript, path.join(root, 'vendor', 'MOSS-TTS-Nano', 'infer.py'));
+  assert.equal(settings.mossttsnano_mlx.checkpoint, './models/MOSS-TTS-Nano');
+  assert.equal(settings.mossttsnano_mlx.audioTokenizer, './models/MOSS-Audio-Tokenizer-Nano');
+  assert.equal(settings.mossttsnano_mlx.mode, 'voice_clone');
+  assert.equal(settings.mossttsnano_mlx.language, 'ko');
+  assert.equal(settings.mossttsnano_mlx.torchDevice, 'cpu');
+  assert.equal(settings.mossttsnano_mlx.torchDtype, 'float32');
+  assert.equal(settings.mossttsnano_mlx.promptAudio, path.join(root, 'voice-samples', 'me.wav'));
+  assert.equal(settings.mossttsnano_mlx.promptText, '테스트 기준 음성입니다.');
+  assert.equal(settings.mossttsnano_mlx.maxNewFrames, 120);
+  assert.equal(settings.mossttsnano_mlx.seed, '7');
+  assert.equal(settings.mossttsnano_mlx.timeoutMs, 180000);
+  assert.equal(settings.mossttsnano_mlx.useForProgress, false);
+});
+test('buildTtsSettings normalizes NeuTTS Air settings and aliases neutts air', () => {
   // NOTE(review): the title mentions the spaced alias, but the input below uses the
   // hyphenated form neutts-air; the spaced spelling is not exercised by this test.
+  const root = '/project';
+  const settings = buildTtsSettings({
+    TTS_BACKEND: 'neutts-air',
+    NEUTTSAIR_PYTHON: './.venv-neuttsair/bin/python',
+    NEUTTSAIR_SCRIPT: './integrations/neuttsair/synth.py',
+    NEUTTSAIR_BACKBONE_REPO: 'neuphonic/neutts-air-q4-gguf',
+    NEUTTSAIR_BACKBONE_DEVICE: 'mps',
+    NEUTTSAIR_CODEC_REPO: 'neuphonic/neucodec',
+    NEUTTSAIR_CODEC_DEVICE: 'mps',
+    NEUTTSAIR_REF_AUDIO: './voice-samples/me.wav',
+    NEUTTSAIR_REF_TEXT: 'Reference voice text.',
+    NEUTTSAIR_LANGUAGE: 'en',
+    NEUTTSAIR_SAMPLE_RATE: '24000',
+    NEUTTSAIR_TIMEOUT_MS: '120000',
+    NEUTTSAIR_PROGRESS: '1',
+  }, root);
   // Expected: the backend resolves to neuttsair; the python, script and reference audio
   // paths are joined onto root; sample rate and timeout become numbers; repo ids are
   // returned verbatim.
+  assert.equal(settings.backend, 'neuttsair');
+  assert.equal(settings.neuttsair.python, path.join(root, '.venv-neuttsair', 'bin', 'python'));
+  assert.equal(settings.neuttsair.script, path.join(root, 'integrations', 'neuttsair', 'synth.py'));
+  assert.equal(settings.neuttsair.backboneRepo, 'neuphonic/neutts-air-q4-gguf');
+  assert.equal(settings.neuttsair.backboneDevice, 'mps');
+  assert.equal(settings.neuttsair.codecRepo, 'neuphonic/neucodec');
+  assert.equal(settings.neuttsair.codecDevice, 'mps');
+  assert.equal(settings.neuttsair.refAudio, path.join(root, 'voice-samples', 'me.wav'));
+  assert.equal(settings.neuttsair.refText, 'Reference voice text.');
+  assert.equal(settings.neuttsair.language, 'en');
+  assert.equal(settings.neuttsair.sampleRate, 24000);
+  assert.equal(settings.neuttsair.timeoutMs, 120000);
+  assert.equal(settings.neuttsair.useForProgress, true);
+});
 test('buildTtsSettings falls back to edge for unsupported backend', () => {
   const settings = buildTtsSettings({ TTS_BACKEND: 'unknown' }, '/project');
   assert.equal(settings.backend, 'edge');

package/app-node/tts_voice_config.mjs CHANGED Viewed

@@ -34,6 +34,55 @@ export const DEFAULT_TTS_VOICE_CONFIG = {
         m1: { label: 'Supertonic M1', language: 'ko', voice: 'M1' },
       },
     },
+    omnivoice: {
+      currentVoiceType: 'cloned_reference',
+      voices: {
+        cloned_reference: { label: 'OmniVoice reference sample', language: 'ko', voice: 'voice-samples/user-reference.wav' },
+        designed_speaker: { label: 'OmniVoice designed speaker', language: 'ko', voice: 'warm korean male voice' },
+      },
+    },
+    qwen3tts: {
+      currentVoiceType: 'korean_preset',
+      voices: {
+        korean_preset: { label: 'Qwen3 TTS Korean preset', language: 'ko', voice: 'sohee' },
+        cloned_reference: { label: 'Qwen3 TTS reference sample', language: 'ko', voice: 'voice-samples/user-reference.wav' },
+        designed_speaker: { label: 'Qwen3 TTS designed speaker', language: 'ko', voice: 'calm conversational Korean voice' },
+      },
+    },
+    mlxaudio: {
+      currentVoiceType: 'qwen3_mlx',
+      voices: {
+        qwen3_mlx: { label: 'MLX Audio Qwen3 speaker', language: 'ko', voice: 'Chelsie' },
+      },
+    },
+    neuttsair: {
+      currentVoiceType: 'cloned_reference',
+      voices: {
+        cloned_reference: { label: 'NeuTTS Air reference sample', language: 'en', voice: 'voice-samples/user-reference.wav' },
+        default_sample: { label: 'NeuTTS Air bundled sample', language: 'en', voice: 'vendor/neutts-air/samples/jo.wav' },
+      },
+    },
+    fireredtts2: {
+      currentVoiceType: 'prompt_reference',
+      voices: {
+        prompt_reference: { label: 'FireRedTTS-2 prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
+        random_speaker: { label: 'FireRedTTS-2 random speaker', language: 'ko', voice: '' },
+      },
+    },
+    mossttsnano: {
+      currentVoiceType: 'prompt_reference',
+      voices: {
+        prompt_reference: { label: 'MOSS-TTS-Nano prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
+        continuation: { label: 'MOSS-TTS-Nano continuation/default', language: 'ko', voice: '' },
+      },
+    },
+    mossttsnano_mlx: {
+      currentVoiceType: 'prompt_reference',
+      voices: {
+        prompt_reference: { label: 'MOSS-TTS-Nano MLX hybrid prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
+        continuation: { label: 'MOSS-TTS-Nano MLX hybrid continuation/default', language: 'ko', voice: '' },
+      },
+    },
   },
 };
@@ -47,7 +96,37 @@ export function defaultTtsVoiceConfig() {
 // Alternate spellings accepted for TTS backend ids, keyed by the trimmed,
 // lower-cased input. Built once at module load rather than on every call.
 const TTS_BACKEND_ALIASES = new Map([
   ['q3', 'qwen3tts'],
   ['qwen3', 'qwen3tts'],
   ['qwen3-tts', 'qwen3tts'],
   ['qtts', 'qwen3tts'],
   ['qwen3-mlx', 'mlxaudio'],
   ['mlx', 'mlxaudio'],
   ['mlx-audio', 'mlxaudio'],
   ['neutts', 'neuttsair'],
   ['neutts-air', 'neuttsair'],
   ['neutts air', 'neuttsair'],
   ['neuttsair', 'neuttsair'],
   ['neu-tts-air', 'neuttsair'],
   ['neu tts air', 'neuttsair'],
   ['firered', 'fireredtts2'],
   ['fireredtts', 'fireredtts2'],
   ['firered-tts-2', 'fireredtts2'],
   ['fireredtts-2', 'fireredtts2'],
   ['moss', 'mossttsnano'],
   ['moss-tts', 'mossttsnano'],
   ['mossnano', 'mossttsnano'],
   ['moss-tts-nano', 'mossttsnano'],
   ['openmoss', 'mossttsnano'],
   ['moss-mlx', 'mossttsnano_mlx'],
   ['moss mlx', 'mossttsnano_mlx'],
   ['mossttsnano-mlx', 'mossttsnano_mlx'],
   ['mossttsnano_mlx', 'mossttsnano_mlx'],
   ['openmoss-mlx', 'mossttsnano_mlx'],
 ]);

 /**
  * Resolve a requested backend name to a backend id present in the config.
  *
  * The input is stringified, trimmed and lower-cased, then mapped through the
  * alias table; names without an alias are used as-is.
  *
  * @param {*} value - Requested backend name; falsy values are treated as ''.
  * @param {{ backends?: object }} config - Voice config whose `backends` keys
  *   are the valid backend ids.
  * @returns {string} The canonical backend id, or 'edge' when the name does
  *   not resolve to a truthy own entry of `config.backends`.
  */
 function normalizeBackend(value, config) {
   const key = String(value || '').trim().toLowerCase();
   const normalized = TTS_BACKEND_ALIASES.get(key) || key;
   const backends = config?.backends;
   // Own-property check: a plain bracket lookup would also match inherited
   // names such as 'constructor' or '__proto__' and hand them back as a backend.
   const isConfigured = backends != null
     && Object.prototype.hasOwnProperty.call(backends, normalized)
     && Boolean(backends[normalized]);
   return isConfigured ? normalized : 'edge';
 }
 function normalizeVoiceType(backendConfig, requested) {
@@ -75,7 +154,10 @@ export function effectiveTtsVoiceSelection(config, env = {}) {
   const merged = config || defaultTtsVoiceConfig();
   const backend = normalizeBackend(env.TTS_BACKEND || merged.currentBackend, merged);
   const backendConfig = merged.backends[backend] || merged.backends.edge;
-  const voiceType = normalizeVoiceType(backendConfig, env.TTS_VOICE_TYPE || merged.currentVoiceType || backendConfig.currentVoiceType);
+  const requestedVoiceType = env.TTS_VOICE_TYPE || merged.currentVoiceType;
+  const voiceType = backendConfig.voices?.[requestedVoiceType]
+    ? requestedVoiceType
+    : normalizeVoiceType(backendConfig, backendConfig.currentVoiceType);
   const voice = backendConfig.voices[voiceType];
   return { backend, voiceType, voice, backendConfig };
 }
@@ -83,6 +165,41 @@ export function effectiveTtsVoiceSelection(config, env = {}) {
 /**
  * Project a resolved TTS voice selection onto a copy of the environment.
  *
  * Always sets TTS_BACKEND and TTS_VOICE_TYPE, then adds the backend-specific
  * variables for the backends handled below, and VOICE_LANGUAGE when the voice
  * entry declares a language. Backends without a case only get the two common
  * keys (plus VOICE_LANGUAGE). The input `env` object is not modified.
  *
  * @param {object} [env={}] - Base environment; copied, never mutated.
  * @param {{ backend: string, voiceType: string, voice?: { voice?: string, language?: string } }} selection
  *   Selection to apply. A missing `voice` entry is tolerated for every backend.
  * @returns {object} New environment object with the selection applied.
  */
 export function applyTtsVoiceSelectionToEnv(env = {}, selection) {
   const { backend, voiceType, voice } = selection;
   const next = { ...env, TTS_BACKEND: backend, TTS_VOICE_TYPE: voiceType };
   const hasVoice = voice != null;
   const voiceValue = voice?.voice;

   switch (backend) {
     case 'edge':
       // Guarded so a selection without a voice entry no longer throws.
       if (hasVoice) next.TTS_VOICE = voiceValue;
       break;
     case 'qwen3tts':
       // The voice type picks the CLI mode; the voice value feeds the variable
       // that mode reads (reference audio, instruction text or preset speaker).
       if (voiceType === 'cloned_reference') {
         next.QWEN3TTS_MODE = 'clone';
         if (hasVoice) next.QWEN3TTS_REF_AUDIO = voiceValue;
       } else if (voiceType === 'designed_speaker') {
         next.QWEN3TTS_MODE = 'design';
         if (hasVoice) next.QWEN3TTS_INSTRUCT = voiceValue;
       } else {
         next.QWEN3TTS_MODE = 'custom';
         if (hasVoice) next.QWEN3TTS_SPEAKER = voiceValue;
       }
       break;
     case 'mlxaudio':
       if (voiceValue) next.MLXAUDIO_VOICE = voiceValue;
       break;
     case 'neuttsair':
       if (voiceValue) next.NEUTTSAIR_REF_AUDIO = voiceValue;
       break;
     case 'fireredtts2':
       // NOTE(review): an empty voice value leaves any FIREREDTTS2_PROMPT_AUDIO
       // already present in env untouched — confirm that is intended.
       if (voiceValue) next.FIREREDTTS2_PROMPT_AUDIO = voiceValue;
       break;
     case 'mossttsnano':
     case 'mossttsnano_mlx':
       // Both MOSS backends are driven by the same MOSSTTSNANO_* variables.
       if (voiceType === 'continuation') {
         next.MOSSTTSNANO_MODE = 'continuation';
       } else {
         next.MOSSTTSNANO_MODE = 'voice_clone';
         if (voiceValue) next.MOSSTTSNANO_PROMPT_AUDIO = voiceValue;
       }
       break;
     default:
       break;
   }

   if (voice?.language) next.VOICE_LANGUAGE = voice.language;
   return next;
 }
@@ -113,6 +230,23 @@ export function voiceCommandFromTranscript(text) {
   const raw = String(text || '').trim();
   if (!raw) return null;
   const compact = raw.toLowerCase().replace(/\s+/g, '');
+  const looksLikeBackend = /\b(tts|voice|speech|audio)\b.*\bbackend\b|\bbackend\b.*\b(tts|voice|speech|audio)\b/i.test(raw)
+    || /(tts|음성|목소리).*(백엔드|백앤드|backend).*(바꿔|변경|설정|해줘|로)/iu.test(raw)
+    || /(백엔드|백앤드|backend).*(옴니보이스|오픈보이스|엣지|수퍼토닉|슈퍼토닉|스피치스위프트|큐원|큐웬|qwen|q3|qtts|firered|moss|openmoss|neutts|neu\s*tts|뉴티티에스|뉴티TS|speechswift|omnivoice|openvoice|edge|supertonic)/iu.test(raw)
+    || /tts를.*(옴니보이스|오픈보이스|엣지|수퍼토닉|슈퍼토닉|스피치스위프트|큐원|큐웬|qwen|q3|qtts|firered|moss|openmoss|neutts|neu\s*tts|뉴티티에스|뉴티TS|omnivoice|openvoice|edge|supertonic|speechswift).*바꿔/iu.test(raw);
+  if (looksLikeBackend) {
+    if (/(neutts\s*-?\s*air|neu\s*tts\s*-?\s*air|neuttsair|뉴\s*티\s*티\s*에스\s*에어|뉴티티에스\s*에어|뉴티TS\s*에어)/iu.test(raw)) return { backend: 'neuttsair' };
+    if (/(omnivoice|omni voice|옴니보이스|업니보이스|옴니|업니)/iu.test(raw)) return { backend: 'omnivoice' };
+    if (/(openvoice|open voice|오픈보이스|오픈 보이스)/iu.test(raw)) return { backend: 'openvoice' };
+    if (/(speechswift|speech swift|스피치스위프트|스피치 스위프트|cosyvoice|코지보이스)/iu.test(raw)) return { backend: 'speechswift' };
+    if (/(qwen3mlx|qwen mlx|qwen3 mlx|mlx-audio|mlx audio|엠엘엑스|mlx)/iu.test(raw)) return { backend: 'mlxaudio' };
+    if (/(neutts-air|neuttsair|neutts|neu tts air|뉴티티에스|뉴티츠|뉴티에스)/iu.test(raw)) return { backend: 'neuttsair' };
+    if (/(qwen3|qwen|q3|qtts|큐원|큐웬|큐엔|큐3|큐삼)/iu.test(raw)) return { backend: 'qwen3tts' };
+    if (/(fireredtts2|fireredtts|firered|fire red|파이어레드)/iu.test(raw)) return { backend: 'fireredtts2' };
+    if (/(moss-tts-nano|moss tts nano|mossnano|moss|openmoss|모스|오픈모스)/iu.test(raw)) return { backend: 'mossttsnano' };
+    if (/(supertonic|수퍼토닉|슈퍼토닉)/iu.test(raw)) return { backend: 'supertonic' };
+    if (/(edge|엣지)/iu.test(raw)) return { backend: 'edge' };
+  }
   const looksLikeVoice = /\b(change|switch|set)\b.*\b(voice|speaker)\b/i.test(raw)
     || /\b(voice|speaker)\b.*\b(to|as)\b/i.test(raw)
     || /(목소리|음성).*(바꿔|변경|설정|해줘)|목소리.*로|음성.*로/u.test(compact);

package/app-node/tts_voice_config.test.mjs CHANGED Viewed

@@ -15,6 +15,11 @@ import {
   writeTtsVoiceConfig,
 } from './tts_voice_config.mjs';
+const __tempRoots = [];
 // Directories pushed onto __tempRoots are removed by the test.after hook below.
+test.after(() => {
   // Best-effort cleanup: an error from rmSync for one directory is deliberately ignored.
+  for (const root of __tempRoots) try { fs.rmSync(root, { recursive: true, force: true }); } catch {}
+});
 test('effectiveTtsVoiceSelection reads backend and voice type from config', () => {
   const config = defaultTtsVoiceConfig();
   config.currentBackend = 'edge';
@@ -27,6 +32,26 @@ test('effectiveTtsVoiceSelection reads backend and voice type from config', () =
   assert.equal(selected.voice.voice, 'ko-KR-SunHiNeural');
 });
+test('effectiveTtsVoiceSelection falls back to backend voice when env voice type belongs to another backend', () => {
   // The env asks for the edge backend together with the voice type cloned_reference,
   // which the default config defines for other backends such as omnivoice.
+  const config = defaultTtsVoiceConfig();
+  config.currentBackend = 'edge';
+  config.currentVoiceType = 'korean_male';
+  config.backends.edge.currentVoiceType = 'korean_male';
+  const selected = effectiveTtsVoiceSelection(config, { TTS_BACKEND: 'edge', TTS_VOICE_TYPE: 'cloned_reference' });
   // Expected: the selection falls back to the edge backend's own currentVoiceType.
+  assert.equal(selected.backend, 'edge');
+  assert.equal(selected.voiceType, 'korean_male');
+  assert.equal(selected.voice.voice, 'ko-KR-InJoonNeural');
+});
+test('effectiveTtsVoiceSelection accepts Qwen3 backend aliases from env', () => {
   // TTS_BACKEND carries the alias qwen3; the expected result is the canonical qwen3tts
   // backend with its default voice type korean_preset.
+  const selected = effectiveTtsVoiceSelection(defaultTtsVoiceConfig(), { TTS_BACKEND: 'qwen3' });
+  assert.equal(selected.backend, 'qwen3tts');
+  assert.equal(selected.voiceType, 'korean_preset');
+});
 test('applyTtsVoiceSelectionToEnv updates backend voice and voice language together', () => {
   const selected = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_male' }), {});
@@ -52,8 +77,77 @@ test('voiceCommandFromTranscript detects voice type changes', () => {
   assert.equal(voiceCommandFromTranscript('change language to Korean'), null);
 });
+test('voiceCommandFromTranscript detects TTS backend changes', () => {
   // English and Korean transcripts that ask for a backend switch; each is expected to
   // yield an object holding only the canonical backend id.
+  assert.deepEqual(voiceCommandFromTranscript('change TTS backend to OmniVoice'), { backend: 'omnivoice' });
+  assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 옴니보이스로 바꿔'), { backend: 'omnivoice' });
+  assert.deepEqual(voiceCommandFromTranscript('TTS를 Edge로 바꿔'), { backend: 'edge' });
+  assert.deepEqual(voiceCommandFromTranscript('TTS를 qwen3로 바꿔'), { backend: 'qwen3tts' });
+  assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 큐웬으로 바꿔'), { backend: 'qwen3tts' });
+  assert.deepEqual(voiceCommandFromTranscript('TTS backend to FireRedTTS-2'), { backend: 'fireredtts2' });
+  assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 모스로 바꿔'), { backend: 'mossttsnano' });
+  assert.deepEqual(voiceCommandFromTranscript('TTS backend to NeuTTS Air'), { backend: 'neuttsair' });
+  assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 뉴티티에스 에어로 바꿔'), { backend: 'neuttsair' });
+});
+test('applyTtsVoiceSelectionToEnv maps Qwen3 voice types to CLI mode env', () => {
   // Preset voice type: expected mode is custom, with the voice value in QWEN3TTS_SPEAKER.
+  const preset = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'qwen3tts', voiceType: 'korean_preset' }), {});
+  assert.deepEqual(applyTtsVoiceSelectionToEnv({}, preset), {
+    TTS_BACKEND: 'qwen3tts',
+    TTS_VOICE_TYPE: 'korean_preset',
+    QWEN3TTS_MODE: 'custom',
+    QWEN3TTS_SPEAKER: 'sohee',
+    VOICE_LANGUAGE: 'ko',
+  });
   // Cloned reference voice type: expected mode is clone, with the voice value in QWEN3TTS_REF_AUDIO.
+  const clone = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'qwen3tts', voiceType: 'cloned_reference' }), {});
+  assert.deepEqual(applyTtsVoiceSelectionToEnv({}, clone), {
+    TTS_BACKEND: 'qwen3tts',
+    TTS_VOICE_TYPE: 'cloned_reference',
+    QWEN3TTS_MODE: 'clone',
+    QWEN3TTS_REF_AUDIO: 'voice-samples/user-reference.wav',
+    VOICE_LANGUAGE: 'ko',
+  });
+});
+test('applyTtsVoiceSelectionToEnv maps FireRedTTS-2 and MOSS prompt references', () => {
   // NOTE(review): besides FireRedTTS-2 and MOSS, this test also covers the NeuTTS Air
   // reference mapping, which the title does not mention.
   // FireRedTTS-2: the voice value is expected in FIREREDTTS2_PROMPT_AUDIO, with no mode variable.
+  const fire = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'fireredtts2', voiceType: 'prompt_reference' }), {});
+  assert.deepEqual(applyTtsVoiceSelectionToEnv({}, fire), {
+    TTS_BACKEND: 'fireredtts2',
+    TTS_VOICE_TYPE: 'prompt_reference',
+    FIREREDTTS2_PROMPT_AUDIO: 'voice-samples/user-reference.wav',
+    VOICE_LANGUAGE: 'ko',
+  });
   // MOSS-TTS-Nano: a prompt reference is expected to select voice_clone mode plus the prompt audio.
+  const moss = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'mossttsnano', voiceType: 'prompt_reference' }), {});
+  assert.deepEqual(applyTtsVoiceSelectionToEnv({}, moss), {
+    TTS_BACKEND: 'mossttsnano',
+    TTS_VOICE_TYPE: 'prompt_reference',
+    MOSSTTSNANO_MODE: 'voice_clone',
+    MOSSTTSNANO_PROMPT_AUDIO: 'voice-samples/user-reference.wav',
+    VOICE_LANGUAGE: 'ko',
+  });
   // NeuTTS Air: the voice value is expected in NEUTTSAIR_REF_AUDIO and the language is en.
+  const neutts = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'neuttsair', voiceType: 'cloned_reference' }), {});
+  assert.deepEqual(applyTtsVoiceSelectionToEnv({}, neutts), {
+    TTS_BACKEND: 'neuttsair',
+    TTS_VOICE_TYPE: 'cloned_reference',
+    NEUTTSAIR_REF_AUDIO: 'voice-samples/user-reference.wav',
+    VOICE_LANGUAGE: 'en',
+  });
+});
+test('updateTtsVoiceConfig can switch to OmniVoice backend default voice', () => {
   // Only the backend is changed; the selection is then expected to use the omnivoice
   // default voice type cloned_reference and its reference sample path.
+  const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'omnivoice' });
+  const selected = effectiveTtsVoiceSelection(config, {});
+  assert.equal(selected.backend, 'omnivoice');
+  assert.equal(selected.voiceType, 'cloned_reference');
+  assert.equal(selected.voice.voice, 'voice-samples/user-reference.wav');
+});
 test('read and write voice config round trips current selection', () => {
   const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'voice-config-test-'));
+  __tempRoots.push(dir);
   const file = path.join(dir, 'tts-voices.json');
   const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_female' });