verbalcoding 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +98 -2
- package/README.es.md +134 -0
- package/README.fr.md +134 -0
- package/README.ja.md +134 -0
- package/README.ko.md +134 -0
- package/README.md +118 -74
- package/README.ru.md +134 -0
- package/README.zh.md +133 -0
- package/app-node/agent_adapters.mjs +37 -5
- package/app-node/agent_adapters.test.mjs +27 -1
- package/app-node/agent_detect.mjs +73 -0
- package/app-node/agent_detect.test.mjs +77 -0
- package/app-node/agent_routing.mjs +148 -0
- package/app-node/agent_routing.test.mjs +138 -0
- package/app-node/agent_turn.mjs +86 -0
- package/app-node/agent_turn.test.mjs +109 -0
- package/app-node/bridge_context.mjs +73 -0
- package/app-node/bridge_context.test.mjs +54 -0
- package/app-node/bridge_state.mjs +4 -0
- package/app-node/bridge_wireup.test.mjs +462 -0
- package/app-node/cli_install.test.mjs +31 -0
- package/app-node/cross_agent_routing.test.mjs +78 -0
- package/app-node/discord_command_router.mjs +204 -0
- package/app-node/discord_command_router.test.mjs +311 -0
- package/app-node/discord_voice_setup.mjs +251 -0
- package/app-node/discord_voice_setup.test.mjs +86 -0
- package/app-node/hermes_profiles.test.mjs +12 -1
- package/app-node/install_config.mjs +113 -3
- package/app-node/install_config.test.mjs +8 -0
- package/app-node/instance_doctor.test.mjs +9 -0
- package/app-node/instances.test.mjs +8 -1
- package/app-node/main.mjs +513 -1058
- package/app-node/mcp_tools.test.mjs +7 -0
- package/app-node/notification_handler.mjs +89 -0
- package/app-node/notification_handler.test.mjs +187 -0
- package/app-node/notify.mjs +73 -0
- package/app-node/notify.test.mjs +68 -0
- package/app-node/plan_dispatcher.mjs +215 -0
- package/app-node/plan_dispatcher.test.mjs +101 -0
- package/app-node/plan_mode.mjs +203 -0
- package/app-node/plan_mode.test.mjs +231 -0
- package/app-node/progress_handler.mjs +220 -0
- package/app-node/progress_handler.test.mjs +193 -0
- package/app-node/progress_speech.mjs +54 -32
- package/app-node/progress_speech.test.mjs +12 -3
- package/app-node/project_sessions.mjs +5 -2
- package/app-node/project_sessions.test.mjs +7 -0
- package/app-node/research_mode.mjs +282 -0
- package/app-node/research_mode.test.mjs +264 -0
- package/app-node/restart_notice.mjs +3 -0
- package/app-node/restart_notice.test.mjs +11 -0
- package/app-node/session_ontology.mjs +271 -0
- package/app-node/session_ontology.test.mjs +130 -0
- package/app-node/smart_progress.mjs +94 -0
- package/app-node/smart_progress.test.mjs +66 -0
- package/app-node/stream_sentencer.mjs +91 -0
- package/app-node/stream_sentencer.test.mjs +129 -0
- package/app-node/streaming_tts_queue.mjs +52 -0
- package/app-node/streaming_tts_queue.test.mjs +64 -0
- package/app-node/stt_whisper.mjs +24 -0
- package/app-node/stt_whisper.test.mjs +32 -0
- package/app-node/text_routing.mjs +22 -0
- package/app-node/text_routing.test.mjs +23 -1
- package/app-node/tts_backends.mjs +537 -3
- package/app-node/tts_backends.test.mjs +454 -0
- package/app-node/tts_player.mjs +164 -0
- package/app-node/tts_player.test.mjs +202 -0
- package/app-node/tts_runtime.mjs +134 -0
- package/app-node/tts_runtime.test.mjs +89 -0
- package/app-node/tts_settings.mjs +150 -3
- package/app-node/tts_settings.test.mjs +204 -0
- package/app-node/tts_voice_config.mjs +136 -2
- package/app-node/tts_voice_config.test.mjs +94 -0
- package/app-node/utterance_router.mjs +216 -0
- package/app-node/utterance_router.test.mjs +236 -0
- package/app-node/voice_autojoin.mjs +37 -0
- package/app-node/voice_autojoin.test.mjs +59 -0
- package/app-node/voice_io.mjs +272 -0
- package/app-node/voice_io.test.mjs +102 -0
- package/app-node/voice_turn_runner.mjs +449 -0
- package/app-node/voice_turn_runner.test.mjs +289 -0
- package/docs/CONFIGURATION.md +79 -96
- package/docs/FRESH_INSTALL.md +105 -63
- package/docs/HARNESSES.md +58 -0
- package/docs/HARNESS_AIDER.md +50 -0
- package/docs/HARNESS_CLAUDE.md +56 -0
- package/docs/HARNESS_CODEX.md +56 -0
- package/docs/HARNESS_CURSOR.md +45 -0
- package/docs/HARNESS_GEMINI.md +45 -0
- package/docs/HARNESS_HERMES.md +57 -0
- package/docs/HARNESS_OPENCLAW.md +44 -0
- package/docs/HARNESS_OPENCODE.md +44 -0
- package/docs/HERMES_VOICE.md +65 -0
- package/docs/MULTI_INSTANCE.md +16 -0
- package/docs/README.md +50 -0
- package/docs/RELEASE.md +42 -19
- package/docs/ROADMAP.md +53 -0
- package/docs/TROUBLESHOOTING.md +126 -0
- package/docs/TTS_BACKENDS.md +227 -0
- package/docs/USAGE.md +94 -40
- package/docs/assets/figures/verbalcoding-flow.svg +1 -1
- package/docs/i18n/AGENTS.es.md +34 -0
- package/docs/i18n/AGENTS.fr.md +34 -0
- package/docs/i18n/AGENTS.ja.md +34 -0
- package/docs/i18n/AGENTS.ko.md +34 -0
- package/docs/i18n/AGENTS.ru.md +34 -0
- package/docs/i18n/AGENTS.zh.md +34 -0
- package/docs/i18n/CONFIGURATION.es.md +25 -0
- package/docs/i18n/CONFIGURATION.fr.md +25 -0
- package/docs/i18n/CONFIGURATION.ja.md +25 -0
- package/docs/i18n/CONFIGURATION.ko.md +25 -0
- package/docs/i18n/CONFIGURATION.ru.md +25 -0
- package/docs/i18n/CONFIGURATION.zh.md +25 -0
- package/docs/i18n/FRESH_INSTALL.es.md +27 -2
- package/docs/i18n/FRESH_INSTALL.fr.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ja.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ko.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ru.md +27 -2
- package/docs/i18n/FRESH_INSTALL.zh.md +27 -2
- package/docs/i18n/HARNESSES.es.md +58 -0
- package/docs/i18n/HARNESSES.fr.md +58 -0
- package/docs/i18n/HARNESSES.ja.md +58 -0
- package/docs/i18n/HARNESSES.ko.md +58 -0
- package/docs/i18n/HARNESSES.ru.md +58 -0
- package/docs/i18n/HARNESSES.zh.md +58 -0
- package/docs/i18n/HARNESS_AIDER.es.md +48 -0
- package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
- package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
- package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
- package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
- package/docs/i18n/HARNESS_CODEX.es.md +55 -0
- package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
- package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
- package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
- package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
- package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
- package/docs/i18n/HARNESS_HERMES.es.md +54 -0
- package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
- package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
- package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
- package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
- package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
- package/docs/i18n/HERMES_VOICE.es.md +46 -0
- package/docs/i18n/HERMES_VOICE.fr.md +46 -0
- package/docs/i18n/HERMES_VOICE.ja.md +46 -0
- package/docs/i18n/HERMES_VOICE.ko.md +65 -0
- package/docs/i18n/HERMES_VOICE.ru.md +46 -0
- package/docs/i18n/HERMES_VOICE.zh.md +46 -0
- package/docs/i18n/MULTI_INSTANCE.es.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.fr.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ja.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ko.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ru.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.zh.md +25 -0
- package/docs/i18n/README.es.md +20 -134
- package/docs/i18n/README.fr.md +20 -134
- package/docs/i18n/README.ja.md +20 -134
- package/docs/i18n/README.ko.md +20 -133
- package/docs/i18n/README.ru.md +20 -134
- package/docs/i18n/README.zh.md +20 -133
- package/docs/i18n/RELEASE.es.md +26 -1
- package/docs/i18n/RELEASE.fr.md +26 -1
- package/docs/i18n/RELEASE.ja.md +26 -1
- package/docs/i18n/RELEASE.ko.md +26 -1
- package/docs/i18n/RELEASE.ru.md +26 -1
- package/docs/i18n/RELEASE.zh.md +26 -1
- package/docs/i18n/TROUBLESHOOTING.es.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.fr.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ja.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ko.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ru.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.zh.md +39 -0
- package/docs/i18n/USAGE.es.md +25 -0
- package/docs/i18n/USAGE.fr.md +25 -0
- package/docs/i18n/USAGE.ja.md +25 -0
- package/docs/i18n/USAGE.ko.md +25 -0
- package/docs/i18n/USAGE.ru.md +25 -0
- package/docs/i18n/USAGE.zh.md +25 -0
- package/docs/superpowers/plans/2026-05-13-phase1-streaming-pipeline.md +122 -0
- package/docs/superpowers/plans/2026-05-13-phase10-push-notifications.md +152 -0
- package/docs/superpowers/plans/2026-05-13-phase2-agent-adapters.md +242 -0
- package/docs/superpowers/plans/2026-05-13-phase6-smart-progress.md +172 -0
- package/docs/superpowers/plans/2026-05-13-phase7-voice-plan-mode.md +108 -0
- package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
- package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
- package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
- package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
- package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
- package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
- package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
- package/integrations/fireredtts2/mlx_llm.py +183 -0
- package/integrations/fireredtts2/synth.py +156 -0
- package/integrations/fireredtts2/synth_mlx.py +196 -0
- package/integrations/mlxaudio/synth.py +74 -0
- package/integrations/neuttsair/synth.py +104 -0
- package/integrations/omnivoice/synth.py +110 -0
- package/package.json +7 -1
- package/scripts/cli.mjs +88 -3
- package/scripts/doctor.mjs +115 -4
- package/scripts/install.mjs +20 -2
- package/scripts/install_fireredtts2.sh +109 -0
- package/scripts/install_mlxaudio.sh +34 -0
- package/scripts/install_mossttsnano.sh +46 -0
- package/scripts/postinstall.mjs +34 -0
|
@@ -121,6 +121,210 @@ test('buildTtsSettings normalizes Supertonic local backend settings', () => {
|
|
|
121
121
|
assert.equal(settings.supertonic.interOpThreads, '1');
|
|
122
122
|
});
|
|
123
123
|
|
|
124
|
+
// OmniVoice: the test expects relative python/ref-audio paths to be joined onto
// the project root, the timeout string to become a number, and the '1' progress
// flag to become `true`; all other values are expected to pass through as given.
test('buildTtsSettings normalizes OmniVoice local backend settings', () => {
  const root = '/project';
  const env = {
    TTS_BACKEND: 'omnivoice',
    OMNIVOICE_PYTHON: './.venv-omnivoice/bin/python',
    OMNIVOICE_MODEL: 'k2-fsa/OmniVoice',
    OMNIVOICE_DEVICE: 'mps',
    OMNIVOICE_DTYPE: 'float16',
    OMNIVOICE_REF_AUDIO: './voice-samples/me.wav',
    OMNIVOICE_REF_TEXT: '테스트 기준 음성입니다.',
    OMNIVOICE_LANGUAGE: 'ko',
    OMNIVOICE_SPEAKER: 'warm korean male voice',
    OMNIVOICE_TIMEOUT_MS: '180000',
    OMNIVOICE_PROGRESS: '1',
  };

  const settings = buildTtsSettings(env, root);
  assert.equal(settings.backend, 'omnivoice');

  // Field-by-field expectations for the normalized `omnivoice` section.
  const expected = [
    ['python', path.join(root, '.venv-omnivoice', 'bin', 'python')],
    ['model', 'k2-fsa/OmniVoice'],
    ['device', 'mps'],
    ['dtype', 'float16'],
    ['refAudio', path.join(root, 'voice-samples', 'me.wav')],
    ['refText', '테스트 기준 음성입니다.'],
    ['language', 'ko'],
    ['speaker', 'warm korean male voice'],
    ['timeoutMs', 180000],
    ['useForProgress', true],
  ];
  for (const [field, value] of expected) {
    assert.equal(settings.omnivoice[field], value);
  }
});
|
|
152
|
+
|
|
153
|
+
// Qwen3 TTS: the short backend name 'qwen3' is expected to resolve to
// 'qwen3tts'; the test also expects '0' for STREAM to become `false` and the
// reference audio path to be joined onto the project root.
test('buildTtsSettings normalizes Qwen3 TTS CLI settings and aliases qwen3', () => {
  const root = '/project';
  const env = {
    TTS_BACKEND: 'qwen3',
    QWEN3TTS_COMMAND: 'audio',
    QWEN3TTS_MODE: 'clone',
    QWEN3TTS_MODEL: 'base-8bit',
    QWEN3TTS_LANGUAGE: 'korean',
    QWEN3TTS_SPEAKER: 'sohee',
    QWEN3TTS_INSTRUCT: 'calm conversational Korean',
    QWEN3TTS_REF_AUDIO: './voice-samples/me.wav',
    QWEN3TTS_REF_TEXT: '테스트 기준 음성입니다.',
    QWEN3TTS_STREAM: '0',
    QWEN3TTS_TIMEOUT_MS: '90000',
    QWEN3TTS_PROGRESS: '1',
  };

  const settings = buildTtsSettings(env, root);
  assert.equal(settings.backend, 'qwen3tts');

  // Field-by-field expectations for the normalized `qwen3tts` section.
  const expected = [
    ['command', 'audio'],
    ['mode', 'clone'],
    ['model', 'base-8bit'],
    ['language', 'korean'],
    ['speaker', 'sohee'],
    ['instruct', 'calm conversational Korean'],
    ['refAudio', path.join(root, 'voice-samples', 'me.wav')],
    ['refText', '테스트 기준 음성입니다.'],
    ['stream', false],
    ['timeoutMs', 90000],
    ['useForProgress', true],
  ];
  for (const [field, value] of expected) {
    assert.equal(settings.qwen3tts[field], value);
  }
});
|
|
183
|
+
|
|
184
|
+
// FireRedTTS-2: 'firered' is expected to resolve to 'fireredtts2'. The test
// expects the command to be kept verbatim, while the pretrained dir and prompt
// audio are expected to be joined onto the project root.
test('buildTtsSettings normalizes FireRedTTS-2 settings', () => {
  const root = '/project';
  const env = {
    TTS_BACKEND: 'firered',
    FIREREDTTS2_COMMAND: './bin/fireredtts2',
    FIREREDTTS2_PRETRAINED_DIR: './models/FireRedTTS2',
    FIREREDTTS2_DEVICE: 'mps',
    FIREREDTTS2_GEN_TYPE: 'monologue',
    FIREREDTTS2_SPEAKER: 'S1',
    FIREREDTTS2_PROMPT_AUDIO: './voice-samples/me.wav',
    FIREREDTTS2_PROMPT_TEXT: '테스트 기준 음성입니다.',
    FIREREDTTS2_BF16: '1',
    FIREREDTTS2_TIMEOUT_MS: '240000',
    FIREREDTTS2_PROGRESS: '1',
  };

  const settings = buildTtsSettings(env, root);
  assert.equal(settings.backend, 'fireredtts2');

  // Field-by-field expectations for the normalized `fireredtts2` section.
  const expected = [
    ['command', './bin/fireredtts2'],
    ['pretrainedDir', path.join(root, 'models', 'FireRedTTS2')],
    ['device', 'mps'],
    ['genType', 'monologue'],
    ['speaker', 'S1'],
    ['promptAudio', path.join(root, 'voice-samples', 'me.wav')],
    ['promptText', '테스트 기준 음성입니다.'],
    ['useBf16', true],
    ['timeoutMs', 240000],
    ['useForProgress', true],
  ];
  for (const [field, value] of expected) {
    assert.equal(settings.fireredtts2[field], value);
  }
});
|
|
212
|
+
|
|
213
|
+
// MOSS-TTS-Nano: 'moss-tts-nano' is expected to resolve to 'mossttsnano'. The
// test expects the script and prompt audio to be joined onto the project root,
// the checkpoint/tokenizer values to stay verbatim, MAX_NEW_FRAMES to become a
// number, and SEED to remain the string '7'.
test('buildTtsSettings normalizes MOSS-TTS-Nano settings', () => {
  const root = '/project';
  const env = {
    TTS_BACKEND: 'moss-tts-nano',
    MOSSTTSNANO_COMMAND: 'python3',
    MOSSTTSNANO_SCRIPT: './vendor/MOSS-TTS-Nano/infer.py',
    MOSSTTSNANO_CHECKPOINT: './models/MOSS-TTS-Nano',
    MOSSTTSNANO_AUDIO_TOKENIZER: './models/MOSS-Audio-Tokenizer-Nano',
    MOSSTTSNANO_MODE: 'voice_clone',
    MOSSTTSNANO_LANGUAGE: 'ko',
    MOSSTTSNANO_DEVICE: 'cpu',
    MOSSTTSNANO_DTYPE: 'float32',
    MOSSTTSNANO_PROMPT_AUDIO: './voice-samples/me.wav',
    MOSSTTSNANO_PROMPT_TEXT: '테스트 기준 음성입니다.',
    MOSSTTSNANO_MAX_NEW_FRAMES: '256',
    MOSSTTSNANO_SEED: '7',
    MOSSTTSNANO_TIMEOUT_MS: '90000',
    MOSSTTSNANO_PROGRESS: '1',
  };

  const settings = buildTtsSettings(env, root);
  assert.equal(settings.backend, 'mossttsnano');

  // Field-by-field expectations for the normalized `mossttsnano` section.
  const expected = [
    ['command', 'python3'],
    ['script', path.join(root, 'vendor', 'MOSS-TTS-Nano', 'infer.py')],
    ['checkpoint', './models/MOSS-TTS-Nano'],
    ['audioTokenizer', './models/MOSS-Audio-Tokenizer-Nano'],
    ['mode', 'voice_clone'],
    ['language', 'ko'],
    ['device', 'cpu'],
    ['dtype', 'float32'],
    ['promptAudio', path.join(root, 'voice-samples', 'me.wav')],
    ['promptText', '테스트 기준 음성입니다.'],
    ['maxNewFrames', 256],
    ['seed', '7'],
    ['timeoutMs', 90000],
    ['useForProgress', true],
  ];
  for (const [field, value] of expected) {
    assert.equal(settings.mossttsnano[field], value);
  }
});
|
|
249
|
+
|
|
250
|
+
// MOSS-TTS-Nano MLX hybrid: 'moss-mlx' is expected to resolve to
// 'mossttsnano_mlx'. The test expects the MLX-specific variables (script,
// worker, timeouts, progress) to be read alongside the shared MOSSTTSNANO_*
// ones, and a PROGRESS value of '0' to become `false`.
test('buildTtsSettings normalizes MOSS-TTS-Nano MLX hybrid settings', () => {
  const root = '/project';
  const env = {
    TTS_BACKEND: 'moss-mlx',
    MOSSTTSNANO_COMMAND: 'python3',
    MOSSTTSNANO_MLX_SCRIPT: './integrations/mossttsnano_mlx/synth.py',
    MOSSTTSNANO_MLX_WORKER_SCRIPT: './integrations/mossttsnano_mlx/worker.py',
    MOSSTTSNANO_MLX_WORKER: '1',
    MOSSTTSNANO_MLX_WORKER_STARTUP_TIMEOUT_MS: '240000',
    MOSSTTSNANO_SCRIPT: './vendor/MOSS-TTS-Nano/infer.py',
    MOSSTTSNANO_CHECKPOINT: './models/MOSS-TTS-Nano',
    MOSSTTSNANO_AUDIO_TOKENIZER: './models/MOSS-Audio-Tokenizer-Nano',
    MOSSTTSNANO_MODE: 'voice_clone',
    MOSSTTSNANO_LANGUAGE: 'ko',
    MOSSTTSNANO_TORCH_DEVICE: 'cpu',
    MOSSTTSNANO_TORCH_DTYPE: 'float32',
    MOSSTTSNANO_PROMPT_AUDIO: './voice-samples/me.wav',
    MOSSTTSNANO_PROMPT_TEXT: '테스트 기준 음성입니다.',
    MOSSTTSNANO_MAX_NEW_FRAMES: '120',
    MOSSTTSNANO_SEED: '7',
    MOSSTTSNANO_MLX_TIMEOUT_MS: '180000',
    MOSSTTSNANO_MLX_PROGRESS: '0',
  };

  const settings = buildTtsSettings(env, root);
  assert.equal(settings.backend, 'mossttsnano_mlx');

  // Field-by-field expectations for the normalized `mossttsnano_mlx` section.
  const expected = [
    ['python', 'python3'],
    ['script', path.join(root, 'integrations', 'mossttsnano_mlx', 'synth.py')],
    ['workerScript', path.join(root, 'integrations', 'mossttsnano_mlx', 'worker.py')],
    ['workerEnabled', true],
    ['workerStartupTimeoutMs', 240000],
    ['torchInferScript', path.join(root, 'vendor', 'MOSS-TTS-Nano', 'infer.py')],
    ['checkpoint', './models/MOSS-TTS-Nano'],
    ['audioTokenizer', './models/MOSS-Audio-Tokenizer-Nano'],
    ['mode', 'voice_clone'],
    ['language', 'ko'],
    ['torchDevice', 'cpu'],
    ['torchDtype', 'float32'],
    ['promptAudio', path.join(root, 'voice-samples', 'me.wav')],
    ['promptText', '테스트 기준 음성입니다.'],
    ['maxNewFrames', 120],
    ['seed', '7'],
    ['timeoutMs', 180000],
    ['useForProgress', false],
  ];
  for (const [field, value] of expected) {
    assert.equal(settings.mossttsnano_mlx[field], value);
  }
});
|
|
294
|
+
|
|
295
|
+
// NeuTTS Air: 'neutts-air' is expected to resolve to 'neuttsair'. The test
// expects python, script and ref-audio paths to be joined onto the project
// root, and SAMPLE_RATE / TIMEOUT_MS to become numbers.
test('buildTtsSettings normalizes NeuTTS Air settings and aliases neutts air', () => {
  const root = '/project';
  const env = {
    TTS_BACKEND: 'neutts-air',
    NEUTTSAIR_PYTHON: './.venv-neuttsair/bin/python',
    NEUTTSAIR_SCRIPT: './integrations/neuttsair/synth.py',
    NEUTTSAIR_BACKBONE_REPO: 'neuphonic/neutts-air-q4-gguf',
    NEUTTSAIR_BACKBONE_DEVICE: 'mps',
    NEUTTSAIR_CODEC_REPO: 'neuphonic/neucodec',
    NEUTTSAIR_CODEC_DEVICE: 'mps',
    NEUTTSAIR_REF_AUDIO: './voice-samples/me.wav',
    NEUTTSAIR_REF_TEXT: 'Reference voice text.',
    NEUTTSAIR_LANGUAGE: 'en',
    NEUTTSAIR_SAMPLE_RATE: '24000',
    NEUTTSAIR_TIMEOUT_MS: '120000',
    NEUTTSAIR_PROGRESS: '1',
  };

  const settings = buildTtsSettings(env, root);
  assert.equal(settings.backend, 'neuttsair');

  // Field-by-field expectations for the normalized `neuttsair` section.
  const expected = [
    ['python', path.join(root, '.venv-neuttsair', 'bin', 'python')],
    ['script', path.join(root, 'integrations', 'neuttsair', 'synth.py')],
    ['backboneRepo', 'neuphonic/neutts-air-q4-gguf'],
    ['backboneDevice', 'mps'],
    ['codecRepo', 'neuphonic/neucodec'],
    ['codecDevice', 'mps'],
    ['refAudio', path.join(root, 'voice-samples', 'me.wav')],
    ['refText', 'Reference voice text.'],
    ['language', 'en'],
    ['sampleRate', 24000],
    ['timeoutMs', 120000],
    ['useForProgress', true],
  ];
  for (const [field, value] of expected) {
    assert.equal(settings.neuttsair[field], value);
  }
});
|
|
327
|
+
|
|
124
328
|
test('buildTtsSettings falls back to edge for unsupported backend', () => {
|
|
125
329
|
const settings = buildTtsSettings({ TTS_BACKEND: 'unknown' }, '/project');
|
|
126
330
|
assert.equal(settings.backend, 'edge');
|
|
@@ -34,6 +34,55 @@ export const DEFAULT_TTS_VOICE_CONFIG = {
|
|
|
34
34
|
m1: { label: 'Supertonic M1', language: 'ko', voice: 'M1' },
|
|
35
35
|
},
|
|
36
36
|
},
|
|
37
|
+
omnivoice: {
|
|
38
|
+
currentVoiceType: 'cloned_reference',
|
|
39
|
+
voices: {
|
|
40
|
+
cloned_reference: { label: 'OmniVoice reference sample', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
41
|
+
designed_speaker: { label: 'OmniVoice designed speaker', language: 'ko', voice: 'warm korean male voice' },
|
|
42
|
+
},
|
|
43
|
+
},
|
|
44
|
+
qwen3tts: {
|
|
45
|
+
currentVoiceType: 'korean_preset',
|
|
46
|
+
voices: {
|
|
47
|
+
korean_preset: { label: 'Qwen3 TTS Korean preset', language: 'ko', voice: 'sohee' },
|
|
48
|
+
cloned_reference: { label: 'Qwen3 TTS reference sample', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
49
|
+
designed_speaker: { label: 'Qwen3 TTS designed speaker', language: 'ko', voice: 'calm conversational Korean voice' },
|
|
50
|
+
},
|
|
51
|
+
},
|
|
52
|
+
mlxaudio: {
|
|
53
|
+
currentVoiceType: 'qwen3_mlx',
|
|
54
|
+
voices: {
|
|
55
|
+
qwen3_mlx: { label: 'MLX Audio Qwen3 speaker', language: 'ko', voice: 'Chelsie' },
|
|
56
|
+
},
|
|
57
|
+
},
|
|
58
|
+
neuttsair: {
|
|
59
|
+
currentVoiceType: 'cloned_reference',
|
|
60
|
+
voices: {
|
|
61
|
+
cloned_reference: { label: 'NeuTTS Air reference sample', language: 'en', voice: 'voice-samples/user-reference.wav' },
|
|
62
|
+
default_sample: { label: 'NeuTTS Air bundled sample', language: 'en', voice: 'vendor/neutts-air/samples/jo.wav' },
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
fireredtts2: {
|
|
66
|
+
currentVoiceType: 'prompt_reference',
|
|
67
|
+
voices: {
|
|
68
|
+
prompt_reference: { label: 'FireRedTTS-2 prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
69
|
+
random_speaker: { label: 'FireRedTTS-2 random speaker', language: 'ko', voice: '' },
|
|
70
|
+
},
|
|
71
|
+
},
|
|
72
|
+
mossttsnano: {
|
|
73
|
+
currentVoiceType: 'prompt_reference',
|
|
74
|
+
voices: {
|
|
75
|
+
prompt_reference: { label: 'MOSS-TTS-Nano prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
76
|
+
continuation: { label: 'MOSS-TTS-Nano continuation/default', language: 'ko', voice: '' },
|
|
77
|
+
},
|
|
78
|
+
},
|
|
79
|
+
mossttsnano_mlx: {
|
|
80
|
+
currentVoiceType: 'prompt_reference',
|
|
81
|
+
voices: {
|
|
82
|
+
prompt_reference: { label: 'MOSS-TTS-Nano MLX hybrid prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
83
|
+
continuation: { label: 'MOSS-TTS-Nano MLX hybrid continuation/default', language: 'ko', voice: '' },
|
|
84
|
+
},
|
|
85
|
+
},
|
|
37
86
|
},
|
|
38
87
|
};
|
|
39
88
|
|
|
@@ -47,7 +96,37 @@ export function defaultTtsVoiceConfig() {
|
|
|
47
96
|
|
|
48
97
|
/**
 * Resolve a requested TTS backend name to a key of `config.backends`.
 *
 * The request is stringified, trimmed and lower-cased, translated through a
 * table of known aliases, and accepted only when `config.backends` defines that
 * key as its own property with a truthy value. Anything else yields 'edge'.
 *
 * @param {unknown} value - Requested backend name or alias; nullish/empty is allowed.
 * @param {{ backends?: Record<string, unknown> } | null | undefined} config -
 *   Voice config whose own `backends` keys are the accepted backend names.
 * @returns {string} The canonical backend key, or 'edge' when the request is
 *   empty, unknown, or not configured.
 */
function normalizeBackend(value, config) {
  const key = String(value || '').trim().toLowerCase();
  const aliases = new Map([
    ['q3', 'qwen3tts'],
    ['qwen3', 'qwen3tts'],
    ['qwen3-tts', 'qwen3tts'],
    ['qtts', 'qwen3tts'],
    ['qwen3-mlx', 'mlxaudio'],
    ['mlx', 'mlxaudio'],
    ['mlx-audio', 'mlxaudio'],
    ['neutts', 'neuttsair'],
    ['neutts-air', 'neuttsair'],
    ['neutts air', 'neuttsair'],
    ['neuttsair', 'neuttsair'],
    ['neu-tts-air', 'neuttsair'],
    ['neu tts air', 'neuttsair'],
    ['firered', 'fireredtts2'],
    ['fireredtts', 'fireredtts2'],
    ['firered-tts-2', 'fireredtts2'],
    ['fireredtts-2', 'fireredtts2'],
    ['moss', 'mossttsnano'],
    ['moss-tts', 'mossttsnano'],
    ['mossnano', 'mossttsnano'],
    ['moss-tts-nano', 'mossttsnano'],
    ['openmoss', 'mossttsnano'],
    ['moss-mlx', 'mossttsnano_mlx'],
    ['moss mlx', 'mossttsnano_mlx'],
    ['mossttsnano-mlx', 'mossttsnano_mlx'],
    ['mossttsnano_mlx', 'mossttsnano_mlx'],
    ['openmoss-mlx', 'mossttsnano_mlx'],
  ]);
  const normalized = aliases.get(key) || key;

  const backends = config?.backends;
  // Require an OWN property: a plain truthiness lookup would also accept names
  // inherited from Object.prototype (e.g. 'constructor', '__proto__') and hand
  // callers a "backend" that has no voice configuration behind it.
  const isConfigured = backends != null
    && Object.prototype.hasOwnProperty.call(backends, normalized)
    && Boolean(backends[normalized]);
  return isConfigured ? normalized : 'edge';
}
|
|
52
131
|
|
|
53
132
|
function normalizeVoiceType(backendConfig, requested) {
|
|
@@ -75,7 +154,10 @@ export function effectiveTtsVoiceSelection(config, env = {}) {
|
|
|
75
154
|
const merged = config || defaultTtsVoiceConfig();
|
|
76
155
|
const backend = normalizeBackend(env.TTS_BACKEND || merged.currentBackend, merged);
|
|
77
156
|
const backendConfig = merged.backends[backend] || merged.backends.edge;
|
|
78
|
-
const
|
|
157
|
+
const requestedVoiceType = env.TTS_VOICE_TYPE || merged.currentVoiceType;
|
|
158
|
+
const voiceType = backendConfig.voices?.[requestedVoiceType]
|
|
159
|
+
? requestedVoiceType
|
|
160
|
+
: normalizeVoiceType(backendConfig, backendConfig.currentVoiceType);
|
|
79
161
|
const voice = backendConfig.voices[voiceType];
|
|
80
162
|
return { backend, voiceType, voice, backendConfig };
|
|
81
163
|
}
|
|
@@ -83,6 +165,41 @@ export function effectiveTtsVoiceSelection(config, env = {}) {
|
|
|
83
165
|
/**
 * Project a resolved voice selection onto a copy of an env-style settings map.
 *
 * `TTS_BACKEND` and `TTS_VOICE_TYPE` are always written. Depending on the
 * backend, the selected voice entry's `voice` value is written to that
 * backend's own variable, and for qwen3tts / MOSS-TTS-Nano the voice type also
 * selects a mode. `VOICE_LANGUAGE` is written when the entry has a language.
 * The input `env` is never mutated.
 *
 * @param {Record<string, unknown>} [env={}] - Base settings to copy.
 * @param {{ backend: string, voiceType: string, voice?: { voice?: string, language?: string } }} selection -
 *   Selection to apply; `voice` may be absent, in which case no voice-specific
 *   value is written.
 * @returns {Record<string, unknown>} A new object with the selection applied.
 */
export function applyTtsVoiceSelectionToEnv(env = {}, selection) {
  const { backend, voiceType, voice: entry } = selection;
  const voiceValue = entry?.voice;
  const next = { ...env, TTS_BACKEND: backend, TTS_VOICE_TYPE: voiceType };

  switch (backend) {
    case 'edge':
      // Guarded like every other branch: a selection without a voice entry
      // must not throw here.
      if (entry) next.TTS_VOICE = voiceValue;
      break;

    case 'qwen3tts': {
      // The voice type decides both the synthesis mode and which variable
      // carries the voice value; unknown types use the preset-speaker mode.
      let mode = 'custom';
      let valueKey = 'QWEN3TTS_SPEAKER';
      if (voiceType === 'cloned_reference') {
        mode = 'clone';
        valueKey = 'QWEN3TTS_REF_AUDIO';
      } else if (voiceType === 'designed_speaker') {
        mode = 'design';
        valueKey = 'QWEN3TTS_INSTRUCT';
      }
      next.QWEN3TTS_MODE = mode;
      if (entry) next[valueKey] = voiceValue;
      break;
    }

    case 'mlxaudio':
      if (voiceValue) next.MLXAUDIO_VOICE = voiceValue;
      break;

    case 'neuttsair':
      if (voiceValue) next.NEUTTSAIR_REF_AUDIO = voiceValue;
      break;

    case 'fireredtts2':
      if (voiceValue) next.FIREREDTTS2_PROMPT_AUDIO = voiceValue;
      break;

    // Both MOSS-TTS-Nano backends are driven by the same MOSSTTSNANO_* variables.
    case 'mossttsnano':
    case 'mossttsnano_mlx':
      if (voiceType === 'continuation') {
        next.MOSSTTSNANO_MODE = 'continuation';
      } else {
        next.MOSSTTSNANO_MODE = 'voice_clone';
        if (voiceValue) next.MOSSTTSNANO_PROMPT_AUDIO = voiceValue;
      }
      break;

    default:
      // Other backends get no backend-specific variables from this function.
      break;
  }

  if (entry?.language) next.VOICE_LANGUAGE = entry.language;
  return next;
}
|
|
@@ -113,6 +230,23 @@ export function voiceCommandFromTranscript(text) {
|
|
|
113
230
|
const raw = String(text || '').trim();
|
|
114
231
|
if (!raw) return null;
|
|
115
232
|
const compact = raw.toLowerCase().replace(/\s+/g, '');
|
|
233
|
+
const looksLikeBackend = /\b(tts|voice|speech|audio)\b.*\bbackend\b|\bbackend\b.*\b(tts|voice|speech|audio)\b/i.test(raw)
|
|
234
|
+
|| /(tts|음성|목소리).*(백엔드|백앤드|backend).*(바꿔|변경|설정|해줘|로)/iu.test(raw)
|
|
235
|
+
|| /(백엔드|백앤드|backend).*(옴니보이스|오픈보이스|엣지|수퍼토닉|슈퍼토닉|스피치스위프트|큐원|큐웬|qwen|q3|qtts|firered|moss|openmoss|neutts|neu\s*tts|뉴티티에스|뉴티TS|speechswift|omnivoice|openvoice|edge|supertonic)/iu.test(raw)
|
|
236
|
+
|| /tts를.*(옴니보이스|오픈보이스|엣지|수퍼토닉|슈퍼토닉|스피치스위프트|큐원|큐웬|qwen|q3|qtts|firered|moss|openmoss|neutts|neu\s*tts|뉴티티에스|뉴티TS|omnivoice|openvoice|edge|supertonic|speechswift).*바꿔/iu.test(raw);
|
|
237
|
+
if (looksLikeBackend) {
|
|
238
|
+
if (/(neutts\s*-?\s*air|neu\s*tts\s*-?\s*air|neuttsair|뉴\s*티\s*티\s*에스\s*에어|뉴티티에스\s*에어|뉴티TS\s*에어)/iu.test(raw)) return { backend: 'neuttsair' };
|
|
239
|
+
if (/(omnivoice|omni voice|옴니보이스|업니보이스|옴니|업니)/iu.test(raw)) return { backend: 'omnivoice' };
|
|
240
|
+
if (/(openvoice|open voice|오픈보이스|오픈 보이스)/iu.test(raw)) return { backend: 'openvoice' };
|
|
241
|
+
if (/(speechswift|speech swift|스피치스위프트|스피치 스위프트|cosyvoice|코지보이스)/iu.test(raw)) return { backend: 'speechswift' };
|
|
242
|
+
if (/(qwen3mlx|qwen mlx|qwen3 mlx|mlx-audio|mlx audio|엠엘엑스|mlx)/iu.test(raw)) return { backend: 'mlxaudio' };
|
|
243
|
+
if (/(neutts-air|neuttsair|neutts|neu tts air|뉴티티에스|뉴티츠|뉴티에스)/iu.test(raw)) return { backend: 'neuttsair' };
|
|
244
|
+
if (/(qwen3|qwen|q3|qtts|큐원|큐웬|큐엔|큐3|큐삼)/iu.test(raw)) return { backend: 'qwen3tts' };
|
|
245
|
+
if (/(fireredtts2|fireredtts|firered|fire red|파이어레드)/iu.test(raw)) return { backend: 'fireredtts2' };
|
|
246
|
+
if (/(moss-tts-nano|moss tts nano|mossnano|moss|openmoss|모스|오픈모스)/iu.test(raw)) return { backend: 'mossttsnano' };
|
|
247
|
+
if (/(supertonic|수퍼토닉|슈퍼토닉)/iu.test(raw)) return { backend: 'supertonic' };
|
|
248
|
+
if (/(edge|엣지)/iu.test(raw)) return { backend: 'edge' };
|
|
249
|
+
}
|
|
116
250
|
const looksLikeVoice = /\b(change|switch|set)\b.*\b(voice|speaker)\b/i.test(raw)
|
|
117
251
|
|| /\b(voice|speaker)\b.*\b(to|as)\b/i.test(raw)
|
|
118
252
|
|| /(목소리|음성).*(바꿔|변경|설정|해줘)|목소리.*로|음성.*로/u.test(compact);
|
|
@@ -15,6 +15,11 @@ import {
|
|
|
15
15
|
writeTtsVoiceConfig,
|
|
16
16
|
} from './tts_voice_config.mjs';
|
|
17
17
|
|
|
18
|
+
const __tempRoots = [];
|
|
19
|
+
test.after(() => {
|
|
20
|
+
for (const root of __tempRoots) try { fs.rmSync(root, { recursive: true, force: true }); } catch {}
|
|
21
|
+
});
|
|
22
|
+
|
|
18
23
|
test('effectiveTtsVoiceSelection reads backend and voice type from config', () => {
|
|
19
24
|
const config = defaultTtsVoiceConfig();
|
|
20
25
|
config.currentBackend = 'edge';
|
|
@@ -27,6 +32,26 @@ test('effectiveTtsVoiceSelection reads backend and voice type from config', () =
|
|
|
27
32
|
assert.equal(selected.voice.voice, 'ko-KR-SunHiNeural');
|
|
28
33
|
});
|
|
29
34
|
|
|
35
|
+
test('effectiveTtsVoiceSelection falls back to backend voice when env voice type belongs to another backend', () => {
|
|
36
|
+
const config = defaultTtsVoiceConfig();
|
|
37
|
+
config.currentBackend = 'edge';
|
|
38
|
+
config.currentVoiceType = 'korean_male';
|
|
39
|
+
config.backends.edge.currentVoiceType = 'korean_male';
|
|
40
|
+
|
|
41
|
+
const selected = effectiveTtsVoiceSelection(config, { TTS_BACKEND: 'edge', TTS_VOICE_TYPE: 'cloned_reference' });
|
|
42
|
+
|
|
43
|
+
assert.equal(selected.backend, 'edge');
|
|
44
|
+
assert.equal(selected.voiceType, 'korean_male');
|
|
45
|
+
assert.equal(selected.voice.voice, 'ko-KR-InJoonNeural');
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
test('effectiveTtsVoiceSelection accepts Qwen3 backend aliases from env', () => {
|
|
49
|
+
const selected = effectiveTtsVoiceSelection(defaultTtsVoiceConfig(), { TTS_BACKEND: 'qwen3' });
|
|
50
|
+
|
|
51
|
+
assert.equal(selected.backend, 'qwen3tts');
|
|
52
|
+
assert.equal(selected.voiceType, 'korean_preset');
|
|
53
|
+
});
|
|
54
|
+
|
|
30
55
|
test('applyTtsVoiceSelectionToEnv updates backend voice and voice language together', () => {
|
|
31
56
|
const selected = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_male' }), {});
|
|
32
57
|
|
|
@@ -52,8 +77,77 @@ test('voiceCommandFromTranscript detects voice type changes', () => {
|
|
|
52
77
|
assert.equal(voiceCommandFromTranscript('change language to Korean'), null);
|
|
53
78
|
});
|
|
54
79
|
|
|
80
|
+
test('voiceCommandFromTranscript detects TTS backend changes', () => {
|
|
81
|
+
assert.deepEqual(voiceCommandFromTranscript('change TTS backend to OmniVoice'), { backend: 'omnivoice' });
|
|
82
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 옴니보이스로 바꿔'), { backend: 'omnivoice' });
|
|
83
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS를 Edge로 바꿔'), { backend: 'edge' });
|
|
84
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS를 qwen3로 바꿔'), { backend: 'qwen3tts' });
|
|
85
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 큐웬으로 바꿔'), { backend: 'qwen3tts' });
|
|
86
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS backend to FireRedTTS-2'), { backend: 'fireredtts2' });
|
|
87
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 모스로 바꿔'), { backend: 'mossttsnano' });
|
|
88
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS backend to NeuTTS Air'), { backend: 'neuttsair' });
|
|
89
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 뉴티티에스 에어로 바꿔'), { backend: 'neuttsair' });
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
test('applyTtsVoiceSelectionToEnv maps Qwen3 voice types to CLI mode env', () => {
|
|
93
|
+
const preset = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'qwen3tts', voiceType: 'korean_preset' }), {});
|
|
94
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, preset), {
|
|
95
|
+
TTS_BACKEND: 'qwen3tts',
|
|
96
|
+
TTS_VOICE_TYPE: 'korean_preset',
|
|
97
|
+
QWEN3TTS_MODE: 'custom',
|
|
98
|
+
QWEN3TTS_SPEAKER: 'sohee',
|
|
99
|
+
VOICE_LANGUAGE: 'ko',
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
const clone = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'qwen3tts', voiceType: 'cloned_reference' }), {});
|
|
103
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, clone), {
|
|
104
|
+
TTS_BACKEND: 'qwen3tts',
|
|
105
|
+
TTS_VOICE_TYPE: 'cloned_reference',
|
|
106
|
+
QWEN3TTS_MODE: 'clone',
|
|
107
|
+
QWEN3TTS_REF_AUDIO: 'voice-samples/user-reference.wav',
|
|
108
|
+
VOICE_LANGUAGE: 'ko',
|
|
109
|
+
});
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
test('applyTtsVoiceSelectionToEnv maps FireRedTTS-2 and MOSS prompt references', () => {
|
|
113
|
+
const fire = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'fireredtts2', voiceType: 'prompt_reference' }), {});
|
|
114
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, fire), {
|
|
115
|
+
TTS_BACKEND: 'fireredtts2',
|
|
116
|
+
TTS_VOICE_TYPE: 'prompt_reference',
|
|
117
|
+
FIREREDTTS2_PROMPT_AUDIO: 'voice-samples/user-reference.wav',
|
|
118
|
+
VOICE_LANGUAGE: 'ko',
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
const moss = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'mossttsnano', voiceType: 'prompt_reference' }), {});
|
|
122
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, moss), {
|
|
123
|
+
TTS_BACKEND: 'mossttsnano',
|
|
124
|
+
TTS_VOICE_TYPE: 'prompt_reference',
|
|
125
|
+
MOSSTTSNANO_MODE: 'voice_clone',
|
|
126
|
+
MOSSTTSNANO_PROMPT_AUDIO: 'voice-samples/user-reference.wav',
|
|
127
|
+
VOICE_LANGUAGE: 'ko',
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
const neutts = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'neuttsair', voiceType: 'cloned_reference' }), {});
|
|
131
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, neutts), {
|
|
132
|
+
TTS_BACKEND: 'neuttsair',
|
|
133
|
+
TTS_VOICE_TYPE: 'cloned_reference',
|
|
134
|
+
NEUTTSAIR_REF_AUDIO: 'voice-samples/user-reference.wav',
|
|
135
|
+
VOICE_LANGUAGE: 'en',
|
|
136
|
+
});
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
test('updateTtsVoiceConfig can switch to OmniVoice backend default voice', () => {
|
|
140
|
+
const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'omnivoice' });
|
|
141
|
+
const selected = effectiveTtsVoiceSelection(config, {});
|
|
142
|
+
|
|
143
|
+
assert.equal(selected.backend, 'omnivoice');
|
|
144
|
+
assert.equal(selected.voiceType, 'cloned_reference');
|
|
145
|
+
assert.equal(selected.voice.voice, 'voice-samples/user-reference.wav');
|
|
146
|
+
});
|
|
147
|
+
|
|
55
148
|
test('read and write voice config round trips current selection', () => {
|
|
56
149
|
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'voice-config-test-'));
|
|
150
|
+
__tempRoots.push(dir);
|
|
57
151
|
const file = path.join(dir, 'tts-voices.json');
|
|
58
152
|
const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_female' });
|
|
59
153
|
|