verbalcoding 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +74 -4
- package/README.es.md +3 -1
- package/README.fr.md +3 -1
- package/README.ja.md +3 -1
- package/README.ko.md +4 -2
- package/README.md +4 -2
- package/README.ru.md +3 -1
- package/README.zh.md +3 -1
- package/app-node/agent_adapters.test.mjs +14 -0
- package/app-node/agent_routing.mjs +148 -0
- package/app-node/agent_routing.test.mjs +138 -0
- package/app-node/agent_turn.mjs +86 -0
- package/app-node/agent_turn.test.mjs +109 -0
- package/app-node/bridge_context.mjs +73 -0
- package/app-node/bridge_context.test.mjs +54 -0
- package/app-node/bridge_state.mjs +4 -0
- package/app-node/bridge_wireup.test.mjs +462 -0
- package/app-node/cli_install.test.mjs +31 -0
- package/app-node/cross_agent_routing.test.mjs +78 -0
- package/app-node/discord_command_router.mjs +204 -0
- package/app-node/discord_command_router.test.mjs +311 -0
- package/app-node/discord_voice_setup.mjs +251 -0
- package/app-node/discord_voice_setup.test.mjs +86 -0
- package/app-node/hermes_profiles.test.mjs +12 -1
- package/app-node/install_config.mjs +110 -3
- package/app-node/install_config.test.mjs +8 -0
- package/app-node/instance_doctor.test.mjs +9 -0
- package/app-node/instances.test.mjs +8 -1
- package/app-node/main.mjs +488 -1368
- package/app-node/mcp_tools.test.mjs +7 -0
- package/app-node/notification_handler.mjs +89 -0
- package/app-node/notification_handler.test.mjs +187 -0
- package/app-node/plan_dispatcher.mjs +215 -0
- package/app-node/plan_dispatcher.test.mjs +101 -0
- package/app-node/plan_mode.mjs +36 -7
- package/app-node/plan_mode.test.mjs +78 -0
- package/app-node/progress_handler.mjs +220 -0
- package/app-node/progress_handler.test.mjs +193 -0
- package/app-node/progress_speech.mjs +54 -32
- package/app-node/progress_speech.test.mjs +12 -3
- package/app-node/project_sessions.mjs +5 -2
- package/app-node/project_sessions.test.mjs +7 -0
- package/app-node/research_mode.mjs +282 -0
- package/app-node/research_mode.test.mjs +264 -0
- package/app-node/restart_notice.mjs +3 -0
- package/app-node/restart_notice.test.mjs +11 -0
- package/app-node/session_ontology.mjs +271 -0
- package/app-node/session_ontology.test.mjs +130 -0
- package/app-node/smart_progress.mjs +1 -1
- package/app-node/stream_sentencer.mjs +32 -2
- package/app-node/stream_sentencer.test.mjs +65 -0
- package/app-node/streaming_tts_queue.mjs +5 -1
- package/app-node/streaming_tts_queue.test.mjs +7 -1
- package/app-node/stt_whisper.mjs +24 -0
- package/app-node/stt_whisper.test.mjs +32 -0
- package/app-node/text_routing.mjs +4 -2
- package/app-node/tts_backends.mjs +537 -3
- package/app-node/tts_backends.test.mjs +454 -0
- package/app-node/tts_player.mjs +164 -0
- package/app-node/tts_player.test.mjs +202 -0
- package/app-node/tts_runtime.mjs +134 -0
- package/app-node/tts_runtime.test.mjs +89 -0
- package/app-node/tts_settings.mjs +150 -3
- package/app-node/tts_settings.test.mjs +204 -0
- package/app-node/tts_voice_config.mjs +136 -2
- package/app-node/tts_voice_config.test.mjs +94 -0
- package/app-node/utterance_router.mjs +216 -0
- package/app-node/utterance_router.test.mjs +236 -0
- package/app-node/voice_autojoin.mjs +37 -0
- package/app-node/voice_autojoin.test.mjs +59 -0
- package/app-node/voice_io.mjs +272 -0
- package/app-node/voice_io.test.mjs +102 -0
- package/app-node/voice_turn_runner.mjs +449 -0
- package/app-node/voice_turn_runner.test.mjs +289 -0
- package/docs/CONFIGURATION.md +12 -2
- package/docs/HARNESSES.md +58 -0
- package/docs/HARNESS_AIDER.md +50 -0
- package/docs/HARNESS_CLAUDE.md +56 -0
- package/docs/HARNESS_CODEX.md +56 -0
- package/docs/HARNESS_CURSOR.md +45 -0
- package/docs/HARNESS_GEMINI.md +45 -0
- package/docs/HARNESS_HERMES.md +57 -0
- package/docs/HARNESS_OPENCLAW.md +44 -0
- package/docs/HARNESS_OPENCODE.md +44 -0
- package/docs/README.md +1 -0
- package/docs/ROADMAP.md +20 -5
- package/docs/TTS_BACKENDS.md +227 -0
- package/docs/USAGE.md +22 -0
- package/docs/i18n/AGENTS.es.md +34 -0
- package/docs/i18n/AGENTS.fr.md +34 -0
- package/docs/i18n/AGENTS.ja.md +34 -0
- package/docs/i18n/AGENTS.ko.md +34 -0
- package/docs/i18n/AGENTS.ru.md +34 -0
- package/docs/i18n/AGENTS.zh.md +34 -0
- package/docs/i18n/HARNESSES.es.md +58 -0
- package/docs/i18n/HARNESSES.fr.md +58 -0
- package/docs/i18n/HARNESSES.ja.md +58 -0
- package/docs/i18n/HARNESSES.ko.md +58 -0
- package/docs/i18n/HARNESSES.ru.md +58 -0
- package/docs/i18n/HARNESSES.zh.md +58 -0
- package/docs/i18n/HARNESS_AIDER.es.md +48 -0
- package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
- package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
- package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
- package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
- package/docs/i18n/HARNESS_CODEX.es.md +55 -0
- package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
- package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
- package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
- package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
- package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
- package/docs/i18n/HARNESS_HERMES.es.md +54 -0
- package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
- package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
- package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
- package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
- package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
- package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
- package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
- package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
- package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
- package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
- package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
- package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
- package/integrations/fireredtts2/mlx_llm.py +183 -0
- package/integrations/fireredtts2/synth.py +156 -0
- package/integrations/fireredtts2/synth_mlx.py +196 -0
- package/integrations/mlxaudio/synth.py +74 -0
- package/integrations/neuttsair/synth.py +104 -0
- package/integrations/omnivoice/synth.py +110 -0
- package/package.json +6 -1
- package/scripts/cli.mjs +84 -0
- package/scripts/doctor.mjs +104 -4
- package/scripts/install.mjs +5 -1
- package/scripts/install_fireredtts2.sh +109 -0
- package/scripts/install_mlxaudio.sh +34 -0
- package/scripts/install_mossttsnano.sh +46 -0
- package/scripts/postinstall.mjs +34 -0
|
@@ -121,6 +121,210 @@ test('buildTtsSettings normalizes Supertonic local backend settings', () => {
|
|
|
121
121
|
assert.equal(settings.supertonic.interOpThreads, '1');
|
|
122
122
|
});
|
|
123
123
|
|
|
124
|
+
test('buildTtsSettings normalizes OmniVoice local backend settings', () => {
|
|
125
|
+
const root = '/project';
|
|
126
|
+
const settings = buildTtsSettings({
|
|
127
|
+
TTS_BACKEND: 'omnivoice',
|
|
128
|
+
OMNIVOICE_PYTHON: './.venv-omnivoice/bin/python',
|
|
129
|
+
OMNIVOICE_MODEL: 'k2-fsa/OmniVoice',
|
|
130
|
+
OMNIVOICE_DEVICE: 'mps',
|
|
131
|
+
OMNIVOICE_DTYPE: 'float16',
|
|
132
|
+
OMNIVOICE_REF_AUDIO: './voice-samples/me.wav',
|
|
133
|
+
OMNIVOICE_REF_TEXT: '테스트 기준 음성입니다.',
|
|
134
|
+
OMNIVOICE_LANGUAGE: 'ko',
|
|
135
|
+
OMNIVOICE_SPEAKER: 'warm korean male voice',
|
|
136
|
+
OMNIVOICE_TIMEOUT_MS: '180000',
|
|
137
|
+
OMNIVOICE_PROGRESS: '1',
|
|
138
|
+
}, root);
|
|
139
|
+
|
|
140
|
+
assert.equal(settings.backend, 'omnivoice');
|
|
141
|
+
assert.equal(settings.omnivoice.python, path.join(root, '.venv-omnivoice', 'bin', 'python'));
|
|
142
|
+
assert.equal(settings.omnivoice.model, 'k2-fsa/OmniVoice');
|
|
143
|
+
assert.equal(settings.omnivoice.device, 'mps');
|
|
144
|
+
assert.equal(settings.omnivoice.dtype, 'float16');
|
|
145
|
+
assert.equal(settings.omnivoice.refAudio, path.join(root, 'voice-samples', 'me.wav'));
|
|
146
|
+
assert.equal(settings.omnivoice.refText, '테스트 기준 음성입니다.');
|
|
147
|
+
assert.equal(settings.omnivoice.language, 'ko');
|
|
148
|
+
assert.equal(settings.omnivoice.speaker, 'warm korean male voice');
|
|
149
|
+
assert.equal(settings.omnivoice.timeoutMs, 180000);
|
|
150
|
+
assert.equal(settings.omnivoice.useForProgress, true);
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
test('buildTtsSettings normalizes Qwen3 TTS CLI settings and aliases qwen3', () => {
|
|
154
|
+
const root = '/project';
|
|
155
|
+
const settings = buildTtsSettings({
|
|
156
|
+
TTS_BACKEND: 'qwen3',
|
|
157
|
+
QWEN3TTS_COMMAND: 'audio',
|
|
158
|
+
QWEN3TTS_MODE: 'clone',
|
|
159
|
+
QWEN3TTS_MODEL: 'base-8bit',
|
|
160
|
+
QWEN3TTS_LANGUAGE: 'korean',
|
|
161
|
+
QWEN3TTS_SPEAKER: 'sohee',
|
|
162
|
+
QWEN3TTS_INSTRUCT: 'calm conversational Korean',
|
|
163
|
+
QWEN3TTS_REF_AUDIO: './voice-samples/me.wav',
|
|
164
|
+
QWEN3TTS_REF_TEXT: '테스트 기준 음성입니다.',
|
|
165
|
+
QWEN3TTS_STREAM: '0',
|
|
166
|
+
QWEN3TTS_TIMEOUT_MS: '90000',
|
|
167
|
+
QWEN3TTS_PROGRESS: '1',
|
|
168
|
+
}, root);
|
|
169
|
+
|
|
170
|
+
assert.equal(settings.backend, 'qwen3tts');
|
|
171
|
+
assert.equal(settings.qwen3tts.command, 'audio');
|
|
172
|
+
assert.equal(settings.qwen3tts.mode, 'clone');
|
|
173
|
+
assert.equal(settings.qwen3tts.model, 'base-8bit');
|
|
174
|
+
assert.equal(settings.qwen3tts.language, 'korean');
|
|
175
|
+
assert.equal(settings.qwen3tts.speaker, 'sohee');
|
|
176
|
+
assert.equal(settings.qwen3tts.instruct, 'calm conversational Korean');
|
|
177
|
+
assert.equal(settings.qwen3tts.refAudio, path.join(root, 'voice-samples', 'me.wav'));
|
|
178
|
+
assert.equal(settings.qwen3tts.refText, '테스트 기준 음성입니다.');
|
|
179
|
+
assert.equal(settings.qwen3tts.stream, false);
|
|
180
|
+
assert.equal(settings.qwen3tts.timeoutMs, 90000);
|
|
181
|
+
assert.equal(settings.qwen3tts.useForProgress, true);
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
test('buildTtsSettings normalizes FireRedTTS-2 settings', () => {
|
|
185
|
+
const root = '/project';
|
|
186
|
+
const settings = buildTtsSettings({
|
|
187
|
+
TTS_BACKEND: 'firered',
|
|
188
|
+
FIREREDTTS2_COMMAND: './bin/fireredtts2',
|
|
189
|
+
FIREREDTTS2_PRETRAINED_DIR: './models/FireRedTTS2',
|
|
190
|
+
FIREREDTTS2_DEVICE: 'mps',
|
|
191
|
+
FIREREDTTS2_GEN_TYPE: 'monologue',
|
|
192
|
+
FIREREDTTS2_SPEAKER: 'S1',
|
|
193
|
+
FIREREDTTS2_PROMPT_AUDIO: './voice-samples/me.wav',
|
|
194
|
+
FIREREDTTS2_PROMPT_TEXT: '테스트 기준 음성입니다.',
|
|
195
|
+
FIREREDTTS2_BF16: '1',
|
|
196
|
+
FIREREDTTS2_TIMEOUT_MS: '240000',
|
|
197
|
+
FIREREDTTS2_PROGRESS: '1',
|
|
198
|
+
}, root);
|
|
199
|
+
|
|
200
|
+
assert.equal(settings.backend, 'fireredtts2');
|
|
201
|
+
assert.equal(settings.fireredtts2.command, './bin/fireredtts2');
|
|
202
|
+
assert.equal(settings.fireredtts2.pretrainedDir, path.join(root, 'models', 'FireRedTTS2'));
|
|
203
|
+
assert.equal(settings.fireredtts2.device, 'mps');
|
|
204
|
+
assert.equal(settings.fireredtts2.genType, 'monologue');
|
|
205
|
+
assert.equal(settings.fireredtts2.speaker, 'S1');
|
|
206
|
+
assert.equal(settings.fireredtts2.promptAudio, path.join(root, 'voice-samples', 'me.wav'));
|
|
207
|
+
assert.equal(settings.fireredtts2.promptText, '테스트 기준 음성입니다.');
|
|
208
|
+
assert.equal(settings.fireredtts2.useBf16, true);
|
|
209
|
+
assert.equal(settings.fireredtts2.timeoutMs, 240000);
|
|
210
|
+
assert.equal(settings.fireredtts2.useForProgress, true);
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
test('buildTtsSettings normalizes MOSS-TTS-Nano settings', () => {
|
|
214
|
+
const root = '/project';
|
|
215
|
+
const settings = buildTtsSettings({
|
|
216
|
+
TTS_BACKEND: 'moss-tts-nano',
|
|
217
|
+
MOSSTTSNANO_COMMAND: 'python3',
|
|
218
|
+
MOSSTTSNANO_SCRIPT: './vendor/MOSS-TTS-Nano/infer.py',
|
|
219
|
+
MOSSTTSNANO_CHECKPOINT: './models/MOSS-TTS-Nano',
|
|
220
|
+
MOSSTTSNANO_AUDIO_TOKENIZER: './models/MOSS-Audio-Tokenizer-Nano',
|
|
221
|
+
MOSSTTSNANO_MODE: 'voice_clone',
|
|
222
|
+
MOSSTTSNANO_LANGUAGE: 'ko',
|
|
223
|
+
MOSSTTSNANO_DEVICE: 'cpu',
|
|
224
|
+
MOSSTTSNANO_DTYPE: 'float32',
|
|
225
|
+
MOSSTTSNANO_PROMPT_AUDIO: './voice-samples/me.wav',
|
|
226
|
+
MOSSTTSNANO_PROMPT_TEXT: '테스트 기준 음성입니다.',
|
|
227
|
+
MOSSTTSNANO_MAX_NEW_FRAMES: '256',
|
|
228
|
+
MOSSTTSNANO_SEED: '7',
|
|
229
|
+
MOSSTTSNANO_TIMEOUT_MS: '90000',
|
|
230
|
+
MOSSTTSNANO_PROGRESS: '1',
|
|
231
|
+
}, root);
|
|
232
|
+
|
|
233
|
+
assert.equal(settings.backend, 'mossttsnano');
|
|
234
|
+
assert.equal(settings.mossttsnano.command, 'python3');
|
|
235
|
+
assert.equal(settings.mossttsnano.script, path.join(root, 'vendor', 'MOSS-TTS-Nano', 'infer.py'));
|
|
236
|
+
assert.equal(settings.mossttsnano.checkpoint, './models/MOSS-TTS-Nano');
|
|
237
|
+
assert.equal(settings.mossttsnano.audioTokenizer, './models/MOSS-Audio-Tokenizer-Nano');
|
|
238
|
+
assert.equal(settings.mossttsnano.mode, 'voice_clone');
|
|
239
|
+
assert.equal(settings.mossttsnano.language, 'ko');
|
|
240
|
+
assert.equal(settings.mossttsnano.device, 'cpu');
|
|
241
|
+
assert.equal(settings.mossttsnano.dtype, 'float32');
|
|
242
|
+
assert.equal(settings.mossttsnano.promptAudio, path.join(root, 'voice-samples', 'me.wav'));
|
|
243
|
+
assert.equal(settings.mossttsnano.promptText, '테스트 기준 음성입니다.');
|
|
244
|
+
assert.equal(settings.mossttsnano.maxNewFrames, 256);
|
|
245
|
+
assert.equal(settings.mossttsnano.seed, '7');
|
|
246
|
+
assert.equal(settings.mossttsnano.timeoutMs, 90000);
|
|
247
|
+
assert.equal(settings.mossttsnano.useForProgress, true);
|
|
248
|
+
});
|
|
249
|
+
|
|
250
|
+
test('buildTtsSettings normalizes MOSS-TTS-Nano MLX hybrid settings', () => {
|
|
251
|
+
const root = '/project';
|
|
252
|
+
const settings = buildTtsSettings({
|
|
253
|
+
TTS_BACKEND: 'moss-mlx',
|
|
254
|
+
MOSSTTSNANO_COMMAND: 'python3',
|
|
255
|
+
MOSSTTSNANO_MLX_SCRIPT: './integrations/mossttsnano_mlx/synth.py',
|
|
256
|
+
MOSSTTSNANO_MLX_WORKER_SCRIPT: './integrations/mossttsnano_mlx/worker.py',
|
|
257
|
+
MOSSTTSNANO_MLX_WORKER: '1',
|
|
258
|
+
MOSSTTSNANO_MLX_WORKER_STARTUP_TIMEOUT_MS: '240000',
|
|
259
|
+
MOSSTTSNANO_SCRIPT: './vendor/MOSS-TTS-Nano/infer.py',
|
|
260
|
+
MOSSTTSNANO_CHECKPOINT: './models/MOSS-TTS-Nano',
|
|
261
|
+
MOSSTTSNANO_AUDIO_TOKENIZER: './models/MOSS-Audio-Tokenizer-Nano',
|
|
262
|
+
MOSSTTSNANO_MODE: 'voice_clone',
|
|
263
|
+
MOSSTTSNANO_LANGUAGE: 'ko',
|
|
264
|
+
MOSSTTSNANO_TORCH_DEVICE: 'cpu',
|
|
265
|
+
MOSSTTSNANO_TORCH_DTYPE: 'float32',
|
|
266
|
+
MOSSTTSNANO_PROMPT_AUDIO: './voice-samples/me.wav',
|
|
267
|
+
MOSSTTSNANO_PROMPT_TEXT: '테스트 기준 음성입니다.',
|
|
268
|
+
MOSSTTSNANO_MAX_NEW_FRAMES: '120',
|
|
269
|
+
MOSSTTSNANO_SEED: '7',
|
|
270
|
+
MOSSTTSNANO_MLX_TIMEOUT_MS: '180000',
|
|
271
|
+
MOSSTTSNANO_MLX_PROGRESS: '0',
|
|
272
|
+
}, root);
|
|
273
|
+
|
|
274
|
+
assert.equal(settings.backend, 'mossttsnano_mlx');
|
|
275
|
+
assert.equal(settings.mossttsnano_mlx.python, 'python3');
|
|
276
|
+
assert.equal(settings.mossttsnano_mlx.script, path.join(root, 'integrations', 'mossttsnano_mlx', 'synth.py'));
|
|
277
|
+
assert.equal(settings.mossttsnano_mlx.workerScript, path.join(root, 'integrations', 'mossttsnano_mlx', 'worker.py'));
|
|
278
|
+
assert.equal(settings.mossttsnano_mlx.workerEnabled, true);
|
|
279
|
+
assert.equal(settings.mossttsnano_mlx.workerStartupTimeoutMs, 240000);
|
|
280
|
+
assert.equal(settings.mossttsnano_mlx.torchInferScript, path.join(root, 'vendor', 'MOSS-TTS-Nano', 'infer.py'));
|
|
281
|
+
assert.equal(settings.mossttsnano_mlx.checkpoint, './models/MOSS-TTS-Nano');
|
|
282
|
+
assert.equal(settings.mossttsnano_mlx.audioTokenizer, './models/MOSS-Audio-Tokenizer-Nano');
|
|
283
|
+
assert.equal(settings.mossttsnano_mlx.mode, 'voice_clone');
|
|
284
|
+
assert.equal(settings.mossttsnano_mlx.language, 'ko');
|
|
285
|
+
assert.equal(settings.mossttsnano_mlx.torchDevice, 'cpu');
|
|
286
|
+
assert.equal(settings.mossttsnano_mlx.torchDtype, 'float32');
|
|
287
|
+
assert.equal(settings.mossttsnano_mlx.promptAudio, path.join(root, 'voice-samples', 'me.wav'));
|
|
288
|
+
assert.equal(settings.mossttsnano_mlx.promptText, '테스트 기준 음성입니다.');
|
|
289
|
+
assert.equal(settings.mossttsnano_mlx.maxNewFrames, 120);
|
|
290
|
+
assert.equal(settings.mossttsnano_mlx.seed, '7');
|
|
291
|
+
assert.equal(settings.mossttsnano_mlx.timeoutMs, 180000);
|
|
292
|
+
assert.equal(settings.mossttsnano_mlx.useForProgress, false);
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
test('buildTtsSettings normalizes NeuTTS Air settings and aliases neutts air', () => {
|
|
296
|
+
const root = '/project';
|
|
297
|
+
const settings = buildTtsSettings({
|
|
298
|
+
TTS_BACKEND: 'neutts-air',
|
|
299
|
+
NEUTTSAIR_PYTHON: './.venv-neuttsair/bin/python',
|
|
300
|
+
NEUTTSAIR_SCRIPT: './integrations/neuttsair/synth.py',
|
|
301
|
+
NEUTTSAIR_BACKBONE_REPO: 'neuphonic/neutts-air-q4-gguf',
|
|
302
|
+
NEUTTSAIR_BACKBONE_DEVICE: 'mps',
|
|
303
|
+
NEUTTSAIR_CODEC_REPO: 'neuphonic/neucodec',
|
|
304
|
+
NEUTTSAIR_CODEC_DEVICE: 'mps',
|
|
305
|
+
NEUTTSAIR_REF_AUDIO: './voice-samples/me.wav',
|
|
306
|
+
NEUTTSAIR_REF_TEXT: 'Reference voice text.',
|
|
307
|
+
NEUTTSAIR_LANGUAGE: 'en',
|
|
308
|
+
NEUTTSAIR_SAMPLE_RATE: '24000',
|
|
309
|
+
NEUTTSAIR_TIMEOUT_MS: '120000',
|
|
310
|
+
NEUTTSAIR_PROGRESS: '1',
|
|
311
|
+
}, root);
|
|
312
|
+
|
|
313
|
+
assert.equal(settings.backend, 'neuttsair');
|
|
314
|
+
assert.equal(settings.neuttsair.python, path.join(root, '.venv-neuttsair', 'bin', 'python'));
|
|
315
|
+
assert.equal(settings.neuttsair.script, path.join(root, 'integrations', 'neuttsair', 'synth.py'));
|
|
316
|
+
assert.equal(settings.neuttsair.backboneRepo, 'neuphonic/neutts-air-q4-gguf');
|
|
317
|
+
assert.equal(settings.neuttsair.backboneDevice, 'mps');
|
|
318
|
+
assert.equal(settings.neuttsair.codecRepo, 'neuphonic/neucodec');
|
|
319
|
+
assert.equal(settings.neuttsair.codecDevice, 'mps');
|
|
320
|
+
assert.equal(settings.neuttsair.refAudio, path.join(root, 'voice-samples', 'me.wav'));
|
|
321
|
+
assert.equal(settings.neuttsair.refText, 'Reference voice text.');
|
|
322
|
+
assert.equal(settings.neuttsair.language, 'en');
|
|
323
|
+
assert.equal(settings.neuttsair.sampleRate, 24000);
|
|
324
|
+
assert.equal(settings.neuttsair.timeoutMs, 120000);
|
|
325
|
+
assert.equal(settings.neuttsair.useForProgress, true);
|
|
326
|
+
});
|
|
327
|
+
|
|
124
328
|
test('buildTtsSettings falls back to edge for unsupported backend', () => {
|
|
125
329
|
const settings = buildTtsSettings({ TTS_BACKEND: 'unknown' }, '/project');
|
|
126
330
|
assert.equal(settings.backend, 'edge');
|
|
@@ -34,6 +34,55 @@ export const DEFAULT_TTS_VOICE_CONFIG = {
|
|
|
34
34
|
m1: { label: 'Supertonic M1', language: 'ko', voice: 'M1' },
|
|
35
35
|
},
|
|
36
36
|
},
|
|
37
|
+
omnivoice: {
|
|
38
|
+
currentVoiceType: 'cloned_reference',
|
|
39
|
+
voices: {
|
|
40
|
+
cloned_reference: { label: 'OmniVoice reference sample', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
41
|
+
designed_speaker: { label: 'OmniVoice designed speaker', language: 'ko', voice: 'warm korean male voice' },
|
|
42
|
+
},
|
|
43
|
+
},
|
|
44
|
+
qwen3tts: {
|
|
45
|
+
currentVoiceType: 'korean_preset',
|
|
46
|
+
voices: {
|
|
47
|
+
korean_preset: { label: 'Qwen3 TTS Korean preset', language: 'ko', voice: 'sohee' },
|
|
48
|
+
cloned_reference: { label: 'Qwen3 TTS reference sample', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
49
|
+
designed_speaker: { label: 'Qwen3 TTS designed speaker', language: 'ko', voice: 'calm conversational Korean voice' },
|
|
50
|
+
},
|
|
51
|
+
},
|
|
52
|
+
mlxaudio: {
|
|
53
|
+
currentVoiceType: 'qwen3_mlx',
|
|
54
|
+
voices: {
|
|
55
|
+
qwen3_mlx: { label: 'MLX Audio Qwen3 speaker', language: 'ko', voice: 'Chelsie' },
|
|
56
|
+
},
|
|
57
|
+
},
|
|
58
|
+
neuttsair: {
|
|
59
|
+
currentVoiceType: 'cloned_reference',
|
|
60
|
+
voices: {
|
|
61
|
+
cloned_reference: { label: 'NeuTTS Air reference sample', language: 'en', voice: 'voice-samples/user-reference.wav' },
|
|
62
|
+
default_sample: { label: 'NeuTTS Air bundled sample', language: 'en', voice: 'vendor/neutts-air/samples/jo.wav' },
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
fireredtts2: {
|
|
66
|
+
currentVoiceType: 'prompt_reference',
|
|
67
|
+
voices: {
|
|
68
|
+
prompt_reference: { label: 'FireRedTTS-2 prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
69
|
+
random_speaker: { label: 'FireRedTTS-2 random speaker', language: 'ko', voice: '' },
|
|
70
|
+
},
|
|
71
|
+
},
|
|
72
|
+
mossttsnano: {
|
|
73
|
+
currentVoiceType: 'prompt_reference',
|
|
74
|
+
voices: {
|
|
75
|
+
prompt_reference: { label: 'MOSS-TTS-Nano prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
76
|
+
continuation: { label: 'MOSS-TTS-Nano continuation/default', language: 'ko', voice: '' },
|
|
77
|
+
},
|
|
78
|
+
},
|
|
79
|
+
mossttsnano_mlx: {
|
|
80
|
+
currentVoiceType: 'prompt_reference',
|
|
81
|
+
voices: {
|
|
82
|
+
prompt_reference: { label: 'MOSS-TTS-Nano MLX hybrid prompt reference', language: 'ko', voice: 'voice-samples/user-reference.wav' },
|
|
83
|
+
continuation: { label: 'MOSS-TTS-Nano MLX hybrid continuation/default', language: 'ko', voice: '' },
|
|
84
|
+
},
|
|
85
|
+
},
|
|
37
86
|
},
|
|
38
87
|
};
|
|
39
88
|
|
|
@@ -47,7 +96,37 @@ export function defaultTtsVoiceConfig() {
|
|
|
47
96
|
|
|
48
97
|
function normalizeBackend(value, config) {
|
|
49
98
|
const key = String(value || '').trim().toLowerCase();
|
|
50
|
-
|
|
99
|
+
const aliases = new Map([
|
|
100
|
+
['q3', 'qwen3tts'],
|
|
101
|
+
['qwen3', 'qwen3tts'],
|
|
102
|
+
['qwen3-tts', 'qwen3tts'],
|
|
103
|
+
['qtts', 'qwen3tts'],
|
|
104
|
+
['qwen3-mlx', 'mlxaudio'],
|
|
105
|
+
['mlx', 'mlxaudio'],
|
|
106
|
+
['mlx-audio', 'mlxaudio'],
|
|
107
|
+
['neutts', 'neuttsair'],
|
|
108
|
+
['neutts-air', 'neuttsair'],
|
|
109
|
+
['neutts air', 'neuttsair'],
|
|
110
|
+
['neuttsair', 'neuttsair'],
|
|
111
|
+
['neu-tts-air', 'neuttsair'],
|
|
112
|
+
['neu tts air', 'neuttsair'],
|
|
113
|
+
['firered', 'fireredtts2'],
|
|
114
|
+
['fireredtts', 'fireredtts2'],
|
|
115
|
+
['firered-tts-2', 'fireredtts2'],
|
|
116
|
+
['fireredtts-2', 'fireredtts2'],
|
|
117
|
+
['moss', 'mossttsnano'],
|
|
118
|
+
['moss-tts', 'mossttsnano'],
|
|
119
|
+
['mossnano', 'mossttsnano'],
|
|
120
|
+
['moss-tts-nano', 'mossttsnano'],
|
|
121
|
+
['openmoss', 'mossttsnano'],
|
|
122
|
+
['moss-mlx', 'mossttsnano_mlx'],
|
|
123
|
+
['moss mlx', 'mossttsnano_mlx'],
|
|
124
|
+
['mossttsnano-mlx', 'mossttsnano_mlx'],
|
|
125
|
+
['mossttsnano_mlx', 'mossttsnano_mlx'],
|
|
126
|
+
['openmoss-mlx', 'mossttsnano_mlx'],
|
|
127
|
+
]);
|
|
128
|
+
const normalized = aliases.get(key) || key;
|
|
129
|
+
return config.backends?.[normalized] ? normalized : 'edge';
|
|
51
130
|
}
|
|
52
131
|
|
|
53
132
|
function normalizeVoiceType(backendConfig, requested) {
|
|
@@ -75,7 +154,10 @@ export function effectiveTtsVoiceSelection(config, env = {}) {
|
|
|
75
154
|
const merged = config || defaultTtsVoiceConfig();
|
|
76
155
|
const backend = normalizeBackend(env.TTS_BACKEND || merged.currentBackend, merged);
|
|
77
156
|
const backendConfig = merged.backends[backend] || merged.backends.edge;
|
|
78
|
-
const
|
|
157
|
+
const requestedVoiceType = env.TTS_VOICE_TYPE || merged.currentVoiceType;
|
|
158
|
+
const voiceType = backendConfig.voices?.[requestedVoiceType]
|
|
159
|
+
? requestedVoiceType
|
|
160
|
+
: normalizeVoiceType(backendConfig, backendConfig.currentVoiceType);
|
|
79
161
|
const voice = backendConfig.voices[voiceType];
|
|
80
162
|
return { backend, voiceType, voice, backendConfig };
|
|
81
163
|
}
|
|
@@ -83,6 +165,41 @@ export function effectiveTtsVoiceSelection(config, env = {}) {
|
|
|
83
165
|
export function applyTtsVoiceSelectionToEnv(env = {}, selection) {
|
|
84
166
|
const next = { ...env, TTS_BACKEND: selection.backend, TTS_VOICE_TYPE: selection.voiceType };
|
|
85
167
|
if (selection.backend === 'edge') next.TTS_VOICE = selection.voice.voice;
|
|
168
|
+
if (selection.backend === 'qwen3tts') {
|
|
169
|
+
if (selection.voiceType === 'cloned_reference') {
|
|
170
|
+
next.QWEN3TTS_MODE = 'clone';
|
|
171
|
+
next.QWEN3TTS_REF_AUDIO = selection.voice.voice;
|
|
172
|
+
} else if (selection.voiceType === 'designed_speaker') {
|
|
173
|
+
next.QWEN3TTS_MODE = 'design';
|
|
174
|
+
next.QWEN3TTS_INSTRUCT = selection.voice.voice;
|
|
175
|
+
} else {
|
|
176
|
+
next.QWEN3TTS_MODE = 'custom';
|
|
177
|
+
next.QWEN3TTS_SPEAKER = selection.voice.voice;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
if (selection.backend === 'mlxaudio') {
|
|
181
|
+
if (selection.voice?.voice) next.MLXAUDIO_VOICE = selection.voice.voice;
|
|
182
|
+
}
|
|
183
|
+
if (selection.backend === 'neuttsair') {
|
|
184
|
+
if (selection.voice?.voice) next.NEUTTSAIR_REF_AUDIO = selection.voice.voice;
|
|
185
|
+
}
|
|
186
|
+
if (selection.backend === 'fireredtts2') {
|
|
187
|
+
if (selection.voice?.voice) next.FIREREDTTS2_PROMPT_AUDIO = selection.voice.voice;
|
|
188
|
+
}
|
|
189
|
+
if (selection.backend === 'mossttsnano') {
|
|
190
|
+
if (selection.voiceType === 'continuation') next.MOSSTTSNANO_MODE = 'continuation';
|
|
191
|
+
else {
|
|
192
|
+
next.MOSSTTSNANO_MODE = 'voice_clone';
|
|
193
|
+
if (selection.voice?.voice) next.MOSSTTSNANO_PROMPT_AUDIO = selection.voice.voice;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
if (selection.backend === 'mossttsnano_mlx') {
|
|
197
|
+
if (selection.voiceType === 'continuation') next.MOSSTTSNANO_MODE = 'continuation';
|
|
198
|
+
else {
|
|
199
|
+
next.MOSSTTSNANO_MODE = 'voice_clone';
|
|
200
|
+
if (selection.voice?.voice) next.MOSSTTSNANO_PROMPT_AUDIO = selection.voice.voice;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
86
203
|
if (selection.voice?.language) next.VOICE_LANGUAGE = selection.voice.language;
|
|
87
204
|
return next;
|
|
88
205
|
}
|
|
@@ -113,6 +230,23 @@ export function voiceCommandFromTranscript(text) {
|
|
|
113
230
|
const raw = String(text || '').trim();
|
|
114
231
|
if (!raw) return null;
|
|
115
232
|
const compact = raw.toLowerCase().replace(/\s+/g, '');
|
|
233
|
+
const looksLikeBackend = /\b(tts|voice|speech|audio)\b.*\bbackend\b|\bbackend\b.*\b(tts|voice|speech|audio)\b/i.test(raw)
|
|
234
|
+
|| /(tts|음성|목소리).*(백엔드|백앤드|backend).*(바꿔|변경|설정|해줘|로)/iu.test(raw)
|
|
235
|
+
|| /(백엔드|백앤드|backend).*(옴니보이스|오픈보이스|엣지|수퍼토닉|슈퍼토닉|스피치스위프트|큐원|큐웬|qwen|q3|qtts|firered|moss|openmoss|neutts|neu\s*tts|뉴티티에스|뉴티TS|speechswift|omnivoice|openvoice|edge|supertonic)/iu.test(raw)
|
|
236
|
+
|| /tts를.*(옴니보이스|오픈보이스|엣지|수퍼토닉|슈퍼토닉|스피치스위프트|큐원|큐웬|qwen|q3|qtts|firered|moss|openmoss|neutts|neu\s*tts|뉴티티에스|뉴티TS|omnivoice|openvoice|edge|supertonic|speechswift).*바꿔/iu.test(raw);
|
|
237
|
+
if (looksLikeBackend) {
|
|
238
|
+
if (/(neutts\s*-?\s*air|neu\s*tts\s*-?\s*air|neuttsair|뉴\s*티\s*티\s*에스\s*에어|뉴티티에스\s*에어|뉴티TS\s*에어)/iu.test(raw)) return { backend: 'neuttsair' };
|
|
239
|
+
if (/(omnivoice|omni voice|옴니보이스|업니보이스|옴니|업니)/iu.test(raw)) return { backend: 'omnivoice' };
|
|
240
|
+
if (/(openvoice|open voice|오픈보이스|오픈 보이스)/iu.test(raw)) return { backend: 'openvoice' };
|
|
241
|
+
if (/(speechswift|speech swift|스피치스위프트|스피치 스위프트|cosyvoice|코지보이스)/iu.test(raw)) return { backend: 'speechswift' };
|
|
242
|
+
if (/(qwen3mlx|qwen mlx|qwen3 mlx|mlx-audio|mlx audio|엠엘엑스|mlx)/iu.test(raw)) return { backend: 'mlxaudio' };
|
|
243
|
+
if (/(neutts-air|neuttsair|neutts|neu tts air|뉴티티에스|뉴티츠|뉴티에스)/iu.test(raw)) return { backend: 'neuttsair' };
|
|
244
|
+
if (/(qwen3|qwen|q3|qtts|큐원|큐웬|큐엔|큐3|큐삼)/iu.test(raw)) return { backend: 'qwen3tts' };
|
|
245
|
+
if (/(fireredtts2|fireredtts|firered|fire red|파이어레드)/iu.test(raw)) return { backend: 'fireredtts2' };
|
|
246
|
+
if (/(moss-tts-nano|moss tts nano|mossnano|moss|openmoss|모스|오픈모스)/iu.test(raw)) return { backend: 'mossttsnano' };
|
|
247
|
+
if (/(supertonic|수퍼토닉|슈퍼토닉)/iu.test(raw)) return { backend: 'supertonic' };
|
|
248
|
+
if (/(edge|엣지)/iu.test(raw)) return { backend: 'edge' };
|
|
249
|
+
}
|
|
116
250
|
const looksLikeVoice = /\b(change|switch|set)\b.*\b(voice|speaker)\b/i.test(raw)
|
|
117
251
|
|| /\b(voice|speaker)\b.*\b(to|as)\b/i.test(raw)
|
|
118
252
|
|| /(목소리|음성).*(바꿔|변경|설정|해줘)|목소리.*로|음성.*로/u.test(compact);
|
|
@@ -15,6 +15,11 @@ import {
|
|
|
15
15
|
writeTtsVoiceConfig,
|
|
16
16
|
} from './tts_voice_config.mjs';
|
|
17
17
|
|
|
18
|
+
const __tempRoots = [];
|
|
19
|
+
test.after(() => {
|
|
20
|
+
for (const root of __tempRoots) try { fs.rmSync(root, { recursive: true, force: true }); } catch {}
|
|
21
|
+
});
|
|
22
|
+
|
|
18
23
|
test('effectiveTtsVoiceSelection reads backend and voice type from config', () => {
|
|
19
24
|
const config = defaultTtsVoiceConfig();
|
|
20
25
|
config.currentBackend = 'edge';
|
|
@@ -27,6 +32,26 @@ test('effectiveTtsVoiceSelection reads backend and voice type from config', () =
|
|
|
27
32
|
assert.equal(selected.voice.voice, 'ko-KR-SunHiNeural');
|
|
28
33
|
});
|
|
29
34
|
|
|
35
|
+
test('effectiveTtsVoiceSelection falls back to backend voice when env voice type belongs to another backend', () => {
|
|
36
|
+
const config = defaultTtsVoiceConfig();
|
|
37
|
+
config.currentBackend = 'edge';
|
|
38
|
+
config.currentVoiceType = 'korean_male';
|
|
39
|
+
config.backends.edge.currentVoiceType = 'korean_male';
|
|
40
|
+
|
|
41
|
+
const selected = effectiveTtsVoiceSelection(config, { TTS_BACKEND: 'edge', TTS_VOICE_TYPE: 'cloned_reference' });
|
|
42
|
+
|
|
43
|
+
assert.equal(selected.backend, 'edge');
|
|
44
|
+
assert.equal(selected.voiceType, 'korean_male');
|
|
45
|
+
assert.equal(selected.voice.voice, 'ko-KR-InJoonNeural');
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
test('effectiveTtsVoiceSelection accepts Qwen3 backend aliases from env', () => {
|
|
49
|
+
const selected = effectiveTtsVoiceSelection(defaultTtsVoiceConfig(), { TTS_BACKEND: 'qwen3' });
|
|
50
|
+
|
|
51
|
+
assert.equal(selected.backend, 'qwen3tts');
|
|
52
|
+
assert.equal(selected.voiceType, 'korean_preset');
|
|
53
|
+
});
|
|
54
|
+
|
|
30
55
|
test('applyTtsVoiceSelectionToEnv updates backend voice and voice language together', () => {
|
|
31
56
|
const selected = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_male' }), {});
|
|
32
57
|
|
|
@@ -52,8 +77,77 @@ test('voiceCommandFromTranscript detects voice type changes', () => {
|
|
|
52
77
|
assert.equal(voiceCommandFromTranscript('change language to Korean'), null);
|
|
53
78
|
});
|
|
54
79
|
|
|
80
|
+
// Verifies that voiceCommandFromTranscript turns spoken backend-switch
// requests, phrased in English or Korean, into a `{ backend }` command with
// the expected backend id. Covers OmniVoice, Edge, Qwen3 (including the
// 'qwen3' spelling), FireRedTTS-2, MOSS and NeuTTS Air.
test('voiceCommandFromTranscript detects TTS backend changes', () => {
|
|
81
|
+
assert.deepEqual(voiceCommandFromTranscript('change TTS backend to OmniVoice'), { backend: 'omnivoice' });
|
|
82
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 옴니보이스로 바꿔'), { backend: 'omnivoice' });
|
|
83
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS를 Edge로 바꿔'), { backend: 'edge' });
|
|
84
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS를 qwen3로 바꿔'), { backend: 'qwen3tts' });
|
|
85
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 큐웬으로 바꿔'), { backend: 'qwen3tts' });
|
|
86
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS backend to FireRedTTS-2'), { backend: 'fireredtts2' });
|
|
87
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 모스로 바꿔'), { backend: 'mossttsnano' });
|
|
88
|
+
assert.deepEqual(voiceCommandFromTranscript('TTS backend to NeuTTS Air'), { backend: 'neuttsair' });
|
|
89
|
+
assert.deepEqual(voiceCommandFromTranscript('음성 백엔드 뉴티티에스 에어로 바꿔'), { backend: 'neuttsair' });
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
// Verifies the env produced by applyTtsVoiceSelectionToEnv for the two Qwen3
// voice types, starting from an empty env:
//   - 'korean_preset'    -> QWEN3TTS_MODE 'custom' plus QWEN3TTS_SPEAKER
//   - 'cloned_reference' -> QWEN3TTS_MODE 'clone' plus QWEN3TTS_REF_AUDIO
// deepEqual pins the full key set, so no other keys may be emitted.
test('applyTtsVoiceSelectionToEnv maps Qwen3 voice types to CLI mode env', () => {
|
|
93
|
+
const preset = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'qwen3tts', voiceType: 'korean_preset' }), {});
|
|
94
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, preset), {
|
|
95
|
+
TTS_BACKEND: 'qwen3tts',
|
|
96
|
+
TTS_VOICE_TYPE: 'korean_preset',
|
|
97
|
+
QWEN3TTS_MODE: 'custom',
|
|
98
|
+
QWEN3TTS_SPEAKER: 'sohee',
|
|
99
|
+
VOICE_LANGUAGE: 'ko',
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
const clone = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'qwen3tts', voiceType: 'cloned_reference' }), {});
|
|
103
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, clone), {
|
|
104
|
+
TTS_BACKEND: 'qwen3tts',
|
|
105
|
+
TTS_VOICE_TYPE: 'cloned_reference',
|
|
106
|
+
QWEN3TTS_MODE: 'clone',
|
|
107
|
+
QWEN3TTS_REF_AUDIO: 'voice-samples/user-reference.wav',
|
|
108
|
+
VOICE_LANGUAGE: 'ko',
|
|
109
|
+
});
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
// Verifies the backend-specific reference-audio env keys written by
// applyTtsVoiceSelectionToEnv, starting from an empty env:
//   - fireredtts2 / 'prompt_reference' -> FIREREDTTS2_PROMPT_AUDIO
//   - mossttsnano / 'prompt_reference' -> MOSSTTSNANO_MODE 'voice_clone' plus
//     MOSSTTSNANO_PROMPT_AUDIO
//   - neuttsair / 'cloned_reference'   -> NEUTTSAIR_REF_AUDIO, with
//     VOICE_LANGUAGE 'en' (the other two cases expect 'ko')
// NOTE(review): the title names only FireRedTTS-2 and MOSS, but the NeuTTS Air
// mapping is asserted here as well.
test('applyTtsVoiceSelectionToEnv maps FireRedTTS-2 and MOSS prompt references', () => {
|
|
113
|
+
const fire = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'fireredtts2', voiceType: 'prompt_reference' }), {});
|
|
114
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, fire), {
|
|
115
|
+
TTS_BACKEND: 'fireredtts2',
|
|
116
|
+
TTS_VOICE_TYPE: 'prompt_reference',
|
|
117
|
+
FIREREDTTS2_PROMPT_AUDIO: 'voice-samples/user-reference.wav',
|
|
118
|
+
VOICE_LANGUAGE: 'ko',
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
const moss = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'mossttsnano', voiceType: 'prompt_reference' }), {});
|
|
122
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, moss), {
|
|
123
|
+
TTS_BACKEND: 'mossttsnano',
|
|
124
|
+
TTS_VOICE_TYPE: 'prompt_reference',
|
|
125
|
+
MOSSTTSNANO_MODE: 'voice_clone',
|
|
126
|
+
MOSSTTSNANO_PROMPT_AUDIO: 'voice-samples/user-reference.wav',
|
|
127
|
+
VOICE_LANGUAGE: 'ko',
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
const neutts = effectiveTtsVoiceSelection(updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'neuttsair', voiceType: 'cloned_reference' }), {});
|
|
131
|
+
assert.deepEqual(applyTtsVoiceSelectionToEnv({}, neutts), {
|
|
132
|
+
TTS_BACKEND: 'neuttsair',
|
|
133
|
+
TTS_VOICE_TYPE: 'cloned_reference',
|
|
134
|
+
NEUTTSAIR_REF_AUDIO: 'voice-samples/user-reference.wav',
|
|
135
|
+
VOICE_LANGUAGE: 'en',
|
|
136
|
+
});
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
// Verifies that switching only the backend to 'omnivoice' (no voiceType given)
// yields that backend's default voice: with an empty env the selection reports
// voice type 'cloned_reference' and the reference sample path as the voice.
test('updateTtsVoiceConfig can switch to OmniVoice backend default voice', () => {
|
|
140
|
+
const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { backend: 'omnivoice' });
|
|
141
|
+
const selected = effectiveTtsVoiceSelection(config, {});
|
|
142
|
+
|
|
143
|
+
assert.equal(selected.backend, 'omnivoice');
|
|
144
|
+
assert.equal(selected.voiceType, 'cloned_reference');
|
|
145
|
+
assert.equal(selected.voice.voice, 'voice-samples/user-reference.wav');
|
|
146
|
+
});
|
|
147
|
+
|
|
55
148
|
test('read and write voice config round trips current selection', () => {
|
|
56
149
|
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'voice-config-test-'));
|
|
150
|
+
__tempRoots.push(dir);
|
|
57
151
|
const file = path.join(dir, 'tts-voices.json');
|
|
58
152
|
const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_female' });
|
|
59
153
|
|