verbalcoding 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/.env.example +83 -0
  2. package/LICENSE +21 -0
  3. package/README.md +157 -0
  4. package/app-node/agent_adapters.mjs +576 -0
  5. package/app-node/agent_adapters.test.mjs +455 -0
  6. package/app-node/agent_contract.mjs +45 -0
  7. package/app-node/barge_in.mjs +148 -0
  8. package/app-node/barge_in.test.mjs +179 -0
  9. package/app-node/bridge_logger.mjs +66 -0
  10. package/app-node/bridge_logger.test.mjs +73 -0
  11. package/app-node/bridge_state.mjs +104 -0
  12. package/app-node/bridge_state.test.mjs +64 -0
  13. package/app-node/cli_install.test.mjs +97 -0
  14. package/app-node/deferred_queue.mjs +12 -0
  15. package/app-node/deferred_queue.test.mjs +20 -0
  16. package/app-node/discord_invite_cli.test.mjs +31 -0
  17. package/app-node/discord_text.mjs +29 -0
  18. package/app-node/discord_text.test.mjs +32 -0
  19. package/app-node/hermes_profiles.mjs +164 -0
  20. package/app-node/hermes_profiles.test.mjs +276 -0
  21. package/app-node/install_config.mjs +263 -0
  22. package/app-node/install_config.test.mjs +205 -0
  23. package/app-node/instance_doctor.mjs +137 -0
  24. package/app-node/instance_doctor.test.mjs +128 -0
  25. package/app-node/instance_profile_lifecycle.mjs +16 -0
  26. package/app-node/instances.mjs +153 -0
  27. package/app-node/instances.test.mjs +102 -0
  28. package/app-node/language_config.mjs +73 -0
  29. package/app-node/language_config.test.mjs +51 -0
  30. package/app-node/latency_metrics.mjs +133 -0
  31. package/app-node/latency_metrics.test.mjs +71 -0
  32. package/app-node/main.mjs +1771 -0
  33. package/app-node/mcp_tools.mjs +198 -0
  34. package/app-node/mcp_tools.test.mjs +39 -0
  35. package/app-node/progress_cache.mjs +7 -0
  36. package/app-node/progress_cache.test.mjs +23 -0
  37. package/app-node/progress_speech.mjs +102 -0
  38. package/app-node/progress_speech.test.mjs +48 -0
  39. package/app-node/project_sessions.mjs +148 -0
  40. package/app-node/project_sessions.test.mjs +77 -0
  41. package/app-node/restart_notice.mjs +57 -0
  42. package/app-node/restart_notice.test.mjs +37 -0
  43. package/app-node/restart_policy.mjs +27 -0
  44. package/app-node/restart_policy.test.mjs +33 -0
  45. package/app-node/text_routing.mjs +8 -0
  46. package/app-node/text_routing.test.mjs +18 -0
  47. package/app-node/tts_backends.mjs +251 -0
  48. package/app-node/tts_backends.test.mjs +400 -0
  49. package/app-node/tts_chunks.mjs +57 -0
  50. package/app-node/tts_chunks.test.mjs +35 -0
  51. package/app-node/tts_prefetch.mjs +38 -0
  52. package/app-node/tts_prefetch.test.mjs +49 -0
  53. package/app-node/tts_settings.mjs +72 -0
  54. package/app-node/tts_settings.test.mjs +127 -0
  55. package/app-node/tts_voice_config.mjs +127 -0
  56. package/app-node/tts_voice_config.test.mjs +64 -0
  57. package/app-node/voice_clone_capture.mjs +76 -0
  58. package/app-node/voice_clone_capture.test.mjs +51 -0
  59. package/app-node/voice_messages.mjs +62 -0
  60. package/app-node/voice_messages.test.mjs +33 -0
  61. package/docs/CONFIGURATION.md +183 -0
  62. package/docs/FRESH_INSTALL.md +193 -0
  63. package/docs/MULTI_INSTANCE.md +183 -0
  64. package/docs/RELEASE.md +72 -0
  65. package/docs/USAGE.md +108 -0
  66. package/docs/assets/figures/verbalcoding-flow.svg +63 -0
  67. package/docs/i18n/README.es.md +121 -0
  68. package/docs/i18n/README.fr.md +121 -0
  69. package/docs/i18n/README.ja.md +121 -0
  70. package/docs/i18n/README.ko.md +121 -0
  71. package/docs/i18n/README.ru.md +121 -0
  72. package/docs/i18n/README.zh.md +121 -0
  73. package/package.json +58 -0
  74. package/run.sh +82 -0
  75. package/scripts/bootstrap_prereqs.sh +193 -0
  76. package/scripts/cli.mjs +369 -0
  77. package/scripts/docker_ubuntu_smoke.sh +76 -0
  78. package/scripts/doctor.mjs +134 -0
  79. package/scripts/install.mjs +108 -0
  80. package/scripts/install.sh +44 -0
  81. package/scripts/mcp-server.mjs +84 -0
  82. package/scripts/openvoice_smoke.py +34 -0
  83. package/scripts/openvoice_synth.py +103 -0
  84. package/scripts/setup_openvoice.sh +34 -0
  85. package/scripts/setup_supertonic.sh +18 -0
@@ -0,0 +1,400 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import path from 'node:path';
4
+
5
+ import { createTtsBackend } from './tts_backends.mjs';
6
+
7
/**
 * Builds a fresh settings fixture for the backend tests.
 *
 * The default backend is 'edge'; each test overrides `backend` (or a nested
 * section) on its own copy, so a new object is created on every call.
 *
 * @returns {object} Settings with `backend`, `edge`, `openvoice`,
 *   `speechswift`, and `supertonic` sections.
 */
function baseSettings() {
  const edge = { voice: 'ko-KR-InJoonNeural', rate: '+10%' };

  const openvoice = {
    dir: '/project/vendor/OpenVoice',
    venv: '/project/.venv-openvoice',
    refAudio: '/project/voice-samples/me.wav',
    language: 'KR',
    style: 'default',
    timeoutMs: 90000,
    useForProgress: false,
  };

  const speechswift = {
    command: 'audio',
    engine: 'cosyvoice',
    language: 'korean',
    refAudio: '/project/voice-samples/me.wav',
    modelId: 'aufklarer/CosyVoice3-0.5B-MLX-4bit',
    model: 'base',
    speaker: '',
    instruct: '',
    timeoutMs: 120000,
    stream: true,
    useForProgress: false,
    mode: 'cli',
    serverUrl: 'http://127.0.0.1:18080',
  };

  const supertonic = {
    command: 'supertonic',
    voice: 'M1',
    language: 'ko',
    steps: 2,
    speed: 1.08,
    maxChunkLength: 300,
    silenceDuration: 0.15,
    customStylePath: '',
    timeoutMs: 60000,
    useForProgress: false,
    cacheDir: '',
    intraOpThreads: '',
    interOpThreads: '',
  };

  return { backend: 'edge', edge, openvoice, speechswift, supertonic };
}
52
+
53
// Default Edge backend: checks the edge-tts argument layout, the output path
// prefix, the timeout option, and the cache key parts.
test('Edge backend calls edge-tts with voice, rate, text, and output path', async () => {
  const invocations = [];
  const recordExec = async (cmd, args, options) => {
    invocations.push({ cmd, args, options });
  };
  const tts = createTtsBackend(baseSettings(), {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    execFileAsync: recordExec,
  });

  const outputPath = await tts.synthesize('안녕하세요', { kind: 'final' });

  const first = invocations[0];
  assert.equal(first.cmd, 'edge-tts');
  assert.deepEqual(first.args.slice(0, 5), ['-v', 'ko-KR-InJoonNeural', '--rate', '+10%', '-t']);
  assert.equal(first.args[5], '안녕하세요');
  assert.equal(first.args[6], '--write-media');
  assert.match(outputPath, /^\/tmp\/verbalcoding-edge-/);
  assert.equal(first.options.timeout, 60000);
  assert.deepEqual(tts.cacheKeyParts(), ['edge', 'ko-KR-InJoonNeural', '+10%']);
});
74
+
75
// A `voiceProvider` dependency overrides the configured Edge voice, both in
// the edge-tts arguments and in the cache key parts.
test('Edge backend reads dynamic voice before each TTS request', async () => {
  const invocations = [];
  const recordExec = async (cmd, args, options) => {
    invocations.push({ cmd, args, options });
  };
  const tts = createTtsBackend(baseSettings(), {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    voiceProvider: () => 'en-US-GuyNeural',
    execFileAsync: recordExec,
  });

  await tts.synthesize('hello', { kind: 'final' });

  const voiceArgs = invocations[0].args.slice(0, 2);
  assert.deepEqual(voiceArgs, ['-v', 'en-US-GuyNeural']);
  assert.deepEqual(tts.cacheKeyParts(), ['edge', 'en-US-GuyNeural', '+10%']);
});
92
+
93
// Setting `edge.command` replaces the default executable name.
test('Edge backend honors configurable command path', async () => {
  const invocations = [];
  const config = baseSettings();
  config.edge.command = '/project/.venv/bin/edge-tts';
  const recordExec = async (cmd, args, options) => {
    invocations.push({ cmd, args, options });
  };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    execFileAsync: recordExec,
  });

  await tts.synthesize('안녕하세요', { kind: 'final' });

  const [firstCall] = invocations;
  assert.equal(firstCall.cmd, '/project/.venv/bin/edge-tts');
});
110
+
111
// OpenVoice final synthesis: the venv python runs the synth script with the
// reference audio and text, using the OpenVoice timeout and cache key.
test('OpenVoice final synthesis calls Python wrapper with reference audio and output path', async () => {
  const invocations = [];
  const config = { ...baseSettings(), backend: 'openvoice' };
  // Only the interpreter and WAV files are reported as present.
  const pythonOrWavExists = file => file.endsWith('python') || file.endsWith('.wav');
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: pythonOrWavExists,
    statSync: () => ({ size: 999 }),
    execFileAsync: async (cmd, args, options) => invocations.push({ cmd, args, options }),
  });

  const outputPath = await tts.synthesize('복제 음성 테스트', { kind: 'final' });

  const first = invocations[0];
  assert.equal(first.cmd, path.join('/project/.venv-openvoice', 'bin', 'python'));
  assert.ok(first.args.some(arg => String(arg).endsWith('scripts/openvoice_synth.py')));
  assert.ok(first.args.includes('--ref-audio'));
  assert.ok(first.args.includes('/project/voice-samples/me.wav'));
  assert.ok(first.args.includes('--text'));
  assert.ok(first.args.includes('복제 음성 테스트'));
  assert.equal(first.options.timeout, 90000);
  assert.match(outputPath, /^\/tmp\/verbalcoding-openvoice-/);
  assert.deepEqual(tts.cacheKeyParts(), ['openvoice', '/project/voice-samples/me.wav', 'KR', 'default']);
});
133
+
134
// With `useForProgress` left false in the fixture, a 'progress' request on the
// OpenVoice backend is expected to run edge-tts.
test('OpenVoice progress uses Edge fallback unless explicitly enabled', async () => {
  const invocations = [];
  const config = { ...baseSettings(), backend: 'openvoice' };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    execFileAsync: async (cmd, args) => invocations.push({ cmd, args }),
  });

  await tts.synthesize('파일 읽기', { kind: 'progress' });

  const [firstCall] = invocations;
  assert.equal(firstCall.cmd, 'edge-tts');
});
148
+
149
// When the OpenVoice command throws, the backend is expected to warn and then
// run edge-tts. Warnings and exec calls are recorded in the same list.
test('OpenVoice final synthesis falls back to Edge when wrapper fails', async () => {
  const events = [];
  const config = { ...baseSettings(), backend: 'openvoice' };
  const failingExec = async (cmd, args) => {
    events.push({ cmd, args });
    if (cmd.includes('.venv-openvoice')) throw new Error('openvoice missing');
  };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    warn: (...args) => events.push({ warn: args.join(' ') }),
    execFileAsync: failingExec,
  });

  await tts.synthesize('fallback', { kind: 'final' });

  const triedOpenVoice = events.some(call => call.cmd?.includes('.venv-openvoice'));
  const ranEdge = events.some(call => call.cmd === 'edge-tts');
  const warned = events.some(call => /falling back to edge/i.test(call.warn || ''));
  assert.ok(triedOpenVoice);
  assert.ok(ranEdge);
  assert.ok(warned);
});
169
+
170
+
171
// `existsSync` reports only WAV files as present, so the venv interpreter is
// "missing" and the command is expected to be plain `python3`.
test('OpenVoice backend falls back to python3 when configured venv python is missing', async () => {
  const invocations = [];
  const config = { ...baseSettings(), backend: 'openvoice' };
  const onlyWavExists = file => file.endsWith('.wav');
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: onlyWavExists,
    statSync: () => ({ size: 999 }),
    execFileAsync: async (cmd, args, options) => invocations.push({ cmd, args, options }),
  });

  await tts.synthesize('복제 음성 테스트', { kind: 'final' });

  const [firstCall] = invocations;
  assert.equal(firstCall.cmd, 'python3');
});
185
+
186
// SpeechSwift CLI mode: checks the `audio speak` arguments, the timeout, the
// output path prefix, and the full cache key.
test('SpeechSwift CosyVoice backend calls audio CLI with reference sample and output path', async () => {
  const invocations = [];
  const config = { ...baseSettings(), backend: 'speechswift' };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 999 }),
    execFileAsync: async (cmd, args, options) => invocations.push({ cmd, args, options }),
  });

  const outputPath = await tts.synthesize('복제 음성 테스트', { kind: 'final' });

  const first = invocations[0];
  assert.equal(first.cmd, 'audio');
  assert.deepEqual(first.args.slice(0, 4), ['speak', '복제 음성 테스트', '--engine', 'cosyvoice']);
  assert.ok(first.args.includes('--voice-sample'));
  assert.ok(first.args.includes('/project/voice-samples/me.wav'));
  assert.ok(first.args.includes('--stream'));
  assert.ok(first.args.includes('--model-id'));
  assert.equal(first.options.timeout, 120000);
  assert.match(outputPath, /^\/tmp\/verbalcoding-speechswift-/);
  assert.deepEqual(tts.cacheKeyParts(), ['speechswift', 'cli', 'http://127.0.0.1:18080', 'cosyvoice', '/project/voice-samples/me.wav', 'korean', 'aufklarer/CosyVoice3-0.5B-MLX-4bit', 'base', '', '']);
});
208
+
209
// With `useForProgress` left false in the fixture, a 'progress' request on the
// SpeechSwift backend is expected to run edge-tts.
test('SpeechSwift progress uses Edge fallback unless explicitly enabled', async () => {
  const invocations = [];
  const config = { ...baseSettings(), backend: 'speechswift' };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    execFileAsync: async (cmd, args) => invocations.push({ cmd, args }),
  });

  await tts.synthesize('진행 안내', { kind: 'progress' });

  const [firstCall] = invocations;
  assert.equal(firstCall.cmd, 'edge-tts');
});
223
+
224
// When the `audio` command throws, the backend is expected to warn and then
// run edge-tts. Warnings and exec calls are recorded in the same list.
test('SpeechSwift falls back to Edge when audio CLI fails', async () => {
  const events = [];
  const config = { ...baseSettings(), backend: 'speechswift' };
  const failingExec = async (cmd, args) => {
    events.push({ cmd, args });
    if (cmd === 'audio') throw new Error('speech-swift missing');
  };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    warn: (...args) => events.push({ warn: args.join(' ') }),
    execFileAsync: failingExec,
  });

  await tts.synthesize('fallback', { kind: 'final' });

  const triedAudio = events.some(call => call.cmd === 'audio');
  const ranEdge = events.some(call => call.cmd === 'edge-tts');
  const warned = events.some(call => /speech-swift failed; falling back to edge/i.test(call.warn || ''));
  assert.ok(triedAudio);
  assert.ok(ranEdge);
  assert.ok(warned);
});
244
+
245
// Server mode: the fetch options must carry an AbortSignal that has not yet
// fired when the stubbed fetch is called.
test('SpeechSwift server mode passes an AbortSignal timeout to audio-server fetch', async () => {
  const requests = [];
  const defaults = baseSettings();
  const config = {
    ...defaults,
    backend: 'speechswift',
    speechswift: { ...defaults.speechswift, mode: 'server', serverUrl: 'http://127.0.0.1:18080', timeoutMs: 50 },
  };
  const okResponse = () => ({
    ok: true,
    status: 200,
    statusText: 'OK',
    arrayBuffer: async () => new Uint8Array([1, 2, 3, 4]).buffer,
  });
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 999 }),
    writeFileAsync: async () => {},
    execFileAsync: async () => {},
    fetch: async (url, options) => {
      requests.push({ url, options });
      return okResponse();
    },
  });

  await tts.synthesize('서버 음성 테스트', { kind: 'final' });

  const [request] = requests;
  assert.equal(request.url, 'http://127.0.0.1:18080/speak');
  assert.ok(request.options.signal instanceof AbortSignal);
  assert.equal(request.options.signal.aborted, false);
});
271
+
272
// Server mode: the request is a JSON POST to `/speak`, and the returned bytes
// are written to a speechswift-server temp file. The fetch is recorded first
// and the file write second, so the indexes below depend on that order.
test('SpeechSwift server mode posts to audio-server and writes returned WAV', async () => {
  const events = [];
  const defaults = baseSettings();
  const config = {
    ...defaults,
    backend: 'speechswift',
    speechswift: { ...defaults.speechswift, mode: 'server', serverUrl: 'http://127.0.0.1:18080' },
  };
  const okResponse = () => ({
    ok: true,
    status: 200,
    statusText: 'OK',
    arrayBuffer: async () => new Uint8Array([1, 2, 3, 4]).buffer,
  });
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 999 }),
    writeFileAsync: async (file, bytes) => events.push({ write: file, bytes: bytes.length }),
    execFileAsync: async (cmd, args) => events.push({ cmd, args }),
    fetch: async (url, options) => {
      events.push({ url, options });
      return okResponse();
    },
  });

  const outputPath = await tts.synthesize('서버 음성 테스트', { kind: 'final' });

  const request = events[0];
  assert.equal(request.url, 'http://127.0.0.1:18080/speak');
  assert.equal(request.options.method, 'POST');
  assert.equal(request.options.headers['content-type'], 'application/json');
  assert.deepEqual(JSON.parse(request.options.body), {
    text: '서버 음성 테스트',
    engine: 'cosyvoice',
    language: 'korean',
  });
  assert.match(events[1].write, /^\/tmp\/verbalcoding-speechswift-server-/);
  assert.equal(events[1].bytes, 4);
  assert.match(outputPath, /^\/tmp\/verbalcoding-speechswift-server-/);
});
306
+
307
// Server mode: a non-OK (503) response is expected to produce a warning and
// an edge-tts run.
test('SpeechSwift server mode falls back to Edge when audio-server fails', async () => {
  const events = [];
  const defaults = baseSettings();
  const config = {
    ...defaults,
    backend: 'speechswift',
    speechswift: { ...defaults.speechswift, mode: 'server' },
  };
  const unavailable = async () => ({ ok: false, status: 503, statusText: 'Unavailable', text: async () => 'loading' });
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    warn: (...args) => events.push({ warn: args.join(' ') }),
    writeFileAsync: async () => {},
    fetch: unavailable,
    execFileAsync: async (cmd, args) => events.push({ cmd, args }),
  });

  await tts.synthesize('fallback', { kind: 'final' });

  const ranEdge = events.some(call => call.cmd === 'edge-tts');
  const warned = events.some(call => /speech-swift failed; falling back to edge/i.test(call.warn || ''));
  assert.ok(ranEdge);
  assert.ok(warned);
});
325
+
326
// Supertonic: checks the `supertonic tts` arguments, the timeout, the output
// path prefix, and the cache key.
test('Supertonic backend calls local supertonic CLI with Korean low-latency options', async () => {
  const invocations = [];
  const config = { ...baseSettings(), backend: 'supertonic' };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 999 }),
    execFileAsync: async (cmd, args, options) => invocations.push({ cmd, args, options }),
  });

  const outputPath = await tts.synthesize('수퍼토닉 테스트', { kind: 'final' });

  const first = invocations[0];
  assert.equal(first.cmd, 'supertonic');
  // The fourth argument is the generated output file, so it is compared with itself.
  assert.deepEqual(first.args.slice(0, 6), ['tts', '수퍼토닉 테스트', '-o', first.args[3], '--lang', 'ko']);
  assert.ok(first.args.includes('--voice'));
  assert.ok(first.args.includes('M1'));
  assert.ok(first.args.includes('--steps'));
  assert.ok(first.args.includes('2'));
  assert.ok(first.args.includes('--speed'));
  assert.ok(first.args.includes('1.08'));
  assert.equal(first.options.timeout, 60000);
  assert.match(outputPath, /^\/tmp\/verbalcoding-supertonic-/);
  assert.deepEqual(tts.cacheKeyParts(), ['supertonic', 'supertonic', 'M1', 'ko', 2, 1.08, 300, 0.15, '']);
});
350
+
351
// With `useForProgress` left false in the fixture, a 'progress' request on the
// Supertonic backend is expected to run edge-tts.
test('Supertonic progress uses Edge fallback unless explicitly enabled', async () => {
  const invocations = [];
  const config = { ...baseSettings(), backend: 'supertonic' };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    execFileAsync: async (cmd, args) => invocations.push({ cmd, args }),
  });

  await tts.synthesize('진행 안내', { kind: 'progress' });

  const [firstCall] = invocations;
  assert.equal(firstCall.cmd, 'edge-tts');
});
365
+
366
// When the `supertonic` command throws, the backend is expected to warn and
// then run edge-tts. Warnings and exec calls are recorded in the same list.
test('Supertonic falls back to Edge when local CLI fails', async () => {
  const events = [];
  const config = { ...baseSettings(), backend: 'supertonic' };
  const failingExec = async (cmd, args) => {
    events.push({ cmd, args });
    if (cmd === 'supertonic') throw new Error('supertonic missing');
  };
  const tts = createTtsBackend(config, {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    warn: (...args) => events.push({ warn: args.join(' ') }),
    execFileAsync: failingExec,
  });

  await tts.synthesize('fallback', { kind: 'final' });

  const triedSupertonic = events.some(call => call.cmd === 'supertonic');
  const ranEdge = events.some(call => call.cmd === 'edge-tts');
  const warned = events.some(call => /supertonic failed; falling back to edge/i.test(call.warn || ''));
  assert.ok(triedSupertonic);
  assert.ok(ranEdge);
  assert.ok(warned);
});
386
+
387
// Passing `signal: null` must not add a `signal` key to the exec options.
test('TTS backends omit signal option when no AbortSignal is provided', async () => {
  const invocations = [];
  const tts = createTtsBackend(baseSettings(), {
    tmpdir: '/tmp',
    existsSync: () => true,
    statSync: () => ({ size: 123 }),
    execFileAsync: async (cmd, args, options) => invocations.push({ cmd, args, options }),
  });

  await tts.synthesize('신호 없는 음성 테스트', { signal: null, kind: 'final' });

  const first = invocations[0];
  assert.equal(first.cmd, 'edge-tts');
  assert.equal(Object.hasOwn(first.options, 'signal'), false);
});
@@ -0,0 +1,57 @@
1
// Chunk size used when the caller does not pass a usable limit.
const DEFAULT_MAX_CHARS = 450;

/**
 * Reports whether the text contains at least one Unicode letter or number.
 *
 * @param {*} text - Value to inspect; falsy values are treated as ''.
 * @returns {boolean} True when a letter or number is present.
 */
function hasSpeakableText(text) {
  const candidate = String(text || '');
  return candidate.search(/[\p{L}\p{N}]/u) !== -1;
}
6
+
7
/**
 * Splits text into chunks for TTS, one chunk per sentence where possible.
 *
 * Whitespace runs are collapsed to single spaces first. Sentences are not
 * merged with each other; a sentence longer than the limit is broken up by
 * `splitLongSentence`. Pieces without any letter or number are dropped.
 *
 * @param {*} text - Text to split; falsy values produce an empty list.
 * @param {number} [maxChars=DEFAULT_MAX_CHARS] - Maximum chunk length in
 *   UTF-16 code units. Values that are not usable numbers fall back to the
 *   default, and the limit is never below 1.
 * @returns {string[]} Chunks in their original order.
 */
export function splitForTTS(text, maxChars = DEFAULT_MAX_CHARS) {
  const collapsed = String(text || '').replace(/\s+/g, ' ').trim();
  if (!collapsed || !hasSpeakableText(collapsed)) return [];

  const limit = Math.max(1, Number(maxChars) || DEFAULT_MAX_CHARS);
  const pieces = collapsed.match(/[^.!?。!?…]+[.!?。!?…]*|.+$/gu) || [collapsed];

  return pieces
    .map(piece => piece.trim())
    .filter(piece => piece && hasSpeakableText(piece))
    .flatMap(piece => (
      piece.length <= limit
        ? [piece]
        : splitLongSentence(piece, limit).filter(hasSpeakableText)
    ));
}
26
+
27
/**
 * Breaks one over-long sentence into pieces of at most `limit` characters.
 *
 * Words are packed greedily, joined by single spaces. A word that is itself
 * longer than the limit is cut by `splitByLength`, as is a sentence with no
 * whitespace at all.
 *
 * @param {string} sentence - Sentence to break up.
 * @param {number} limit - Maximum piece length.
 * @returns {string[]} Pieces in their original order.
 */
function splitLongSentence(sentence, limit) {
  const tokens = sentence.split(/\s+/).filter(Boolean);
  if (tokens.length <= 1) return splitByLength(sentence, limit);

  const pieces = [];
  let pending = '';
  // Emits the piece being built, if any, and starts a new one.
  const flush = () => {
    if (!pending) return;
    pieces.push(pending);
    pending = '';
  };

  for (const token of tokens) {
    if (token.length > limit) {
      flush();
      pieces.push(...splitByLength(token, limit));
      continue;
    }
    const joined = pending ? pending + ' ' + token : token;
    if (joined.length <= limit) {
      pending = joined;
    } else {
      flush();
      pending = token;
    }
  }

  flush();
  return pieces;
}
52
+
53
/**
 * Cuts text into consecutive pieces of at most `limit` UTF-16 code units.
 *
 * A cut is never placed between the two halves of a surrogate pair, so
 * characters outside the Basic Multilingual Plane (emoji, for example) stay
 * intact. The cut moves one unit earlier instead. The only piece that can
 * exceed the limit is a single such character when the limit is 1.
 *
 * @param {string} text - Text to cut.
 * @param {number} limit - Maximum piece length. Fractions are rounded down,
 *   and anything below 1 or not a number is treated as 1 so the loop always
 *   advances.
 * @returns {string[]} Pieces whose concatenation equals `text`.
 */
function splitByLength(text, limit) {
  const step = Math.max(1, Math.floor(Number(limit)) || 1);
  const isHighSurrogate = code => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = code => code >= 0xdc00 && code <= 0xdfff;

  const chunks = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + step, text.length);
    const cutsPair = end < text.length
      && isHighSurrogate(text.charCodeAt(end - 1))
      && isLowSurrogate(text.charCodeAt(end));
    if (cutsPair) {
      // Prefer a shorter piece; if that would be empty, keep the whole pair.
      end = end - 1 > start ? end - 1 : end + 1;
    }
    chunks.push(text.slice(start, end));
    start = end;
  }
  return chunks;
}
@@ -0,0 +1,35 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+
4
+ import { splitForTTS } from './tts_chunks.mjs';
5
+
6
// Text shorter than the limit comes back as a single chunk.
test('splitForTTS keeps short text as one chunk', () => {
  const shortAnswer = '짧은 답변이야.';
  assert.deepEqual(splitForTTS(shortAnswer, 80), [shortAnswer]);
});
9
+
10
// Three sentences joined by spaces come back as three chunks, each within the limit.
test('splitForTTS splits long Korean text on sentence boundaries without exceeding the limit', () => {
  const sentences = [
    '첫 번째 문장은 충분히 길지만 하나의 단위야.',
    '두 번째 문장도 이어서 말해야 해.',
    '세 번째 문장은 너무 길어지면 다음 조각으로 넘어가야 해.',
  ];
  const chunks = splitForTTS(sentences.join(' '), 45);

  assert.deepEqual(chunks, sentences);
  assert.ok(chunks.every(chunk => chunk.length <= 45));
});
21
+
22
// A sentence without punctuation is split at spaces; re-joining the chunks
// with single spaces must reproduce the input.
test('splitForTTS falls back to whitespace splitting when a sentence is too long', () => {
  const limit = 12;
  const text = '하나 둘 셋 넷 다섯 여섯 일곱 여덟 아홉 열 열하나 열둘';
  const chunks = splitForTTS(text, limit);

  assert.ok(chunks.length > 1);
  assert.equal(chunks.join(' '), text);
  assert.ok(chunks.every(chunk => chunk.length <= limit));
});
30
+
31
// Pieces made only of punctuation are removed, whether alone or trailing.
test('splitForTTS drops punctuation-only chunks that make Edge TTS fail', () => {
  const cases = [
    ['”.', []],
    ['Done. ”.', ['Done.']],
    ['응답 완료. ”.', ['응답 완료.']],
  ];
  for (const [input, expected] of cases) {
    assert.deepEqual(splitForTTS(input, 80), expected);
  }
});
@@ -0,0 +1,38 @@
1
/**
 * Reports whether an error looks like an abort.
 *
 * @param {*} e - Thrown value.
 * @returns {boolean} True when `name` is 'AbortError' or `code` is 'ABORT_ERR'.
 */
function isAbortError(e) {
  return e?.name === 'AbortError' || e?.code === 'ABORT_ERR';
}

/**
 * Plays chunks in order, synthesizing the next chunk while the current one plays.
 *
 * Stops quietly when the signal is aborted, when `synth` returns a falsy
 * value, or when `play` fails with an abort error. Any other `play` or
 * `synth` failure is rethrown to the caller.
 *
 * @param {string[]} chunks - Text chunks; a non-array or empty list is a no-op.
 * @param {object} options
 * @param {Function} options.synth - `(chunk, index)` returning a file (or a promise of one).
 * @param {Function} options.play - `(file, index)` that plays one file.
 * @param {Function} [options.cleanup] - `(file)` called for a file that was
 *   synthesized but will not be played. May be sync or async; its failures
 *   are ignored.
 * @param {AbortSignal} [options.signal] - Checked before and after each step.
 * @param {Function} [options.log] - Receives a progress line per synth start.
 * @returns {Promise<void>}
 */
export async function playChunkedTTSWithPrefetch(chunks, { synth, play, cleanup, signal, log = () => {} }) {
  if (!Array.isArray(chunks) || chunks.length === 0) return;

  // Best-effort cleanup: works with a missing hook, a sync hook, or a failing hook.
  const safeCleanup = async file => {
    try {
      await cleanup?.(file);
    } catch {
      // Cleanup failures must not mask the outcome of playback.
    }
  };

  const startSynth = index => {
    const pending = Promise.resolve().then(() => {
      if (signal?.aborted) return null;
      log('TTS chunk start', index + 1, '/', chunks.length, 'chars', String(chunks[index] || '').length);
      return synth(chunks[index], index);
    });
    // The prefetch can reject while playback is still running, long before it
    // is awaited. Attaching a handler now prevents an unhandled-rejection
    // event; awaiting `pending` later still throws the original error.
    pending.catch(() => {});
    return pending;
  };

  // Releases a prefetched file that will never be played. Not awaited so an
  // abort returns promptly; every outcome of the chain is handled.
  const discardPrefetch = pending => {
    if (!pending) return;
    void pending.then(file => (file ? safeCleanup(file) : undefined), () => {});
  };

  let currentPromise = startSynth(0);
  for (let index = 0; index < chunks.length; index += 1) {
    if (signal?.aborted) return;
    const file = await currentPromise;
    if (!file) return;

    if (signal?.aborted) {
      await safeCleanup(file);
      return;
    }

    const nextPromise = index + 1 < chunks.length ? startSynth(index + 1) : null;

    try {
      await play(file, index);
    } catch (e) {
      discardPrefetch(nextPromise);
      if (isAbortError(e) || signal?.aborted) return;
      throw e;
    }

    if (signal?.aborted) {
      discardPrefetch(nextPromise);
      return;
    }
    currentPromise = nextPromise;
  }
}
@@ -0,0 +1,49 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+
4
+ import { playChunkedTTSWithPrefetch } from './tts_prefetch.mjs';
5
+
6
/**
 * Creates a promise together with its resolve function, so a test can
 * settle it from outside.
 *
 * @returns {{promise: Promise, resolve: Function}}
 */
function deferred() {
  let settle;
  const pending = new Promise(resolveFn => {
    settle = resolveFn;
  });
  return { promise: pending, resolve: settle };
}
11
+
12
// Holds the first playback open and checks that synthesis of the second
// chunk has already started by then.
test('playChunkedTTSWithPrefetch starts synthesizing the next chunk while current chunk is playing', async () => {
  const timeline = [];
  const playbackBegan = deferred();
  const playbackGate = deferred();

  const synth = async (chunk, index) => {
    timeline.push(`synth-start-${index}:${chunk}`);
    return `mp3-${index}`;
  };

  const play = async (file, index) => {
    timeline.push(`play-start-${index}:${file}`);
    if (index === 0) {
      // Signal the test body, then block until it releases the gate.
      playbackBegan.resolve();
      await playbackGate.promise;
    }
    timeline.push(`play-end-${index}:${file}`);
  };

  const running = playChunkedTTSWithPrefetch(['하나.', '둘.'], { synth, play });
  await playbackBegan.promise;

  assert.ok(timeline.includes('synth-start-1:둘.'), 'next chunk should begin synthesizing before first playback ends');
  assert.equal(timeline.includes('play-end-0:mp3-0'), false, 'first playback should still be running');

  playbackGate.resolve();
  await running;
});
40
+
41
// Files must be played in the same order as their chunks.
test('playChunkedTTSWithPrefetch preserves playback order', async () => {
  const playedFiles = [];
  const synth = async (_chunk, index) => `mp3-${index}`;
  const play = async file => {
    playedFiles.push(file);
  };

  await playChunkedTTSWithPrefetch(['하나.', '둘.', '셋.'], { synth, play });

  assert.deepEqual(playedFiles, ['mp3-0', 'mp3-1', 'mp3-2']);
});
@@ -0,0 +1,72 @@
1
+ import path from 'node:path';
2
+
3
/**
 * Parses a boolean-like environment value.
 *
 * Surrounding whitespace is ignored, so values such as ' true ' or 'true\r'
 * are read correctly. Unset, empty, and whitespace-only values return the
 * fallback. Any other value is true only when it is one of '1', 'true',
 * 'yes', or 'on' (case-insensitive).
 *
 * @param {*} value - Raw value, usually a string or undefined.
 * @param {boolean} [fallback=false] - Result for an unset or blank value.
 * @returns {boolean}
 */
function boolEnv(value, fallback = false) {
  if (value == null) return fallback;
  const normalized = String(value).trim().toLowerCase();
  if (normalized === '') return fallback;
  return ['1', 'true', 'yes', 'on'].includes(normalized);
}
7
+
8
/**
 * Converts a value to a number, accepting it only when finite and above zero.
 *
 * @param {*} value - Raw value to convert with `Number()`.
 * @param {*} fallback - Returned when the value is not a positive finite number.
 * @returns {*} The parsed number, or `fallback`.
 */
function positiveNumber(value, fallback) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return fallback;
  return parsed > 0 ? parsed : fallback;
}
12
+
13
/**
 * Resolves a configured path against a root directory.
 *
 * @param {string} root - Directory that relative paths are joined onto.
 * @param {*} value - Configured path; null, undefined, and '' select the fallback.
 * @param {string} fallback - Path used when no value is configured.
 * @returns {string} The path unchanged when absolute, otherwise joined with `root`.
 */
function resolveUnderRoot(root, value, fallback) {
  const isUnset = value == null || value === '';
  const candidate = isUnset ? fallback : String(value);
  if (path.isAbsolute(candidate)) return candidate;
  return path.join(root, candidate);
}
17
+
18
/**
 * Builds the TTS settings object from environment variables.
 *
 * `TTS_BACKEND` is trimmed and lower-cased; anything other than 'edge',
 * 'openvoice', 'speechswift', or 'supertonic' selects 'edge'. Empty string
 * variables are treated as unset. Relative paths are resolved against `root`.
 *
 * @param {object} [env=process.env] - Source of configuration values.
 * @param {string} [root=process.cwd()] - Base directory for relative paths.
 * @returns {object} Settings with `backend`, `maxChars`, `volume`,
 *   `progressCacheDir`, and one section per backend.
 */
export function buildTtsSettings(env = process.env, root = process.cwd()) {
  const knownBackends = ['edge', 'openvoice', 'speechswift', 'supertonic'];
  const wanted = String(env.TTS_BACKEND || 'edge').trim().toLowerCase();
  const backend = knownBackends.includes(wanted) ? wanted : 'edge';

  // Small readers so each setting below is one line.
  const text = (key, fallback) => env[key] || fallback;
  const fromRoot = (value, fallback) => resolveUnderRoot(root, value, fallback);
  // Optional paths stay '' when unset instead of resolving to the root itself.
  const optionalPath = key => (env[key] ? fromRoot(env[key], '') : '');

  const maxChars = positiveNumber(env.TTS_MAX_CHARS, 495);
  const volume = positiveNumber(env.TTS_VOLUME, 1.0);
  const progressCacheDir = fromRoot(env.PROGRESS_TTS_CACHE_DIR, path.join('.cache', 'progress-tts'));

  const edge = {
    command: env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts',
    voice: text('TTS_VOICE', 'ko-KR-SunHiNeural'),
    rate: text('TTS_RATE', '+10%'),
  };

  const openvoice = {
    dir: fromRoot(env.OPENVOICE_DIR, path.join('vendor', 'OpenVoice')),
    venv: fromRoot(env.OPENVOICE_VENV, '.venv-openvoice'),
    refAudio: fromRoot(env.OPENVOICE_REF_AUDIO, path.join('voice-samples', 'user-reference.wav')),
    language: text('OPENVOICE_LANGUAGE', 'KR'),
    style: text('OPENVOICE_STYLE', 'default'),
    timeoutMs: positiveNumber(env.OPENVOICE_TIMEOUT_MS, 90000),
    useForProgress: boolEnv(env.OPENVOICE_PROGRESS, false),
  };

  const requestedMode = String(env.SPEECHSWIFT_MODE || 'cli').trim().toLowerCase();
  const speechswift = {
    command: text('SPEECHSWIFT_COMMAND', 'audio'),
    engine: text('SPEECHSWIFT_ENGINE', 'cosyvoice'),
    language: text('SPEECHSWIFT_LANGUAGE', 'korean'),
    // Reuses the OpenVoice reference sample when no SpeechSwift one is set.
    refAudio: fromRoot(env.SPEECHSWIFT_REF_AUDIO || env.OPENVOICE_REF_AUDIO, path.join('voice-samples', 'user-reference.wav')),
    modelId: text('SPEECHSWIFT_MODEL_ID', 'aufklarer/CosyVoice3-0.5B-MLX-4bit'),
    model: text('SPEECHSWIFT_MODEL', 'base'),
    speaker: text('SPEECHSWIFT_SPEAKER', ''),
    instruct: text('SPEECHSWIFT_INSTRUCT', ''),
    timeoutMs: positiveNumber(env.SPEECHSWIFT_TIMEOUT_MS, 120000),
    stream: boolEnv(env.SPEECHSWIFT_STREAM, true),
    useForProgress: boolEnv(env.SPEECHSWIFT_PROGRESS, false),
    mode: requestedMode === 'server' ? 'server' : 'cli',
    // Trailing slashes are stripped from the server URL.
    serverUrl: String(env.SPEECHSWIFT_SERVER_URL || 'http://127.0.0.1:18080').replace(/\/+$/, ''),
  };

  const supertonic = {
    command: text('SUPERTONIC_COMMAND', 'supertonic'),
    voice: text('SUPERTONIC_VOICE', 'M1'),
    language: text('SUPERTONIC_LANGUAGE', 'ko'),
    steps: positiveNumber(env.SUPERTONIC_STEPS, 2),
    speed: positiveNumber(env.SUPERTONIC_SPEED, 1.08),
    maxChunkLength: positiveNumber(env.SUPERTONIC_MAX_CHUNK_LENGTH, 300),
    silenceDuration: positiveNumber(env.SUPERTONIC_SILENCE_DURATION, 0.15),
    customStylePath: optionalPath('SUPERTONIC_CUSTOM_STYLE_PATH'),
    timeoutMs: positiveNumber(env.SUPERTONIC_TIMEOUT_MS, 60000),
    useForProgress: boolEnv(env.SUPERTONIC_PROGRESS, false),
    cacheDir: optionalPath('SUPERTONIC_CACHE_DIR'),
    intraOpThreads: text('SUPERTONIC_INTRA_OP_THREADS', ''),
    interOpThreads: text('SUPERTONIC_INTER_OP_THREADS', ''),
  };

  return {
    backend,
    maxChars,
    volume,
    progressCacheDir,
    edge,
    openvoice,
    speechswift,
    supertonic,
  };
}