verbalcoding 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/.env.example +83 -0
  2. package/LICENSE +21 -0
  3. package/README.md +157 -0
  4. package/app-node/agent_adapters.mjs +576 -0
  5. package/app-node/agent_adapters.test.mjs +455 -0
  6. package/app-node/agent_contract.mjs +45 -0
  7. package/app-node/barge_in.mjs +148 -0
  8. package/app-node/barge_in.test.mjs +179 -0
  9. package/app-node/bridge_logger.mjs +66 -0
  10. package/app-node/bridge_logger.test.mjs +73 -0
  11. package/app-node/bridge_state.mjs +104 -0
  12. package/app-node/bridge_state.test.mjs +64 -0
  13. package/app-node/cli_install.test.mjs +97 -0
  14. package/app-node/deferred_queue.mjs +12 -0
  15. package/app-node/deferred_queue.test.mjs +20 -0
  16. package/app-node/discord_invite_cli.test.mjs +31 -0
  17. package/app-node/discord_text.mjs +29 -0
  18. package/app-node/discord_text.test.mjs +32 -0
  19. package/app-node/hermes_profiles.mjs +164 -0
  20. package/app-node/hermes_profiles.test.mjs +276 -0
  21. package/app-node/install_config.mjs +263 -0
  22. package/app-node/install_config.test.mjs +205 -0
  23. package/app-node/instance_doctor.mjs +137 -0
  24. package/app-node/instance_doctor.test.mjs +128 -0
  25. package/app-node/instance_profile_lifecycle.mjs +16 -0
  26. package/app-node/instances.mjs +153 -0
  27. package/app-node/instances.test.mjs +102 -0
  28. package/app-node/language_config.mjs +73 -0
  29. package/app-node/language_config.test.mjs +51 -0
  30. package/app-node/latency_metrics.mjs +133 -0
  31. package/app-node/latency_metrics.test.mjs +71 -0
  32. package/app-node/main.mjs +1771 -0
  33. package/app-node/mcp_tools.mjs +198 -0
  34. package/app-node/mcp_tools.test.mjs +39 -0
  35. package/app-node/progress_cache.mjs +7 -0
  36. package/app-node/progress_cache.test.mjs +23 -0
  37. package/app-node/progress_speech.mjs +102 -0
  38. package/app-node/progress_speech.test.mjs +48 -0
  39. package/app-node/project_sessions.mjs +148 -0
  40. package/app-node/project_sessions.test.mjs +77 -0
  41. package/app-node/restart_notice.mjs +57 -0
  42. package/app-node/restart_notice.test.mjs +37 -0
  43. package/app-node/restart_policy.mjs +27 -0
  44. package/app-node/restart_policy.test.mjs +33 -0
  45. package/app-node/text_routing.mjs +8 -0
  46. package/app-node/text_routing.test.mjs +18 -0
  47. package/app-node/tts_backends.mjs +251 -0
  48. package/app-node/tts_backends.test.mjs +400 -0
  49. package/app-node/tts_chunks.mjs +57 -0
  50. package/app-node/tts_chunks.test.mjs +35 -0
  51. package/app-node/tts_prefetch.mjs +38 -0
  52. package/app-node/tts_prefetch.test.mjs +49 -0
  53. package/app-node/tts_settings.mjs +72 -0
  54. package/app-node/tts_settings.test.mjs +127 -0
  55. package/app-node/tts_voice_config.mjs +127 -0
  56. package/app-node/tts_voice_config.test.mjs +64 -0
  57. package/app-node/voice_clone_capture.mjs +76 -0
  58. package/app-node/voice_clone_capture.test.mjs +51 -0
  59. package/app-node/voice_messages.mjs +62 -0
  60. package/app-node/voice_messages.test.mjs +33 -0
  61. package/docs/CONFIGURATION.md +183 -0
  62. package/docs/FRESH_INSTALL.md +193 -0
  63. package/docs/MULTI_INSTANCE.md +183 -0
  64. package/docs/RELEASE.md +72 -0
  65. package/docs/USAGE.md +108 -0
  66. package/docs/assets/figures/verbalcoding-flow.svg +63 -0
  67. package/docs/i18n/README.es.md +121 -0
  68. package/docs/i18n/README.fr.md +121 -0
  69. package/docs/i18n/README.ja.md +121 -0
  70. package/docs/i18n/README.ko.md +121 -0
  71. package/docs/i18n/README.ru.md +121 -0
  72. package/docs/i18n/README.zh.md +121 -0
  73. package/package.json +58 -0
  74. package/run.sh +82 -0
  75. package/scripts/bootstrap_prereqs.sh +193 -0
  76. package/scripts/cli.mjs +369 -0
  77. package/scripts/docker_ubuntu_smoke.sh +76 -0
  78. package/scripts/doctor.mjs +134 -0
  79. package/scripts/install.mjs +108 -0
  80. package/scripts/install.sh +44 -0
  81. package/scripts/mcp-server.mjs +84 -0
  82. package/scripts/openvoice_smoke.py +34 -0
  83. package/scripts/openvoice_synth.py +103 -0
  84. package/scripts/setup_openvoice.sh +34 -0
  85. package/scripts/setup_supertonic.sh +18 -0
@@ -0,0 +1,57 @@
1
/**
 * Chooses the language for restart notices from the configured TTS voice id.
 *
 * @param {string} [ttsVoice=''] - Voice identifier, e.g. `en-US-GuyNeural`.
 * @returns {'en'|'ko'} `'en'` when the id starts with `en-` (case-insensitive);
 *   `'ko'` for everything else, including empty or missing input.
 */
export function restartNoticeLanguage(ttsVoice = '') {
  const normalizedVoice = String(ttsVoice || '').toLowerCase();
  const isEnglishVoice = normalizedVoice.slice(0, 3) === 'en-';
  return isEnglishVoice ? 'en' : 'ko';
}
6
+
7
/**
 * Normalizes a free-form restart detail and removes the restart boilerplate
 * sentences, so the formatters can append their own wording without repeating it.
 *
 * @param {string} [detail=''] - Raw description of what was changed.
 * @param {string} [ttsVoice=''] - Voice id; selects the English or Korean boilerplate patterns.
 * @returns {string} The detail with whitespace collapsed and boilerplate removed
 *   (empty string when nothing is left or nothing was given).
 */
export function cleanRestartDetail(detail = '', ttsVoice = '') {
  const collapsed = String(detail || '').replace(/\s+/g, ' ').trim();
  if (collapsed === '') return '';

  // Phrases the shutdown notice adds itself; each also swallows trailing punctuation/space.
  const boilerplatePatterns = restartNoticeLanguage(ttsVoice) === 'en'
    ? [
      /\b(restarting now|i'?ll restart now)\b[.!?\s]*/ig,
      /\bvoice may cut out briefly\b[.!?\s]*/ig,
    ]
    : [
      /이제\s*재시작할게[.!?。!?\s]*/gu,
      /잠깐\s*음성이\s*끊길\s*수\s*있어[.!?。!?\s]*/gu,
    ];

  const stripped = boilerplatePatterns.reduce(
    (current, pattern) => current.replace(pattern, ''),
    collapsed,
  );
  return stripped.replace(/\s+/g, ' ').trim();
}
23
+
24
/**
 * Builds the "restart complete" announcement in both written and spoken form.
 *
 * @param {string} [detail=''] - Description of what was applied; restart boilerplate is stripped first.
 * @param {string} [ttsVoice=''] - Voice id; an `en-` voice yields English, anything else Korean.
 * @returns {{text: string, speech: string}} `text` carries the check-mark emoji and an
 *   "applied" label; `speech` is the same message without them.
 */
export function formatRestartCompleteNotice(detail = '', ttsVoice = '') {
  const applied = cleanRestartDetail(detail, ttsVoice);
  const hasDetail = Boolean(applied);

  if (restartNoticeLanguage(ttsVoice) !== 'en') {
    let text = '✅ 재시작 완료. 다시 온라인이야.';
    let speech = '재시작 완료. 다시 온라인이야.';
    if (hasDetail) {
      text = `✅ 재시작 완료. 다시 온라인이야. 적용 내용: ${applied}`;
      speech = `재시작 완료. 다시 온라인이야. ${applied}`;
    }
    return { text, speech };
  }

  let text = '✅ Restart complete. I am back online.';
  let speech = 'Restart complete. I am back online.';
  if (hasDetail) {
    text = `✅ Restart complete. I am back online. Applied: ${applied}`;
    speech = `Restart complete. I am back online. ${applied}`;
  }
  return { text, speech };
}
45
+
46
/**
 * Builds the sentence announced right before the bot restarts.
 *
 * @param {string} [detail=''] - Description of the change; restart boilerplate and
 *   trailing sentence punctuation are removed before it is embedded.
 * @param {string} [ttsVoice=''] - Voice id; an `en-` voice yields English, anything else Korean.
 * @returns {string} The shutdown notice, with a generic message when no detail remains.
 */
export function formatRestartShutdownNotice(detail = '', ttsVoice = '') {
  // Drop the final punctuation because the templates below add their own period.
  const summary = cleanRestartDetail(detail, ttsVoice).replace(/[.!?。!?]+$/u, '');
  const english = restartNoticeLanguage(ttsVoice) === 'en';

  if (!summary) {
    return english
      ? 'Restarting now. Voice may cut out briefly.'
      : '방금 변경사항을 적용했어. 이제 재시작할게. 잠깐 음성이 끊길 수 있어.';
  }
  return english
    ? `I applied this change: ${summary}. Restarting now. Voice may cut out briefly.`
    : `방금 한 작업은 ${summary}. 이제 재시작할게. 잠깐 음성이 끊길 수 있어.`;
}
@@ -0,0 +1,37 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import { cleanRestartDetail, formatRestartCompleteNotice, formatRestartShutdownNotice } from './restart_notice.mjs';
4
+
5
// English voices must get English notices so the Edge voice can pronounce them.
test('restart complete notice follows English TTS voice so Edge can synthesize it', () => {
  const { text, speech } = formatRestartCompleteNotice('English speech detection fixed.', 'en-US-GuyNeural');
  assert.equal(text, '✅ Restart complete. I am back online. Applied: English speech detection fixed.');
  assert.equal(speech, 'Restart complete. I am back online. English speech detection fixed.');
});

// Korean voices keep the Korean wording.
test('restart complete notice stays Korean for Korean TTS voice', () => {
  const { text, speech } = formatRestartCompleteNotice('영어 STT 정리 버그 수정.', 'ko-KR-SunHiNeural');
  assert.equal(text, '✅ 재시작 완료. 다시 온라인이야. 적용 내용: 영어 STT 정리 버그 수정.');
  assert.equal(speech, '재시작 완료. 다시 온라인이야. 영어 STT 정리 버그 수정.');
});

test('shutdown restart notice follows English TTS voice', () => {
  const spoken = formatRestartShutdownNotice('English speech detection fixed.', 'en-US-GuyNeural');
  assert.equal(
    spoken,
    'I applied this change: English speech detection fixed. Restarting now. Voice may cut out briefly.',
  );
});

// A detail that already contains the restart boilerplate must not have it repeated.
test('restart detail strips restart boilerplate before formatting', () => {
  const detailWithBoilerplate = '에이전트 안내 고쳤어. 이제 재시작할게. 잠깐 음성이 끊길 수 있어.';
  const koreanVoice = 'ko-KR-InJoonNeural';

  assert.equal(cleanRestartDetail(detailWithBoilerplate, koreanVoice), '에이전트 안내 고쳤어.');
  assert.equal(
    formatRestartShutdownNotice(detailWithBoilerplate, koreanVoice),
    '방금 한 작업은 에이전트 안내 고쳤어. 이제 재시작할게. 잠깐 음성이 끊길 수 있어.',
  );
  assert.equal(
    formatRestartCompleteNotice(detailWithBoilerplate, koreanVoice).speech,
    '재시작 완료. 다시 온라인이야. 에이전트 안내 고쳤어.',
  );
});
@@ -0,0 +1,27 @@
1
/** Name of the environment variable that switches automatic voice-bot restarts on or off. */
export const AUTO_RESTART_ENV_KEY = 'VERBALCODING_AUTO_RESTART_VOICE_BOT';

/**
 * Interprets a loosely formatted on/off value.
 *
 * @param {*} value - Raw value; it is stringified, trimmed and lower-cased before matching.
 * @param {*} [defaultValue=false] - Fallback (coerced to boolean) for missing, blank or unrecognized input.
 * @returns {boolean} `true` for 1/true/yes/on/enable/enabled, `false` for
 *   0/false/no/off/disable/disabled, otherwise the coerced default.
 */
export function parseBooleanFlag(value, defaultValue = false) {
  const fallback = Boolean(defaultValue);
  if (value === undefined || value === null) return fallback;

  switch (String(value).trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'on':
    case 'enable':
    case 'enabled':
      return true;
    case '0':
    case 'false':
    case 'no':
    case 'off':
    case 'disable':
    case 'disabled':
      return false;
    default:
      // Covers the blank string as well as any unknown token.
      return fallback;
  }
}
10
+
11
/**
 * Reports whether automatic voice-bot restart is switched on.
 *
 * @param {Object} [env=process.env] - Environment map read under `AUTO_RESTART_ENV_KEY`.
 * @returns {boolean} The parsed flag; `false` when the variable is unset, blank or unrecognized.
 */
export function autoRestartVoiceBotEnabled(env = process.env) {
  const { [AUTO_RESTART_ENV_KEY]: rawFlag } = env;
  return parseBooleanFlag(rawFlag, false);
}
14
+
15
/**
 * Maps a user-supplied on/off command to the value stored in the environment.
 *
 * Accepts English keywords and the Korean "켜"/"켜줘" (on) and "꺼"/"꺼줘" (off).
 * Matching ignores surrounding whitespace and letter case.
 *
 * @param {*} value - Raw command; non-string values are stringified.
 * @returns {'1'|'0'|null} `'1'` for an "on" command, `'0'` for an "off" command,
 *   `null` when the input is missing or not recognized.
 */
export function normalizeAutoRestartCommand(value) {
  // `??` rather than `||`: numeric 0 and boolean false are legitimate "off" inputs
  // and must not be discarded as if nothing had been passed.
  // NFC normalization lets decomposed Hangul match the precomposed literals below.
  const normalized = String(value ?? '').normalize('NFC').trim().toLowerCase();
  if (['on', 'true', '1', 'yes', 'enable', 'enabled', '켜', '켜줘'].includes(normalized)) return '1';
  if (['off', 'false', '0', 'no', 'disable', 'disabled', '꺼', '꺼줘'].includes(normalized)) return '0';
  return null;
}
21
+
22
/**
 * Renders a one-line status message for the auto-restart setting.
 *
 * @param {Object} [env=process.env] - Environment map holding the auto-restart flag.
 * @param {string} [language='ko'] - Language tag; anything starting with "en"
 *   (case-insensitive) produces English, everything else Korean.
 * @returns {string} The status sentence in the selected language.
 */
export function autoRestartStatusText(env = process.env, language = 'ko') {
  const isOn = autoRestartVoiceBotEnabled(env);
  const wantsEnglish = /^en/i.test(String(language || ''));
  if (!wantsEnglish) {
    return `커밋 후 음성봇 자동 재시작: ${isOn ? '켜짐' : '꺼짐'}.`;
  }
  return `Auto restart voice bot after commits: ${isOn ? 'on' : 'off'}.`;
}
@@ -0,0 +1,33 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+
4
+ import {
5
+ AUTO_RESTART_ENV_KEY,
6
+ autoRestartStatusText,
7
+ autoRestartVoiceBotEnabled,
8
+ normalizeAutoRestartCommand,
9
+ parseBooleanFlag,
10
+ } from './restart_policy.mjs';
11
+
12
// The feature is opt-in: only an explicit truthy flag enables it.
test('auto restart defaults off unless explicitly enabled', () => {
  const envWith = (flag) => ({ [AUTO_RESTART_ENV_KEY]: flag });

  assert.equal(autoRestartVoiceBotEnabled({}), false);
  for (const offFlag of ['', '0', 'off']) {
    assert.equal(autoRestartVoiceBotEnabled(envWith(offFlag)), false);
  }
  for (const onFlag of ['1', 'on']) {
    assert.equal(autoRestartVoiceBotEnabled(envWith(onFlag)), true);
  }
});

// English and Korean commands map to the stored '1' / '0'; unknown input yields null.
test('normalizes user auto restart commands', () => {
  const expectations = [
    ['on', '1'],
    ['켜', '1'],
    ['off', '0'],
    ['꺼', '0'],
    ['maybe', null],
  ];
  for (const [command, stored] of expectations) {
    assert.equal(normalizeAutoRestartCommand(command), stored);
  }
});

test('auto restart status text is explicit', () => {
  const enabledEnv = { [AUTO_RESTART_ENV_KEY]: '1' };
  assert.equal(autoRestartStatusText({}, 'ko'), '커밋 후 음성봇 자동 재시작: 꺼짐.');
  assert.equal(autoRestartStatusText(enabledEnv, 'en'), 'Auto restart voice bot after commits: on.');
  assert.equal(parseBooleanFlag('enabled'), true);
});
@@ -0,0 +1,8 @@
1
/**
 * Decides whether a Discord text message should be forwarded to the agent.
 *
 * @param {Object} [message={}]
 * @param {string} [message.content=''] - Message text; blank text and text starting with `!` are never routed.
 * @param {string} [message.channelId=''] - Channel the message was posted in.
 * @param {string} [message.transcriptChannelId=''] - Configured transcript channel; when blank, any channel qualifies.
 * @returns {boolean} `true` when the message should be routed.
 */
export function shouldRouteDiscordTextToAgent({ content = '', channelId = '', transcriptChannelId = '' } = {}) {
  const message = String(content || '').trim();
  const isBangCommand = message.startsWith('!');
  if (message === '' || isBangCommand) return false;

  const transcriptChannel = String(transcriptChannelId || '').trim();
  return transcriptChannel === '' || String(channelId || '') === transcriptChannel;
}
@@ -0,0 +1,18 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+
4
+ import { shouldRouteDiscordTextToAgent } from './text_routing.mjs';
5
+
6
test('routes normal transcript-channel text to the shared agent session', () => {
  const message = {
    content: '이 작업 이어서 해줘',
    channelId: 'transcript',
    transcriptChannelId: 'transcript',
  };
  assert.equal(shouldRouteDiscordTextToAgent(message), true);
});

// Bang commands, other channels and blank messages must all stay out of the agent session.
test('does not route commands or other channels to the shared agent session', () => {
  const rejected = [
    { content: '!ping', channelId: 'transcript', transcriptChannelId: 'transcript' },
    { content: '다른 채널 말', channelId: 'other', transcriptChannelId: 'transcript' },
    { content: ' ', channelId: 'transcript', transcriptChannelId: 'transcript' },
  ];
  for (const message of rejected) {
    assert.equal(shouldRouteDiscordTextToAgent(message), false);
  }
});
@@ -0,0 +1,251 @@
1
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
4
+
5
/**
 * Builds a fresh file path inside `tmpdir`.
 *
 * @param {string} tmpdir - Directory the file should live in.
 * @param {string} prefix - File-name prefix.
 * @param {string} ext - Extension without the leading dot.
 * @returns {string} `<tmpdir>/<prefix>-<epoch ms>-<random hex>.<ext>`; nothing is created on disk.
 */
function uniquePath(tmpdir, prefix, ext) {
  const stamp = Date.now();
  const nonce = Math.random().toString(16).slice(2);
  const fileName = `${prefix}-${stamp}-${nonce}.${ext}`;
  return path.join(tmpdir, fileName);
}
8
+
9
/**
 * Confirms that a backend actually wrote audio to `file`.
 *
 * @param {string} file - Path the backend was asked to write.
 * @param {{existsSync: Function, statSync: Function}} fsApi - File-system accessors (injectable for tests).
 * @returns {string} The same `file` path when it exists with a non-zero size.
 * @throws {Error} When the file is missing or its size is zero or less.
 */
function validateOutput(file, fsApi) {
  const missing = !fsApi.existsSync(file);
  // statSync is only consulted once the file is known to exist.
  if (missing || fsApi.statSync(file).size <= 0) {
    throw new Error(`TTS backend produced empty output: ${file}`);
  }
  return file;
}
15
+
16
/**
 * Attaches an abort signal to child-process options when one was supplied.
 *
 * @param {Object} base - Options to start from.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Object} `base` itself when there is no signal, otherwise a shallow copy with `signal` added.
 */
function execOptions(base, signal) {
  if (!signal) return base;
  return Object.assign({}, base, { signal });
}
19
+
20
/**
 * Picks the Python interpreter used to run the OpenVoice script.
 *
 * @param {{venv: string}} openvoice - OpenVoice settings; `venv` is the virtualenv directory.
 * @param {Function} [existsSync=fs.existsSync] - Existence check (injectable for tests).
 * @returns {string} `<venv>/bin/python` when that file exists, otherwise `'python3'`.
 */
function openVoicePython(openvoice, existsSync = fs.existsSync) {
  const venvInterpreter = path.join(openvoice.venv, 'bin', 'python');
  return existsSync(venvInterpreter) ? venvInterpreter : 'python3';
}
25
+
26
/**
 * Assembles the argument list for the speech-swift `speak` command.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Output file path.
 * @param {Object} speechswift - Settings; `engine` is always passed, the other
 *   options are added only when set, and some only for a specific engine.
 * @returns {string[]} Arguments in the order the command line is built.
 */
function speechSwiftArgs(text, out, speechswift) {
  const { engine } = speechswift;
  const argv = ['speak', text, '--engine', engine, '--output', out];
  // Appends `flag value` only when the value is truthy.
  const addOption = (flag, value) => {
    if (value) argv.push(flag, value);
  };

  addOption('--language', speechswift.language);
  if (speechswift.stream) argv.push('--stream');
  addOption('--voice-sample', speechswift.refAudio);

  if (engine === 'cosyvoice') {
    addOption('--model-id', speechswift.modelId);
  }
  if (engine === 'qwen3') {
    addOption('--model', speechswift.model);
    addOption('--speaker', speechswift.speaker);
    addOption('--instruct', speechswift.instruct);
  }
  return argv;
}
39
+
40
/**
 * Assembles the argument list for the supertonic `tts` command.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Output file path.
 * @param {Object} supertonic - Settings. A custom style path takes precedence over `voice`;
 *   steps/speed/maxChunkLength are added only when truthy; silenceDuration is added
 *   whenever it is not null/undefined (so `0` is passed through).
 * @returns {string[]} Arguments in the order the command line is built.
 */
function supertonicArgs(text, out, supertonic) {
  const argv = ['tts', text, '-o', out, '--lang', supertonic.language];

  if (supertonic.customStylePath) {
    argv.push('--custom-style-path', supertonic.customStylePath);
  } else if (supertonic.voice) {
    argv.push('--voice', supertonic.voice);
  }

  const truthyOptions = [
    ['--steps', supertonic.steps],
    ['--speed', supertonic.speed],
    ['--max-chunk-length', supertonic.maxChunkLength],
  ];
  for (const [flag, value] of truthyOptions) {
    if (value) argv.push(flag, String(value));
  }

  if (supertonic.silenceDuration != null) {
    argv.push('--silence-duration', String(supertonic.silenceDuration));
  }
  return argv;
}
50
+
51
/**
 * Derives the environment for the supertonic child process.
 *
 * @param {Object} baseEnv - Environment to copy; it is not modified.
 * @param {Object} supertonic - Settings; `cacheDir`, `intraOpThreads` and
 *   `interOpThreads` each set their `SUPERTONIC_*` variable when truthy.
 * @returns {Object} A new environment object.
 */
function supertonicEnv(baseEnv, supertonic) {
  const { cacheDir, intraOpThreads, interOpThreads } = supertonic;
  return {
    ...baseEnv,
    ...(cacheDir ? { SUPERTONIC_CACHE_DIR: cacheDir } : {}),
    ...(intraOpThreads ? { SUPERTONIC_INTRA_OP_THREADS: String(intraOpThreads) } : {}),
    ...(interOpThreads ? { SUPERTONIC_INTER_OP_THREADS: String(interOpThreads) } : {}),
  };
}
58
+
59
/**
 * Requests synthesized audio from the audio server's `/speak` endpoint.
 *
 * The request is aborted when `speechswift.timeoutMs` elapses or when the caller's
 * signal aborts; the timer and the abort listener are always released afterwards.
 *
 * @param {Object} params
 * @param {Function} params.fetchImpl - fetch-compatible function.
 * @param {Object} params.speechswift - Settings (`serverUrl`, `engine`, `language`, `timeoutMs`).
 * @param {string} params.text - Text to synthesize.
 * @param {AbortSignal} [params.signal] - Optional caller cancellation signal.
 * @returns {Promise<Buffer>} The response body bytes.
 * @throws {Error} When the server answers with a non-OK status (message includes up to
 *   200 characters of the response body), or whatever `fetchImpl` rejects with.
 */
async function speechSwiftServerRequest({ fetchImpl, speechswift, text, signal }) {
  const abortController = new AbortController();
  const timeoutHandle = setTimeout(() => abortController.abort(), speechswift.timeoutMs);
  const forwardCallerAbort = () => abortController.abort(signal.reason);

  if (signal?.aborted) {
    abortController.abort(signal.reason);
  } else {
    signal?.addEventListener?.('abort', forwardCallerAbort, { once: true });
  }

  try {
    const payload = JSON.stringify({
      text,
      engine: speechswift.engine,
      language: speechswift.language,
    });
    const reply = await fetchImpl(`${speechswift.serverUrl}/speak`, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: payload,
      signal: abortController.signal,
    });
    if (reply.ok) {
      return Buffer.from(await reply.arrayBuffer());
    }

    // Best effort: include the start of the error body, if it can be read.
    let detail = '';
    if (typeof reply.text === 'function') {
      detail = await reply.text().catch(() => '');
    }
    const suffix = detail ? `: ${detail.slice(0, 200)}` : '';
    throw new Error(`audio-server /speak failed ${reply.status} ${reply.statusText}${suffix}`);
  } finally {
    clearTimeout(timeoutHandle);
    signal?.removeEventListener?.('abort', forwardCallerAbort);
  }
}
86
+
87
/**
 * Creates the Edge TTS backend, which shells out to the `edge-tts` command.
 *
 * @param {Object} settings - TTS settings; `settings.edge` may provide `voice`, `rate` and `command`.
 * @param {Object} [deps={}] - Injectable dependencies.
 * @param {Function} deps.execFileAsync - Promise-returning execFile (required).
 * @param {Function} [deps.existsSync] - Defaults to `fs.existsSync`.
 * @param {Function} [deps.statSync] - Defaults to `fs.statSync`.
 * @param {string} [deps.tmpdir] - Directory for output files; defaults to `os.tmpdir()`.
 * @param {Function} [deps.voiceProvider] - Returns the voice to use right now; falls back to `edge.voice`.
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createEdgeTtsBackend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const tmpdir = deps.tmpdir || os.tmpdir();
  const edge = settings.edge || {};
  const voiceProvider = deps.voiceProvider || (() => edge.voice);
  // Resolved on every call so a voice change at runtime takes effect immediately.
  const currentVoice = () => voiceProvider() || edge.voice;
  const edgeCommand = edge.command || 'edge-tts';
  return {
    name: 'edge',
    outputExtension: 'mp3',
    cacheKeyParts() {
      return ['edge', currentVoice(), edge.rate];
    },
    /**
     * Synthesizes `text` to an mp3 file.
     *
     * @param {string} text - Text to speak.
     * @param {{signal?: AbortSignal}} [options] - Optional cancellation signal.
     * @returns {Promise<string>} Path of the written mp3.
     * @throws {Error} When the command fails or produces no audio.
     */
    async synthesize(text, { signal } = {}) {
      const out = uniquePath(tmpdir, 'verbalcoding-edge', 'mp3');
      try {
        await execFileAsync(edgeCommand, ['-v', currentVoice(), '--rate', edge.rate, '-t', text, '--write-media', out], execOptions({
          timeout: 60000,
          maxBuffer: 2 * 1024 * 1024,
        }, signal));
        return validateOutput(out, fsApi);
      } catch (error) {
        // Edge is the last resort, so there is nothing to fall back to: remove any
        // partial or empty file (best effort, like the other backends) and surface the failure.
        fs.rm(out, { force: true }, () => {});
        throw error;
      }
    },
  };
}
115
+
116
/**
 * Creates the OpenVoice backend, which runs `scripts/openvoice_synth.py` and falls
 * back to Edge TTS whenever OpenVoice fails.
 *
 * @param {Object} settings - TTS settings; `settings.openvoice` supplies `dir`, `venv`,
 *   `refAudio`, `language`, `style`, `timeoutMs` and `useForProgress`.
 * @param {Object} [deps={}] - Injectable dependencies (`execFileAsync` required; optional
 *   `tmpdir`, `warn`, `existsSync`, `statSync`, plus anything the Edge backend accepts).
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createOpenVoiceBackend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const warn = deps.warn || (() => {});
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const openvoice = settings.openvoice;
  return {
    name: 'openvoice',
    outputExtension: openvoice.useForProgress ? 'wav' : 'mp3',
    cacheKeyParts() {
      return ['openvoice', openvoice.refAudio, openvoice.language, openvoice.style];
    },
    /**
     * Synthesizes `text` to a wav file, or delegates to Edge.
     *
     * @param {string} text - Text to speak.
     * @param {{signal?: AbortSignal, kind?: string}} [options] - `kind === 'progress'`
     *   goes straight to Edge unless `openvoice.useForProgress` is set.
     * @returns {Promise<string>} Path of the written audio file.
     */
    async synthesize(text, { signal, kind = 'final' } = {}) {
      if (kind === 'progress' && !openvoice.useForProgress) {
        return edge.synthesize(text, { signal, kind });
      }
      const out = uniquePath(tmpdir, 'verbalcoding-openvoice', 'wav');
      try {
        // fileURLToPath decodes percent-escapes (spaces, non-ASCII) and produces a valid
        // Windows path; the raw URL pathname does neither. Resolved inside the try so a
        // failure here still falls back to Edge.
        const moduleDir = path.dirname(fileURLToPath(import.meta.url));
        const script = path.resolve(moduleDir, '..', 'scripts', 'openvoice_synth.py');
        const args = [
          script,
          '--openvoice-dir', openvoice.dir,
          '--ref-audio', openvoice.refAudio,
          '--text', text,
          '--language', openvoice.language,
          '--style', openvoice.style,
          '--output', out,
        ];
        await execFileAsync(openVoicePython(openvoice, fsApi.existsSync), args, execOptions({
          timeout: openvoice.timeoutMs,
          maxBuffer: 2 * 1024 * 1024,
        }, signal));
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort cleanup of a partial file before handing over to Edge.
        fs.rm(out, { force: true }, () => {});
        warn('openvoice failed; falling back to edge', error?.message || error);
        return edge.synthesize(text, { signal, kind });
      }
    },
  };
}
162
+
163
/**
 * Creates the speech-swift backend. It either calls the local command or, in
 * `server` mode, posts to the audio server; any failure falls back to Edge TTS.
 *
 * @param {Object} settings - TTS settings; `settings.speechswift` supplies mode, engine and related options.
 * @param {Object} [deps={}] - Injectable dependencies (`execFileAsync`, `tmpdir`, `warn`,
 *   `existsSync`, `statSync`, `fetch`, `writeFileAsync`).
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When the Edge fallback cannot be created (missing `deps.execFileAsync`).
 */
export function createSpeechSwiftBackend(settings, deps = {}) {
  const { execFileAsync } = deps;
  const tmpdir = deps.tmpdir || os.tmpdir();
  const warn = deps.warn || (() => {});
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const fetchImpl = deps.fetch || globalThis.fetch;
  const writeFileAsync = deps.writeFileAsync || fs.promises.writeFile;
  const edgeBackend = createEdgeTtsBackend(settings, deps);
  const speechswift = settings.speechswift;

  function cacheKeyParts() {
    return ['speechswift', speechswift.mode, speechswift.serverUrl, speechswift.engine, speechswift.refAudio, speechswift.language, speechswift.modelId, speechswift.model, speechswift.speaker, speechswift.instruct];
  }

  /**
   * Synthesizes `text` to a wav file, or delegates to Edge.
   * Progress speech goes straight to Edge unless `speechswift.useForProgress` is set.
   */
  async function synthesize(text, { signal, kind = 'final' } = {}) {
    const viaEdge = () => edgeBackend.synthesize(text, { signal, kind });
    if (kind === 'progress' && !speechswift.useForProgress) {
      return viaEdge();
    }

    const serverMode = speechswift.mode === 'server';
    const prefix = serverMode ? 'verbalcoding-speechswift-server' : 'verbalcoding-speechswift';
    const out = uniquePath(tmpdir, prefix, 'wav');
    try {
      if (serverMode) {
        if (!fetchImpl) throw new Error('fetch is not available for speech-swift server mode');
        const wavBytes = await speechSwiftServerRequest({ fetchImpl, speechswift, text, signal });
        await writeFileAsync(out, wavBytes);
      } else {
        if (!execFileAsync) throw new Error('execFileAsync dependency is required');
        const processOptions = execOptions({
          timeout: speechswift.timeoutMs,
          maxBuffer: 4 * 1024 * 1024,
        }, signal);
        await execFileAsync(speechswift.command, speechSwiftArgs(text, out, speechswift), processOptions);
      }
      return validateOutput(out, fsApi);
    } catch (error) {
      // Best-effort cleanup of a partial file before handing over to Edge.
      fs.rm(out, { force: true }, () => {});
      warn('speech-swift failed; falling back to edge', error?.message || error);
      return viaEdge();
    }
  }

  return {
    name: 'speechswift',
    outputExtension: speechswift.useForProgress ? 'wav' : 'mp3',
    cacheKeyParts,
    synthesize,
  };
}
207
+
208
/**
 * Creates the supertonic backend, which shells out to the supertonic command
 * and falls back to Edge TTS whenever it fails.
 *
 * @param {Object} settings - TTS settings; `settings.supertonic` supplies command, voice and tuning options.
 * @param {Object} [deps={}] - Injectable dependencies (`execFileAsync` required; optional
 *   `tmpdir`, `warn`, `existsSync`, `statSync`).
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createSupertonicBackend(settings, deps = {}) {
  const { execFileAsync } = deps;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const warn = deps.warn || (() => {});
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edgeBackend = createEdgeTtsBackend(settings, deps);
  const supertonic = settings.supertonic;

  function cacheKeyParts() {
    return ['supertonic', supertonic.command, supertonic.voice, supertonic.language, supertonic.steps, supertonic.speed, supertonic.maxChunkLength, supertonic.silenceDuration, supertonic.customStylePath];
  }

  /**
   * Synthesizes `text` to a wav file, or delegates to Edge.
   * Progress speech goes straight to Edge unless `supertonic.useForProgress` is set.
   */
  async function synthesize(text, { signal, kind = 'final' } = {}) {
    const viaEdge = () => edgeBackend.synthesize(text, { signal, kind });
    if (kind === 'progress' && !supertonic.useForProgress) {
      return viaEdge();
    }

    const out = uniquePath(tmpdir, 'verbalcoding-supertonic', 'wav');
    try {
      const processOptions = execOptions({
        timeout: supertonic.timeoutMs,
        maxBuffer: 4 * 1024 * 1024,
        env: supertonicEnv(process.env, supertonic),
      }, signal);
      await execFileAsync(supertonic.command, supertonicArgs(text, out, supertonic), processOptions);
      return validateOutput(out, fsApi);
    } catch (error) {
      // Best-effort cleanup of a partial file before handing over to Edge.
      fs.rm(out, { force: true }, () => {});
      warn('supertonic failed; falling back to edge', error?.message || error);
      return viaEdge();
    }
  }

  return {
    name: 'supertonic',
    outputExtension: supertonic.useForProgress ? 'wav' : 'mp3',
    cacheKeyParts,
    synthesize,
  };
}
245
+
246
/**
 * Instantiates the TTS backend named by `settings.backend`.
 *
 * @param {Object} settings - TTS settings; `backend` selects the implementation.
 * @param {Object} [deps={}] - Dependencies forwarded unchanged to the chosen factory.
 * @returns {Object} The backend for `openvoice`, `speechswift` or `supertonic`;
 *   the Edge backend for any other value.
 */
export function createTtsBackend(settings, deps = {}) {
  switch (settings.backend) {
    case 'openvoice':
      return createOpenVoiceBackend(settings, deps);
    case 'speechswift':
      return createSpeechSwiftBackend(settings, deps);
    case 'supertonic':
      return createSupertonicBackend(settings, deps);
    default:
      return createEdgeTtsBackend(settings, deps);
  }
}