verbalcoding 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/.env.example +98 -2
  2. package/README.es.md +134 -0
  3. package/README.fr.md +134 -0
  4. package/README.ja.md +134 -0
  5. package/README.ko.md +134 -0
  6. package/README.md +118 -74
  7. package/README.ru.md +134 -0
  8. package/README.zh.md +133 -0
  9. package/app-node/agent_adapters.mjs +37 -5
  10. package/app-node/agent_adapters.test.mjs +27 -1
  11. package/app-node/agent_detect.mjs +73 -0
  12. package/app-node/agent_detect.test.mjs +77 -0
  13. package/app-node/agent_routing.mjs +148 -0
  14. package/app-node/agent_routing.test.mjs +138 -0
  15. package/app-node/agent_turn.mjs +86 -0
  16. package/app-node/agent_turn.test.mjs +109 -0
  17. package/app-node/bridge_context.mjs +73 -0
  18. package/app-node/bridge_context.test.mjs +54 -0
  19. package/app-node/bridge_state.mjs +4 -0
  20. package/app-node/bridge_wireup.test.mjs +462 -0
  21. package/app-node/cli_install.test.mjs +31 -0
  22. package/app-node/cross_agent_routing.test.mjs +78 -0
  23. package/app-node/discord_command_router.mjs +204 -0
  24. package/app-node/discord_command_router.test.mjs +311 -0
  25. package/app-node/discord_voice_setup.mjs +251 -0
  26. package/app-node/discord_voice_setup.test.mjs +86 -0
  27. package/app-node/hermes_profiles.test.mjs +12 -1
  28. package/app-node/install_config.mjs +113 -3
  29. package/app-node/install_config.test.mjs +8 -0
  30. package/app-node/instance_doctor.test.mjs +9 -0
  31. package/app-node/instances.test.mjs +8 -1
  32. package/app-node/main.mjs +513 -1058
  33. package/app-node/mcp_tools.test.mjs +7 -0
  34. package/app-node/notification_handler.mjs +89 -0
  35. package/app-node/notification_handler.test.mjs +187 -0
  36. package/app-node/notify.mjs +73 -0
  37. package/app-node/notify.test.mjs +68 -0
  38. package/app-node/plan_dispatcher.mjs +215 -0
  39. package/app-node/plan_dispatcher.test.mjs +101 -0
  40. package/app-node/plan_mode.mjs +203 -0
  41. package/app-node/plan_mode.test.mjs +231 -0
  42. package/app-node/progress_handler.mjs +220 -0
  43. package/app-node/progress_handler.test.mjs +193 -0
  44. package/app-node/progress_speech.mjs +54 -32
  45. package/app-node/progress_speech.test.mjs +12 -3
  46. package/app-node/project_sessions.mjs +5 -2
  47. package/app-node/project_sessions.test.mjs +7 -0
  48. package/app-node/research_mode.mjs +282 -0
  49. package/app-node/research_mode.test.mjs +264 -0
  50. package/app-node/restart_notice.mjs +3 -0
  51. package/app-node/restart_notice.test.mjs +11 -0
  52. package/app-node/session_ontology.mjs +271 -0
  53. package/app-node/session_ontology.test.mjs +130 -0
  54. package/app-node/smart_progress.mjs +94 -0
  55. package/app-node/smart_progress.test.mjs +66 -0
  56. package/app-node/stream_sentencer.mjs +91 -0
  57. package/app-node/stream_sentencer.test.mjs +129 -0
  58. package/app-node/streaming_tts_queue.mjs +52 -0
  59. package/app-node/streaming_tts_queue.test.mjs +64 -0
  60. package/app-node/stt_whisper.mjs +24 -0
  61. package/app-node/stt_whisper.test.mjs +32 -0
  62. package/app-node/text_routing.mjs +22 -0
  63. package/app-node/text_routing.test.mjs +23 -1
  64. package/app-node/tts_backends.mjs +537 -3
  65. package/app-node/tts_backends.test.mjs +454 -0
  66. package/app-node/tts_player.mjs +164 -0
  67. package/app-node/tts_player.test.mjs +202 -0
  68. package/app-node/tts_runtime.mjs +134 -0
  69. package/app-node/tts_runtime.test.mjs +89 -0
  70. package/app-node/tts_settings.mjs +150 -3
  71. package/app-node/tts_settings.test.mjs +204 -0
  72. package/app-node/tts_voice_config.mjs +136 -2
  73. package/app-node/tts_voice_config.test.mjs +94 -0
  74. package/app-node/utterance_router.mjs +216 -0
  75. package/app-node/utterance_router.test.mjs +236 -0
  76. package/app-node/voice_autojoin.mjs +37 -0
  77. package/app-node/voice_autojoin.test.mjs +59 -0
  78. package/app-node/voice_io.mjs +272 -0
  79. package/app-node/voice_io.test.mjs +102 -0
  80. package/app-node/voice_turn_runner.mjs +449 -0
  81. package/app-node/voice_turn_runner.test.mjs +289 -0
  82. package/docs/CONFIGURATION.md +79 -96
  83. package/docs/FRESH_INSTALL.md +105 -63
  84. package/docs/HARNESSES.md +58 -0
  85. package/docs/HARNESS_AIDER.md +50 -0
  86. package/docs/HARNESS_CLAUDE.md +56 -0
  87. package/docs/HARNESS_CODEX.md +56 -0
  88. package/docs/HARNESS_CURSOR.md +45 -0
  89. package/docs/HARNESS_GEMINI.md +45 -0
  90. package/docs/HARNESS_HERMES.md +57 -0
  91. package/docs/HARNESS_OPENCLAW.md +44 -0
  92. package/docs/HARNESS_OPENCODE.md +44 -0
  93. package/docs/HERMES_VOICE.md +65 -0
  94. package/docs/MULTI_INSTANCE.md +16 -0
  95. package/docs/README.md +50 -0
  96. package/docs/RELEASE.md +42 -19
  97. package/docs/ROADMAP.md +53 -0
  98. package/docs/TROUBLESHOOTING.md +126 -0
  99. package/docs/TTS_BACKENDS.md +227 -0
  100. package/docs/USAGE.md +94 -40
  101. package/docs/assets/figures/verbalcoding-flow.svg +1 -1
  102. package/docs/i18n/AGENTS.es.md +34 -0
  103. package/docs/i18n/AGENTS.fr.md +34 -0
  104. package/docs/i18n/AGENTS.ja.md +34 -0
  105. package/docs/i18n/AGENTS.ko.md +34 -0
  106. package/docs/i18n/AGENTS.ru.md +34 -0
  107. package/docs/i18n/AGENTS.zh.md +34 -0
  108. package/docs/i18n/CONFIGURATION.es.md +25 -0
  109. package/docs/i18n/CONFIGURATION.fr.md +25 -0
  110. package/docs/i18n/CONFIGURATION.ja.md +25 -0
  111. package/docs/i18n/CONFIGURATION.ko.md +25 -0
  112. package/docs/i18n/CONFIGURATION.ru.md +25 -0
  113. package/docs/i18n/CONFIGURATION.zh.md +25 -0
  114. package/docs/i18n/FRESH_INSTALL.es.md +27 -2
  115. package/docs/i18n/FRESH_INSTALL.fr.md +27 -2
  116. package/docs/i18n/FRESH_INSTALL.ja.md +27 -2
  117. package/docs/i18n/FRESH_INSTALL.ko.md +27 -2
  118. package/docs/i18n/FRESH_INSTALL.ru.md +27 -2
  119. package/docs/i18n/FRESH_INSTALL.zh.md +27 -2
  120. package/docs/i18n/HARNESSES.es.md +58 -0
  121. package/docs/i18n/HARNESSES.fr.md +58 -0
  122. package/docs/i18n/HARNESSES.ja.md +58 -0
  123. package/docs/i18n/HARNESSES.ko.md +58 -0
  124. package/docs/i18n/HARNESSES.ru.md +58 -0
  125. package/docs/i18n/HARNESSES.zh.md +58 -0
  126. package/docs/i18n/HARNESS_AIDER.es.md +48 -0
  127. package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
  128. package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
  129. package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
  130. package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
  131. package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
  132. package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
  133. package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
  134. package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
  135. package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
  136. package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
  137. package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
  138. package/docs/i18n/HARNESS_CODEX.es.md +55 -0
  139. package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
  140. package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
  141. package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
  142. package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
  143. package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
  144. package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
  145. package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
  146. package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
  147. package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
  148. package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
  149. package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
  150. package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
  151. package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
  152. package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
  153. package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
  154. package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
  155. package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
  156. package/docs/i18n/HARNESS_HERMES.es.md +54 -0
  157. package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
  158. package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
  159. package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
  160. package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
  161. package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
  162. package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
  163. package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
  164. package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
  165. package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
  166. package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
  167. package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
  168. package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
  169. package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
  170. package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
  171. package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
  172. package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
  173. package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
  174. package/docs/i18n/HERMES_VOICE.es.md +46 -0
  175. package/docs/i18n/HERMES_VOICE.fr.md +46 -0
  176. package/docs/i18n/HERMES_VOICE.ja.md +46 -0
  177. package/docs/i18n/HERMES_VOICE.ko.md +65 -0
  178. package/docs/i18n/HERMES_VOICE.ru.md +46 -0
  179. package/docs/i18n/HERMES_VOICE.zh.md +46 -0
  180. package/docs/i18n/MULTI_INSTANCE.es.md +25 -0
  181. package/docs/i18n/MULTI_INSTANCE.fr.md +25 -0
  182. package/docs/i18n/MULTI_INSTANCE.ja.md +25 -0
  183. package/docs/i18n/MULTI_INSTANCE.ko.md +25 -0
  184. package/docs/i18n/MULTI_INSTANCE.ru.md +25 -0
  185. package/docs/i18n/MULTI_INSTANCE.zh.md +25 -0
  186. package/docs/i18n/README.es.md +20 -134
  187. package/docs/i18n/README.fr.md +20 -134
  188. package/docs/i18n/README.ja.md +20 -134
  189. package/docs/i18n/README.ko.md +20 -133
  190. package/docs/i18n/README.ru.md +20 -134
  191. package/docs/i18n/README.zh.md +20 -133
  192. package/docs/i18n/RELEASE.es.md +26 -1
  193. package/docs/i18n/RELEASE.fr.md +26 -1
  194. package/docs/i18n/RELEASE.ja.md +26 -1
  195. package/docs/i18n/RELEASE.ko.md +26 -1
  196. package/docs/i18n/RELEASE.ru.md +26 -1
  197. package/docs/i18n/RELEASE.zh.md +26 -1
  198. package/docs/i18n/TROUBLESHOOTING.es.md +39 -0
  199. package/docs/i18n/TROUBLESHOOTING.fr.md +39 -0
  200. package/docs/i18n/TROUBLESHOOTING.ja.md +39 -0
  201. package/docs/i18n/TROUBLESHOOTING.ko.md +39 -0
  202. package/docs/i18n/TROUBLESHOOTING.ru.md +39 -0
  203. package/docs/i18n/TROUBLESHOOTING.zh.md +39 -0
  204. package/docs/i18n/USAGE.es.md +25 -0
  205. package/docs/i18n/USAGE.fr.md +25 -0
  206. package/docs/i18n/USAGE.ja.md +25 -0
  207. package/docs/i18n/USAGE.ko.md +25 -0
  208. package/docs/i18n/USAGE.ru.md +25 -0
  209. package/docs/i18n/USAGE.zh.md +25 -0
  210. package/docs/superpowers/plans/2026-05-13-phase1-streaming-pipeline.md +122 -0
  211. package/docs/superpowers/plans/2026-05-13-phase10-push-notifications.md +152 -0
  212. package/docs/superpowers/plans/2026-05-13-phase2-agent-adapters.md +242 -0
  213. package/docs/superpowers/plans/2026-05-13-phase6-smart-progress.md +172 -0
  214. package/docs/superpowers/plans/2026-05-13-phase7-voice-plan-mode.md +108 -0
  215. package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
  216. package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
  217. package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
  218. package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
  219. package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
  220. package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
  221. package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
  222. package/integrations/fireredtts2/mlx_llm.py +183 -0
  223. package/integrations/fireredtts2/synth.py +156 -0
  224. package/integrations/fireredtts2/synth_mlx.py +196 -0
  225. package/integrations/mlxaudio/synth.py +74 -0
  226. package/integrations/neuttsair/synth.py +104 -0
  227. package/integrations/omnivoice/synth.py +110 -0
  228. package/package.json +7 -1
  229. package/scripts/cli.mjs +88 -3
  230. package/scripts/doctor.mjs +115 -4
  231. package/scripts/install.mjs +20 -2
  232. package/scripts/install_fireredtts2.sh +109 -0
  233. package/scripts/install_mlxaudio.sh +34 -0
  234. package/scripts/install_mossttsnano.sh +46 -0
  235. package/scripts/postinstall.mjs +34 -0
@@ -1,3 +1,4 @@
1
+ import { spawn as spawnProcess } from 'node:child_process';
1
2
  import fs from 'node:fs';
2
3
  import os from 'node:os';
3
4
  import path from 'node:path';
@@ -17,6 +18,11 @@ function execOptions(base, signal) {
17
18
  return signal ? { ...base, signal } : base;
18
19
  }
19
20
 
21
/**
 * Report that a TTS backend failed and that synthesis is falling back to edge.
 *
 * Logs through `deps.warn` (a no-op when absent), then invokes the optional
 * `deps.onFallback` listener with `{ backend, error, kind }`.
 *
 * @param {object} [deps] - Injected dependencies; only `warn` and `onFallback` are read.
 * @param {string} backend - Name of the backend that failed (used in the log message).
 * @param {*} error - The failure; `error.message` is logged when present, otherwise the value itself.
 * @param {string} kind - Utterance kind forwarded to the listener (callers in this file pass 'final' or 'progress').
 * @returns {void}
 */
export function notifyTtsFallback(deps, backend, error, kind) {
  // Tolerate a missing deps object so a notification can never mask the original failure.
  const warn = deps?.warn || (() => {});
  warn(`${backend} failed; falling back to edge`, error?.message || error);
  try {
    deps?.onFallback?.({ backend, error, kind });
  } catch (listenerError) {
    // The listener is best-effort and must not break the fallback path,
    // but its failure is reported instead of being dropped silently.
    warn(`${backend} fallback listener failed`, listenerError?.message || listenerError);
  }
}
25
+
20
26
  function openVoicePython(openvoice, existsSync = fs.existsSync) {
21
27
  const venvPython = path.join(openvoice.venv, 'bin', 'python');
22
28
  if (existsSync(venvPython)) return venvPython;
@@ -56,6 +62,119 @@ function supertonicEnv(baseEnv, supertonic) {
56
62
  return env;
57
63
  }
58
64
 
65
/**
 * Build the argument vector for the bundled `integrations/omnivoice/synth.py` script.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Path the script should write its audio to.
 * @param {object} omnivoice - OmniVoice settings; `model`, `device` and `dtype` are always
 *   passed, while `refAudio`, `refText`, `language` and `speaker` are passed only when truthy.
 * @returns {string[]} Script path followed by its command-line flags.
 */
function omniVoiceArgs(text, out, omnivoice) {
  // URL#pathname is percent-encoded; decode it so an install location containing
  // spaces or non-ASCII characters still resolves to the real script on disk.
  const moduleDir = path.dirname(decodeURIComponent(new URL(import.meta.url).pathname));
  const args = [
    path.resolve(moduleDir, '..', 'integrations', 'omnivoice', 'synth.py'),
    '--text', text,
    '--output', out,
    '--model', omnivoice.model,
    '--device', omnivoice.device,
    '--dtype', omnivoice.dtype,
  ];
  if (omnivoice.refAudio) args.push('--ref-audio', omnivoice.refAudio);
  if (omnivoice.refText) args.push('--ref-text', omnivoice.refText);
  if (omnivoice.language) args.push('--language', omnivoice.language);
  if (omnivoice.speaker) args.push('--speaker', omnivoice.speaker);
  return args;
}
80
+
81
/**
 * Assemble the `speak` sub-command arguments for the Qwen3 TTS command.
 *
 * Mode handling:
 *  - 'clone'  -> model defaults to 'base'; passes `--voice-sample` when `refAudio` is set.
 *  - 'design' -> model defaults to 'customVoice'; passes `--instruct` when set.
 *  - anything else -> model defaults to 'customVoice'; passes `--speaker` and `--instruct` when set.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Output audio path.
 * @param {object} qwen3tts - Qwen3 TTS settings.
 * @returns {string[]} Arguments for the configured command.
 */
function qwen3TtsArgs(text, out, qwen3tts) {
  const { language, stream, mode, model, refAudio, speaker, instruct } = qwen3tts;
  const argv = ['speak', text, '--engine', 'qwen3', '--output', out];

  if (language) argv.push('--language', language);
  if (stream) argv.push('--stream');

  const cloning = mode === 'clone';
  argv.push('--model', model || (cloning ? 'base' : 'customVoice'));

  if (cloning) {
    if (refAudio) argv.push('--voice-sample', refAudio);
    return argv;
  }

  // Only the default (non-design) mode forwards a speaker name.
  if (mode !== 'design' && speaker) argv.push('--speaker', speaker);
  if (instruct) argv.push('--instruct', instruct);
  return argv;
}
98
+
99
/**
 * Build the argument vector for the bundled `integrations/mlxaudio/synth.py` script.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Path the script should write its audio to.
 * @param {object} mlxaudio - MLX-Audio settings; `model` and `voice` are always passed,
 *   `langCode` and `stream` only when truthy.
 * @returns {string[]} Script path followed by its command-line flags.
 */
function mlxAudioArgs(text, out, mlxaudio) {
  // URL#pathname is percent-encoded; decode it so an install location containing
  // spaces or non-ASCII characters still resolves to the real script on disk.
  const moduleDir = path.dirname(decodeURIComponent(new URL(import.meta.url).pathname));
  const args = [
    path.resolve(moduleDir, '..', 'integrations', 'mlxaudio', 'synth.py'),
    '--text', text,
    '--output', out,
    '--model', mlxaudio.model,
    '--voice', mlxaudio.voice,
  ];
  if (mlxaudio.langCode) args.push('--lang-code', mlxaudio.langCode);
  if (mlxaudio.stream) args.push('--stream');
  return args;
}
111
+
112
/**
 * Produce the argument list for the NeuTTS Air synthesis script.
 *
 * The script path and the repo/device/reference/language/sample-rate flags are
 * always emitted; `--ref-text`, `--ref-text-file` and `--cache-ref` are appended
 * only when the matching setting is truthy.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Output audio path.
 * @param {object} neuttsair - NeuTTS Air settings.
 * @returns {string[]} Script path followed by its flags.
 */
function neuTtsAirArgs(text, out, neuttsair) {
  const optional = [];
  if (neuttsair.refText) optional.push('--ref-text', neuttsair.refText);
  if (neuttsair.refTextFile) optional.push('--ref-text-file', neuttsair.refTextFile);
  if (neuttsair.cacheRef) optional.push('--cache-ref');

  return [
    neuttsair.script,
    '--text', text,
    '--output', out,
    '--backbone-repo', neuttsair.backboneRepo,
    '--codec-repo', neuttsair.codecRepo,
    '--backbone-device', neuttsair.backboneDevice,
    '--codec-device', neuttsair.codecDevice,
    '--ref-audio', neuttsair.refAudio,
    '--language', neuttsair.language,
    '--sample-rate', String(neuttsair.sampleRate),
    ...optional,
  ];
}
130
+
131
/**
 * Produce the flags for the FireRedTTS2 command.
 *
 * `--text` and `--output` are always present. Every other value flag is emitted
 * only when its setting is truthy, and `--bf16` is added when `useBf16` is truthy.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Output audio path.
 * @param {object} fireredtts2 - FireRedTTS2 settings.
 * @returns {string[]} Command-line flags (no executable or script path).
 */
function fireRedTts2Args(text, out, fireredtts2) {
  // Flag/value pairs in the order they must appear on the command line.
  const valueFlags = [
    ['--pretrained-dir', fireredtts2.pretrainedDir],
    ['--device', fireredtts2.device],
    ['--gen-type', fireredtts2.genType],
    ['--speaker', fireredtts2.speaker],
    ['--prompt-audio', fireredtts2.promptAudio],
    ['--prompt-text', fireredtts2.promptText],
  ];

  const argv = ['--text', text, '--output', out];
  for (const [flag, value] of valueFlags) {
    if (value) argv.push(flag, value);
  }
  if (fireredtts2.useBf16) argv.push('--bf16');
  return argv;
}
142
+
143
/**
 * Produce the argument list for the MOSS-TTS-Nano inference script.
 *
 * The script defaults to 'infer.py'. Optional flags are emitted only for truthy
 * settings (so a `seed` or `maxNewFrames` of 0 is omitted), and
 * `--disable-wetext-processing` is added unless `disableWetext` is exactly `false`.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Output audio path.
 * @param {object} mossttsnano - MOSS-TTS-Nano settings.
 * @returns {string[]} Script path followed by its flags.
 */
function mossTtsNanoArgs(text, out, mossttsnano) {
  const argv = [mossttsnano.script || 'infer.py', '--text', text, '--output-audio-path', out];

  // Appends `flag value` when the value is truthy; `render` converts it for the command line.
  const addIfSet = (flag, value, render = v => v) => {
    if (value) argv.push(flag, render(value));
  };

  addIfSet('--checkpoint', mossttsnano.checkpoint);
  addIfSet('--audio-tokenizer-pretrained-name-or-path', mossttsnano.audioTokenizer);
  addIfSet('--mode', mossttsnano.mode);
  addIfSet('--device', mossttsnano.device);
  addIfSet('--dtype', mossttsnano.dtype);
  addIfSet('--prompt-audio-path', mossttsnano.promptAudio);
  addIfSet('--prompt-text', mossttsnano.promptText);
  addIfSet('--max-new-frames', mossttsnano.maxNewFrames, String);
  addIfSet('--seed', mossttsnano.seed, String);

  if (mossttsnano.disableWetext !== false) argv.push('--disable-wetext-processing');
  return argv;
}
157
+
158
/**
 * Produce the argument list for the one-shot MOSS-TTS-Nano MLX script.
 *
 * The checkpoint, tokenizer, mode, torch settings, frame limit and
 * `--disable-wetext-processing` are always emitted; prompt audio, prompt text
 * and seed are appended only when truthy.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} out - Output audio path.
 * @param {object} mossttsnanoMlx - MOSS-TTS-Nano MLX settings.
 * @returns {string[]} Script path followed by its flags.
 */
function mossTtsNanoMlxArgs(text, out, mossttsnanoMlx) {
  const {
    script,
    checkpoint,
    audioTokenizer,
    mode,
    torchInferScript,
    torchDevice,
    torchDtype,
    maxNewFrames,
    promptAudio,
    promptText,
    seed,
  } = mossttsnanoMlx;

  const extras = [];
  if (promptAudio) extras.push('--prompt-audio-path', promptAudio);
  if (promptText) extras.push('--prompt-text', promptText);
  if (seed) extras.push('--seed', String(seed));

  return [
    script,
    '--text', text,
    '--output-audio-path', out,
    '--checkpoint', checkpoint,
    '--audio-tokenizer-pretrained-name-or-path', audioTokenizer,
    '--mode', mode,
    '--torch-infer-script', torchInferScript,
    '--torch-device', torchDevice,
    '--torch-dtype', torchDtype,
    '--max-new-frames', String(maxNewFrames),
    '--disable-wetext-processing',
    ...extras,
  ];
}
177
+
59
178
  async function speechSwiftServerRequest({ fetchImpl, speechswift, text, signal }) {
60
179
  const controller = new AbortController();
61
180
  const timeout = setTimeout(() => controller.abort(), speechswift.timeoutMs);
@@ -153,7 +272,7 @@ export function createOpenVoiceBackend(settings, deps = {}) {
153
272
  return validateOutput(out, fsApi);
154
273
  } catch (error) {
155
274
  fs.rm(out, { force: true }, () => {});
156
- warn('openvoice failed; falling back to edge', error?.message || error);
275
+ notifyTtsFallback(deps, 'openvoice', error, kind);
157
276
  return edge.synthesize(text, { signal, kind });
158
277
  }
159
278
  },
@@ -198,7 +317,7 @@ export function createSpeechSwiftBackend(settings, deps = {}) {
198
317
  return validateOutput(out, fsApi);
199
318
  } catch (error) {
200
319
  fs.rm(out, { force: true }, () => {});
201
- warn('speech-swift failed; falling back to edge', error?.message || error);
320
+ notifyTtsFallback(deps, 'speech-swift', error, kind);
202
321
  return edge.synthesize(text, { signal, kind });
203
322
  }
204
323
  },
@@ -236,16 +355,431 @@ export function createSupertonicBackend(settings, deps = {}) {
236
355
  return validateOutput(out, fsApi);
237
356
  } catch (error) {
238
357
  fs.rm(out, { force: true }, () => {});
239
- warn('supertonic failed; falling back to edge', error?.message || error);
358
+ notifyTtsFallback(deps, 'supertonic', error, kind);
359
+ return edge.synthesize(text, { signal, kind });
360
+ }
361
+ },
362
+ };
363
+ }
364
+
365
/**
 * Create the OmniVoice TTS backend, which runs the OmniVoice Python script and
 * falls back to the edge backend whenever that run fails.
 *
 * @param {object} settings - TTS settings; `settings.omnivoice` configures this backend
 *   and the whole object is forwarded to the edge backend.
 * @param {object} [deps] - Injected dependencies. `execFileAsync` is required;
 *   `tmpdir`, `existsSync` and `statSync` are optional overrides. `deps` is also
 *   passed to the edge backend and to `notifyTtsFallback`.
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createOmniVoiceBackend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const omnivoice = settings.omnivoice;
  return {
    name: 'omnivoice',
    outputExtension: omnivoice.useForProgress ? 'wav' : 'mp3',
    cacheKeyParts() {
      return ['omnivoice', omnivoice.model, omnivoice.device, omnivoice.dtype, omnivoice.refAudio, omnivoice.refText, omnivoice.language, omnivoice.speaker];
    },
    async synthesize(text, { signal, kind = 'final' } = {}) {
      // Progress utterances go straight to edge unless OmniVoice is enabled for them.
      if (kind === 'progress' && !omnivoice.useForProgress) {
        return edge.synthesize(text, { signal, kind });
      }
      const out = uniquePath(tmpdir, 'verbalcoding-omnivoice', 'wav');
      try {
        await execFileAsync(omnivoice.python || 'python3', omniVoiceArgs(text, out, omnivoice), execOptions({
          timeout: omnivoice.timeoutMs,
          maxBuffer: 4 * 1024 * 1024,
        }, signal));
        // NOTE(review): validateOutput presumably checks the file exists and is non-empty — confirm.
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort cleanup of a partial file; uses the real fs module, not the injected fsApi.
        fs.rm(out, { force: true }, () => {});
        notifyTtsFallback(deps, 'omnivoice', error, kind);
        return edge.synthesize(text, { signal, kind });
      }
    },
  };
}
401
+
402
/**
 * Create the Qwen3 TTS backend, which runs the configured Qwen3 command and
 * falls back to the edge backend whenever that run fails.
 *
 * @param {object} settings - TTS settings; `settings.qwen3tts` configures this backend
 *   and the whole object is forwarded to the edge backend.
 * @param {object} [deps] - Injected dependencies. `execFileAsync` is required;
 *   `tmpdir`, `existsSync` and `statSync` are optional overrides. `deps` is also
 *   passed to the edge backend and to `notifyTtsFallback`.
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createQwen3TtsBackend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const qwen3tts = settings.qwen3tts;
  return {
    name: 'qwen3tts',
    // Both this backend and the progress path produce mp3.
    outputExtension: 'mp3',
    cacheKeyParts() {
      // `model` is part of the key because it changes the `--model` argument sent to the command.
      return ['qwen3tts', qwen3tts.command, qwen3tts.mode, qwen3tts.language, qwen3tts.speaker, qwen3tts.instruct, qwen3tts.refAudio, qwen3tts.refText, qwen3tts.model];
    },
    async synthesize(text, { signal, kind = 'final' } = {}) {
      // Progress utterances go straight to edge unless Qwen3 is enabled for them.
      if (kind === 'progress' && !qwen3tts.useForProgress) {
        return edge.synthesize(text, { signal, kind });
      }
      const out = uniquePath(tmpdir, 'verbalcoding-qwen3tts', 'mp3');
      try {
        await execFileAsync(qwen3tts.command, qwen3TtsArgs(text, out, qwen3tts), execOptions({
          timeout: qwen3tts.timeoutMs,
          maxBuffer: 4 * 1024 * 1024,
        }, signal));
        // NOTE(review): validateOutput presumably checks the file exists and is non-empty — confirm.
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort cleanup of a partial file; uses the real fs module, not the injected fsApi.
        fs.rm(out, { force: true }, () => {});
        notifyTtsFallback(deps, 'qwen3tts', error, kind);
        return edge.synthesize(text, { signal, kind });
      }
    },
  };
}
438
+
439
/**
 * Create the MLX-Audio TTS backend, which runs the MLX-Audio Python script and
 * falls back to the edge backend whenever that run fails.
 *
 * @param {object} settings - TTS settings; `settings.mlxaudio` configures this backend
 *   and the whole object is forwarded to the edge backend.
 * @param {object} [deps] - Injected dependencies. `execFileAsync` is required;
 *   `tmpdir`, `existsSync` and `statSync` are optional overrides. `deps` is also
 *   passed to the edge backend and to `notifyTtsFallback`.
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createMlxAudioBackend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const mlxaudio = settings.mlxaudio;
  return {
    name: 'mlxaudio',
    outputExtension: 'wav',
    cacheKeyParts() {
      return ['mlxaudio', mlxaudio.python, mlxaudio.model, mlxaudio.voice, mlxaudio.langCode, mlxaudio.stream];
    },
    async synthesize(text, { signal, kind = 'final' } = {}) {
      // Progress utterances go straight to edge unless MLX-Audio is enabled for them.
      if (kind === 'progress' && !mlxaudio.useForProgress) {
        return edge.synthesize(text, { signal, kind });
      }
      const out = uniquePath(tmpdir, 'verbalcoding-mlxaudio', 'wav');
      try {
        await execFileAsync(mlxaudio.python || 'python3', mlxAudioArgs(text, out, mlxaudio), execOptions({
          timeout: mlxaudio.timeoutMs,
          maxBuffer: 4 * 1024 * 1024,
        }, signal));
        // NOTE(review): validateOutput presumably checks the file exists and is non-empty — confirm.
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort cleanup of a partial file; uses the real fs module, not the injected fsApi.
        fs.rm(out, { force: true }, () => {});
        notifyTtsFallback(deps, 'mlxaudio', error, kind);
        return edge.synthesize(text, { signal, kind });
      }
    },
  };
}
245
475
 
476
/**
 * Create the NeuTTS Air backend, which runs the NeuTTS Air Python script and
 * falls back to the edge backend whenever that run fails.
 *
 * @param {object} settings - TTS settings; `settings.neuttsair` configures this backend
 *   and the whole object is forwarded to the edge backend.
 * @param {object} [deps] - Injected dependencies. `execFileAsync` is required;
 *   `tmpdir`, `existsSync` and `statSync` are optional overrides. `deps` is also
 *   passed to the edge backend and to `notifyTtsFallback`.
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createNeuTtsAirBackend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const neuttsair = settings.neuttsair;
  return {
    name: 'neuttsair',
    outputExtension: 'wav',
    cacheKeyParts() {
      return ['neuttsair', neuttsair.python, neuttsair.script, neuttsair.backboneRepo, neuttsair.backboneDevice, neuttsair.codecRepo, neuttsair.codecDevice, neuttsair.refAudio, neuttsair.refText, neuttsair.language, neuttsair.sampleRate];
    },
    async synthesize(text, { signal, kind = 'final' } = {}) {
      // Progress utterances go straight to edge unless NeuTTS Air is enabled for them.
      if (kind === 'progress' && !neuttsair.useForProgress) {
        return edge.synthesize(text, { signal, kind });
      }
      const out = uniquePath(tmpdir, 'verbalcoding-neuttsair', 'wav');
      try {
        await execFileAsync(neuttsair.python || 'python3', neuTtsAirArgs(text, out, neuttsair), execOptions({
          timeout: neuttsair.timeoutMs,
          maxBuffer: 4 * 1024 * 1024,
        }, signal));
        // NOTE(review): validateOutput presumably checks the file exists and is non-empty — confirm.
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort cleanup of a partial file; uses the real fs module, not the injected fsApi.
        fs.rm(out, { force: true }, () => {});
        notifyTtsFallback(deps, 'neuttsair', error, kind);
        return edge.synthesize(text, { signal, kind });
      }
    },
  };
}
512
+
513
/**
 * Create the FireRedTTS2 backend, which runs the configured FireRedTTS2 command
 * and falls back to the edge backend whenever that run fails.
 *
 * @param {object} settings - TTS settings; `settings.fireredtts2` configures this backend
 *   and the whole object is forwarded to the edge backend.
 * @param {object} [deps] - Injected dependencies. `execFileAsync` is required;
 *   `tmpdir`, `existsSync` and `statSync` are optional overrides. `deps` is also
 *   passed to the edge backend and to `notifyTtsFallback`.
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createFireRedTts2Backend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const fireredtts2 = settings.fireredtts2;
  return {
    name: 'fireredtts2',
    outputExtension: 'wav',
    cacheKeyParts() {
      return ['fireredtts2', fireredtts2.command, fireredtts2.pretrainedDir, fireredtts2.device, fireredtts2.genType, fireredtts2.speaker, fireredtts2.promptAudio, fireredtts2.promptText, fireredtts2.useBf16];
    },
    async synthesize(text, { signal, kind = 'final' } = {}) {
      // Progress utterances go straight to edge unless FireRedTTS2 is enabled for them.
      if (kind === 'progress' && !fireredtts2.useForProgress) {
        return edge.synthesize(text, { signal, kind });
      }
      const out = uniquePath(tmpdir, 'verbalcoding-fireredtts2', 'wav');
      try {
        await execFileAsync(fireredtts2.command, fireRedTts2Args(text, out, fireredtts2), execOptions({
          timeout: fireredtts2.timeoutMs,
          maxBuffer: 4 * 1024 * 1024,
        }, signal));
        // NOTE(review): validateOutput presumably checks the file exists and is non-empty — confirm.
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort cleanup of a partial file; uses the real fs module, not the injected fsApi.
        fs.rm(out, { force: true }, () => {});
        notifyTtsFallback(deps, 'fireredtts2', error, kind);
        return edge.synthesize(text, { signal, kind });
      }
    },
  };
}
549
+
550
/**
 * Create the MOSS-TTS-Nano backend, which runs the configured command with the
 * inference script and falls back to the edge backend whenever that run fails.
 *
 * @param {object} settings - TTS settings; `settings.mossttsnano` configures this backend
 *   and the whole object is forwarded to the edge backend.
 * @param {object} [deps] - Injected dependencies. `execFileAsync` is required;
 *   `tmpdir`, `existsSync` and `statSync` are optional overrides. `deps` is also
 *   passed to the edge backend and to `notifyTtsFallback`.
 * @returns {{name: string, outputExtension: string, cacheKeyParts: Function, synthesize: Function}}
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createMossTtsNanoBackend(settings, deps = {}) {
  const execFileAsync = deps.execFileAsync;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');
  const tmpdir = deps.tmpdir || os.tmpdir();
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const mossttsnano = settings.mossttsnano;
  return {
    name: 'mossttsnano',
    outputExtension: 'wav',
    cacheKeyParts() {
      return ['mossttsnano', mossttsnano.command, mossttsnano.script, mossttsnano.checkpoint, mossttsnano.audioTokenizer, mossttsnano.mode, mossttsnano.language, mossttsnano.device, mossttsnano.dtype, mossttsnano.promptAudio, mossttsnano.promptText, mossttsnano.maxNewFrames, mossttsnano.seed];
    },
    async synthesize(text, { signal, kind = 'final' } = {}) {
      // Progress utterances go straight to edge unless MOSS-TTS-Nano is enabled for them.
      if (kind === 'progress' && !mossttsnano.useForProgress) {
        return edge.synthesize(text, { signal, kind });
      }
      const out = uniquePath(tmpdir, 'verbalcoding-mossttsnano', 'wav');
      try {
        await execFileAsync(mossttsnano.command, mossTtsNanoArgs(text, out, mossttsnano), execOptions({
          timeout: mossttsnano.timeoutMs,
          maxBuffer: 4 * 1024 * 1024,
        }, signal));
        // NOTE(review): validateOutput presumably checks the file exists and is non-empty — confirm.
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort cleanup of a partial file; uses the real fs module, not the injected fsApi.
        fs.rm(out, { force: true }, () => {});
        notifyTtsFallback(deps, 'mossttsnano', error, kind);
        return edge.synthesize(text, { signal, kind });
      }
    },
  };
}
586
+
587
/**
 * Produce the startup arguments for the MOSS-TTS-Nano MLX worker script.
 *
 * Unlike the one-shot argument builder, no text or output path is included here.
 * Prompt audio, prompt text and seed are appended only when truthy.
 *
 * @param {object} mossttsnanoMlx - MOSS-TTS-Nano MLX settings.
 * @returns {string[]} Worker script path followed by its flags.
 */
function mossTtsNanoMlxWorkerArgs(mossttsnanoMlx) {
  const {
    workerScript,
    checkpoint,
    audioTokenizer,
    mode,
    torchDevice,
    torchDtype,
    maxNewFrames,
    promptAudio,
    promptText,
    seed,
  } = mossttsnanoMlx;

  const extras = [];
  if (promptAudio) extras.push('--prompt-audio-path', promptAudio);
  if (promptText) extras.push('--prompt-text', promptText);
  if (seed) extras.push('--seed', String(seed));

  return [
    workerScript,
    '--checkpoint', checkpoint,
    '--audio-tokenizer-pretrained-name-or-path', audioTokenizer,
    '--mode', mode,
    '--torch-device', torchDevice,
    '--torch-dtype', torchDtype,
    '--max-new-frames', String(maxNewFrames),
    ...extras,
  ];
}
602
+
603
/**
 * Creates a lazily started child process that exchanges newline-delimited
 * JSON messages over stdin/stdout.
 *
 * Protocol as implemented here:
 *  - the child announces startup with a `{ type: 'ready' }` line; a ready
 *    message with `ok === false` fails startup with `message.error`;
 *  - each request is written as one JSON line `{ id, ...payload }`;
 *  - each response line carries the matching `id`; a truthy `ok` resolves the
 *    request with the whole message, anything else rejects it.
 *
 * @param {object} options
 * @param {string} options.command - Executable to spawn.
 * @param {string[]} options.args - Arguments passed to the executable.
 * @param {Function} [options.spawn] - Spawn implementation (injectable for tests).
 * @param {number} [options.startupTimeoutMs] - How long to wait for the ready line.
 * @param {Function} [options.warn] - Sink for stderr output and malformed stdout lines.
 * @returns {{ request: Function, stop: Function }}
 */
function createJsonLineWorker({ command, args, spawn = spawnProcess, startupTimeoutMs = 120000, warn = () => {} }) {
  let child = null;
  let readyPromise = null;
  let nextId = 1;
  // id -> { owner, resolve, reject, release }. `owner` is the process the
  // request was written to, so one process's death never fails another's work.
  const pending = new Map();

  // Rejects every in-flight request that was sent to `owner`.
  function rejectPending(error, owner) {
    for (const entry of [...pending.values()]) {
      if (entry.owner !== owner) continue;
      entry.release();
      entry.reject(error);
    }
  }

  // Settles the pending request whose id matches the response, if any.
  function handleResponse(message) {
    const entry = pending.get(message.id);
    if (!entry) return;
    entry.release();
    if (message.ok) entry.resolve(message);
    else entry.reject(new Error(message.error || 'worker request failed'));
  }

  // Spawns the child if needed and returns a promise that settles on startup.
  function start() {
    if (child && !child.killed) return readyPromise;
    const proc = spawn(command, args, { stdio: ['pipe', 'pipe', 'pipe'] });
    child = proc;
    // Per-process buffer: a late chunk from a previous child must not be
    // spliced into the new child's stream.
    let stdoutBuffer = '';
    readyPromise = new Promise((resolve, reject) => {
      // On timeout the child is left running; later requests reuse this
      // rejected promise and fail fast until the child exits or stop() is called.
      const startupTimer = setTimeout(() => reject(new Error('worker startup timed out')), startupTimeoutMs);
      const failStartup = error => {
        clearTimeout(startupTimer);
        reject(error); // no-op once startup has already settled
      };
      proc.stdout.on('data', chunk => {
        stdoutBuffer += chunk.toString('utf8');
        let index;
        while ((index = stdoutBuffer.indexOf('\n')) >= 0) {
          const line = stdoutBuffer.slice(0, index).trim();
          stdoutBuffer = stdoutBuffer.slice(index + 1);
          if (!line) continue;
          let message;
          try {
            message = JSON.parse(line);
          } catch (error) {
            warn('mossttsnano_mlx worker emitted non-json stdout', line.slice(0, 300));
            continue;
          }
          // Valid JSON that is not an object (null, number, string) cannot
          // carry an id or a type; report it instead of throwing on access.
          if (message === null || typeof message !== 'object') {
            warn('mossttsnano_mlx worker emitted non-object json', line.slice(0, 300));
            continue;
          }
          if (message.type === 'ready') {
            if (message.ok === false) {
              failStartup(new Error(message.error || 'worker failed startup'));
            } else {
              clearTimeout(startupTimer);
              resolve();
            }
            continue;
          }
          handleResponse(message);
        }
      });
      proc.stderr.on('data', chunk => warn('mossttsnano_mlx worker', chunk.toString('utf8').trim()));
      proc.on('error', error => {
        failStartup(error);
        rejectPending(error, proc);
      });
      proc.on('exit', (code, signal) => {
        const error = new Error(`worker exited code=${code} signal=${signal}`);
        // Only reset shared state if this process is still the current one;
        // after stop() + restart the old exit event arrives late.
        if (child === proc) {
          child = null;
          readyPromise = null;
        }
        // If the child died before announcing readiness, callers awaiting
        // startup must be released rather than left waiting forever.
        failStartup(error);
        rejectPending(error, proc);
      });
    });
    return readyPromise;
  }

  /**
   * Sends one request to the worker and resolves with its response message.
   *
   * @param {object} payload - Fields merged into the request after `id`.
   * @param {object} [options]
   * @param {number} [options.timeoutMs] - Per-request timeout (defaults to 180000).
   * @param {AbortSignal} [options.signal] - Aborts the wait for the response.
   * @returns {Promise<object>} The worker's response message.
   * @throws {Error} On startup failure, timeout, abort, write error, worker
   *   exit, or a response without a truthy `ok`.
   */
  async function request(payload, { timeoutMs, signal } = {}) {
    await start();
    const owner = child;
    if (!owner || !owner.stdin.writable) throw new Error('worker stdin is not writable');
    const id = nextId++;
    const message = { id, ...payload };
    return await new Promise((resolve, reject) => {
      let timer = null;
      // Drops everything tied to this request: timer, abort listener, map entry.
      const release = () => {
        clearTimeout(timer);
        if (signal) signal.removeEventListener('abort', onAbort);
        pending.delete(id);
      };
      const fail = error => {
        release();
        reject(error);
      };
      const onAbort = () => fail(new Error('worker request aborted'));
      if (signal) {
        if (signal.aborted) return onAbort();
        signal.addEventListener('abort', onAbort, { once: true });
      }
      timer = setTimeout(() => fail(new Error('worker request timed out')), timeoutMs || 180000);
      pending.set(id, { owner, resolve, reject, release });
      owner.stdin.write(`${JSON.stringify(message)}\n`, error => {
        if (error) fail(error);
      });
    });
  }

  // Terminates the current child (if any); the next request spawns a new one.
  function stop() {
    if (child && !child.killed) child.kill('SIGTERM');
    child = null;
    readyPromise = null;
  }

  return { request, stop };
}
719
+
720
/**
 * Builds the `mossttsnano_mlx` TTS backend.
 *
 * Synthesis goes through a persistent JSON-line worker when
 * `settings.mossttsnano_mlx.workerEnabled` is set, otherwise through a
 * one-shot subprocess via `deps.execFileAsync`. Progress utterances are sent
 * to the Edge backend unless `useForProgress` is set, and any synthesis error
 * is reported through `notifyTtsFallback` and retried on the Edge backend.
 *
 * @param {object} settings - TTS settings; reads `settings.mossttsnano_mlx`.
 * @param {object} [deps] - Injected dependencies (`execFileAsync` is required;
 *   `tmpdir`, `warn`, `existsSync`, `statSync`, `spawn` are optional).
 * @returns {object} Backend with `name`, `outputExtension`, `cacheKeyParts`,
 *   `synthesize` and `close`.
 * @throws {Error} When `deps.execFileAsync` is missing.
 */
export function createMossTtsNanoMlxBackend(settings, deps = {}) {
  const { execFileAsync } = deps;
  if (!execFileAsync) throw new Error('execFileAsync dependency is required');

  const tmpdir = deps.tmpdir || os.tmpdir();
  const warn = deps.warn || (() => {});
  const fsApi = {
    existsSync: deps.existsSync || fs.existsSync,
    statSync: deps.statSync || fs.statSync,
  };
  const edge = createEdgeTtsBackend(settings, deps);
  const cfg = settings.mossttsnano_mlx;

  let worker = null;
  if (cfg.workerEnabled) {
    worker = createJsonLineWorker({
      command: cfg.python,
      args: mossTtsNanoMlxWorkerArgs(cfg),
      spawn: deps.spawn || spawnProcess,
      startupTimeoutMs: cfg.workerStartupTimeoutMs,
      warn,
    });
  }

  // Settings that take part in the cache key, in key order.
  const cacheFields = [
    'python',
    'script',
    'workerScript',
    'torchInferScript',
    'checkpoint',
    'audioTokenizer',
    'mode',
    'language',
    'torchDevice',
    'torchDtype',
    'promptAudio',
    'promptText',
    'maxNewFrames',
    'seed',
  ];

  return {
    name: 'mossttsnano_mlx',
    outputExtension: 'wav',
    cacheKeyParts() {
      const transport = cfg.workerEnabled ? 'worker' : 'subprocess';
      return ['mossttsnano_mlx', transport, ...cacheFields.map(field => cfg[field])];
    },
    async synthesize(text, { signal, kind = 'final' } = {}) {
      const viaEdge = () => edge.synthesize(text, { signal, kind });
      const wantsEdgeProgress = kind === 'progress' && !cfg.useForProgress;
      if (wantsEdgeProgress) return viaEdge();

      const out = uniquePath(tmpdir, 'verbalcoding-mossttsnano-mlx', 'wav');
      try {
        if (!worker) {
          const options = execOptions({
            timeout: cfg.timeoutMs,
            maxBuffer: 4 * 1024 * 1024,
          }, signal);
          await execFileAsync(cfg.python, mossTtsNanoMlxArgs(text, out, cfg), options);
        } else {
          await worker.request({ text, output_audio_path: out }, { timeoutMs: cfg.timeoutMs, signal });
        }
        return validateOutput(out, fsApi);
      } catch (error) {
        // Best-effort removal of a partial output file before falling back.
        fs.rm(out, { force: true }, () => {});
        notifyTtsFallback(deps, 'mossttsnano_mlx', error, kind);
        return viaEdge();
      }
    },
    close() {
      if (worker) worker.stop();
    },
  };
}
772
+
246
773
  export function createTtsBackend(settings, deps = {}) {
247
774
  if (settings.backend === 'openvoice') return createOpenVoiceBackend(settings, deps);
248
775
  if (settings.backend === 'speechswift') return createSpeechSwiftBackend(settings, deps);
249
776
  if (settings.backend === 'supertonic') return createSupertonicBackend(settings, deps);
777
+ if (settings.backend === 'omnivoice') return createOmniVoiceBackend(settings, deps);
778
+ if (settings.backend === 'qwen3tts') return createQwen3TtsBackend(settings, deps);
779
+ if (settings.backend === 'mlxaudio') return createMlxAudioBackend(settings, deps);
780
+ if (settings.backend === 'neuttsair') return createNeuTtsAirBackend(settings, deps);
781
+ if (settings.backend === 'fireredtts2') return createFireRedTts2Backend(settings, deps);
782
+ if (settings.backend === 'mossttsnano') return createMossTtsNanoBackend(settings, deps);
783
+ if (settings.backend === 'mossttsnano_mlx') return createMossTtsNanoMlxBackend(settings, deps);
250
784
  return createEdgeTtsBackend(settings, deps);
251
785
  }