verbalcoding 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/.env.example +98 -2
  2. package/README.es.md +134 -0
  3. package/README.fr.md +134 -0
  4. package/README.ja.md +134 -0
  5. package/README.ko.md +134 -0
  6. package/README.md +118 -74
  7. package/README.ru.md +134 -0
  8. package/README.zh.md +133 -0
  9. package/app-node/agent_adapters.mjs +37 -5
  10. package/app-node/agent_adapters.test.mjs +27 -1
  11. package/app-node/agent_detect.mjs +73 -0
  12. package/app-node/agent_detect.test.mjs +77 -0
  13. package/app-node/agent_routing.mjs +148 -0
  14. package/app-node/agent_routing.test.mjs +138 -0
  15. package/app-node/agent_turn.mjs +86 -0
  16. package/app-node/agent_turn.test.mjs +109 -0
  17. package/app-node/bridge_context.mjs +73 -0
  18. package/app-node/bridge_context.test.mjs +54 -0
  19. package/app-node/bridge_state.mjs +4 -0
  20. package/app-node/bridge_wireup.test.mjs +462 -0
  21. package/app-node/cli_install.test.mjs +31 -0
  22. package/app-node/cross_agent_routing.test.mjs +78 -0
  23. package/app-node/discord_command_router.mjs +204 -0
  24. package/app-node/discord_command_router.test.mjs +311 -0
  25. package/app-node/discord_voice_setup.mjs +251 -0
  26. package/app-node/discord_voice_setup.test.mjs +86 -0
  27. package/app-node/hermes_profiles.test.mjs +12 -1
  28. package/app-node/install_config.mjs +113 -3
  29. package/app-node/install_config.test.mjs +8 -0
  30. package/app-node/instance_doctor.test.mjs +9 -0
  31. package/app-node/instances.test.mjs +8 -1
  32. package/app-node/main.mjs +513 -1058
  33. package/app-node/mcp_tools.test.mjs +7 -0
  34. package/app-node/notification_handler.mjs +89 -0
  35. package/app-node/notification_handler.test.mjs +187 -0
  36. package/app-node/notify.mjs +73 -0
  37. package/app-node/notify.test.mjs +68 -0
  38. package/app-node/plan_dispatcher.mjs +215 -0
  39. package/app-node/plan_dispatcher.test.mjs +101 -0
  40. package/app-node/plan_mode.mjs +203 -0
  41. package/app-node/plan_mode.test.mjs +231 -0
  42. package/app-node/progress_handler.mjs +220 -0
  43. package/app-node/progress_handler.test.mjs +193 -0
  44. package/app-node/progress_speech.mjs +54 -32
  45. package/app-node/progress_speech.test.mjs +12 -3
  46. package/app-node/project_sessions.mjs +5 -2
  47. package/app-node/project_sessions.test.mjs +7 -0
  48. package/app-node/research_mode.mjs +282 -0
  49. package/app-node/research_mode.test.mjs +264 -0
  50. package/app-node/restart_notice.mjs +3 -0
  51. package/app-node/restart_notice.test.mjs +11 -0
  52. package/app-node/session_ontology.mjs +271 -0
  53. package/app-node/session_ontology.test.mjs +130 -0
  54. package/app-node/smart_progress.mjs +94 -0
  55. package/app-node/smart_progress.test.mjs +66 -0
  56. package/app-node/stream_sentencer.mjs +91 -0
  57. package/app-node/stream_sentencer.test.mjs +129 -0
  58. package/app-node/streaming_tts_queue.mjs +52 -0
  59. package/app-node/streaming_tts_queue.test.mjs +64 -0
  60. package/app-node/stt_whisper.mjs +24 -0
  61. package/app-node/stt_whisper.test.mjs +32 -0
  62. package/app-node/text_routing.mjs +22 -0
  63. package/app-node/text_routing.test.mjs +23 -1
  64. package/app-node/tts_backends.mjs +537 -3
  65. package/app-node/tts_backends.test.mjs +454 -0
  66. package/app-node/tts_player.mjs +164 -0
  67. package/app-node/tts_player.test.mjs +202 -0
  68. package/app-node/tts_runtime.mjs +134 -0
  69. package/app-node/tts_runtime.test.mjs +89 -0
  70. package/app-node/tts_settings.mjs +150 -3
  71. package/app-node/tts_settings.test.mjs +204 -0
  72. package/app-node/tts_voice_config.mjs +136 -2
  73. package/app-node/tts_voice_config.test.mjs +94 -0
  74. package/app-node/utterance_router.mjs +216 -0
  75. package/app-node/utterance_router.test.mjs +236 -0
  76. package/app-node/voice_autojoin.mjs +37 -0
  77. package/app-node/voice_autojoin.test.mjs +59 -0
  78. package/app-node/voice_io.mjs +272 -0
  79. package/app-node/voice_io.test.mjs +102 -0
  80. package/app-node/voice_turn_runner.mjs +449 -0
  81. package/app-node/voice_turn_runner.test.mjs +289 -0
  82. package/docs/CONFIGURATION.md +79 -96
  83. package/docs/FRESH_INSTALL.md +105 -63
  84. package/docs/HARNESSES.md +58 -0
  85. package/docs/HARNESS_AIDER.md +50 -0
  86. package/docs/HARNESS_CLAUDE.md +56 -0
  87. package/docs/HARNESS_CODEX.md +56 -0
  88. package/docs/HARNESS_CURSOR.md +45 -0
  89. package/docs/HARNESS_GEMINI.md +45 -0
  90. package/docs/HARNESS_HERMES.md +57 -0
  91. package/docs/HARNESS_OPENCLAW.md +44 -0
  92. package/docs/HARNESS_OPENCODE.md +44 -0
  93. package/docs/HERMES_VOICE.md +65 -0
  94. package/docs/MULTI_INSTANCE.md +16 -0
  95. package/docs/README.md +50 -0
  96. package/docs/RELEASE.md +42 -19
  97. package/docs/ROADMAP.md +53 -0
  98. package/docs/TROUBLESHOOTING.md +126 -0
  99. package/docs/TTS_BACKENDS.md +227 -0
  100. package/docs/USAGE.md +94 -40
  101. package/docs/assets/figures/verbalcoding-flow.svg +1 -1
  102. package/docs/i18n/AGENTS.es.md +34 -0
  103. package/docs/i18n/AGENTS.fr.md +34 -0
  104. package/docs/i18n/AGENTS.ja.md +34 -0
  105. package/docs/i18n/AGENTS.ko.md +34 -0
  106. package/docs/i18n/AGENTS.ru.md +34 -0
  107. package/docs/i18n/AGENTS.zh.md +34 -0
  108. package/docs/i18n/CONFIGURATION.es.md +25 -0
  109. package/docs/i18n/CONFIGURATION.fr.md +25 -0
  110. package/docs/i18n/CONFIGURATION.ja.md +25 -0
  111. package/docs/i18n/CONFIGURATION.ko.md +25 -0
  112. package/docs/i18n/CONFIGURATION.ru.md +25 -0
  113. package/docs/i18n/CONFIGURATION.zh.md +25 -0
  114. package/docs/i18n/FRESH_INSTALL.es.md +27 -2
  115. package/docs/i18n/FRESH_INSTALL.fr.md +27 -2
  116. package/docs/i18n/FRESH_INSTALL.ja.md +27 -2
  117. package/docs/i18n/FRESH_INSTALL.ko.md +27 -2
  118. package/docs/i18n/FRESH_INSTALL.ru.md +27 -2
  119. package/docs/i18n/FRESH_INSTALL.zh.md +27 -2
  120. package/docs/i18n/HARNESSES.es.md +58 -0
  121. package/docs/i18n/HARNESSES.fr.md +58 -0
  122. package/docs/i18n/HARNESSES.ja.md +58 -0
  123. package/docs/i18n/HARNESSES.ko.md +58 -0
  124. package/docs/i18n/HARNESSES.ru.md +58 -0
  125. package/docs/i18n/HARNESSES.zh.md +58 -0
  126. package/docs/i18n/HARNESS_AIDER.es.md +48 -0
  127. package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
  128. package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
  129. package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
  130. package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
  131. package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
  132. package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
  133. package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
  134. package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
  135. package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
  136. package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
  137. package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
  138. package/docs/i18n/HARNESS_CODEX.es.md +55 -0
  139. package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
  140. package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
  141. package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
  142. package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
  143. package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
  144. package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
  145. package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
  146. package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
  147. package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
  148. package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
  149. package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
  150. package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
  151. package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
  152. package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
  153. package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
  154. package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
  155. package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
  156. package/docs/i18n/HARNESS_HERMES.es.md +54 -0
  157. package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
  158. package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
  159. package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
  160. package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
  161. package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
  162. package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
  163. package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
  164. package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
  165. package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
  166. package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
  167. package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
  168. package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
  169. package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
  170. package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
  171. package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
  172. package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
  173. package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
  174. package/docs/i18n/HERMES_VOICE.es.md +46 -0
  175. package/docs/i18n/HERMES_VOICE.fr.md +46 -0
  176. package/docs/i18n/HERMES_VOICE.ja.md +46 -0
  177. package/docs/i18n/HERMES_VOICE.ko.md +65 -0
  178. package/docs/i18n/HERMES_VOICE.ru.md +46 -0
  179. package/docs/i18n/HERMES_VOICE.zh.md +46 -0
  180. package/docs/i18n/MULTI_INSTANCE.es.md +25 -0
  181. package/docs/i18n/MULTI_INSTANCE.fr.md +25 -0
  182. package/docs/i18n/MULTI_INSTANCE.ja.md +25 -0
  183. package/docs/i18n/MULTI_INSTANCE.ko.md +25 -0
  184. package/docs/i18n/MULTI_INSTANCE.ru.md +25 -0
  185. package/docs/i18n/MULTI_INSTANCE.zh.md +25 -0
  186. package/docs/i18n/README.es.md +20 -134
  187. package/docs/i18n/README.fr.md +20 -134
  188. package/docs/i18n/README.ja.md +20 -134
  189. package/docs/i18n/README.ko.md +20 -133
  190. package/docs/i18n/README.ru.md +20 -134
  191. package/docs/i18n/README.zh.md +20 -133
  192. package/docs/i18n/RELEASE.es.md +26 -1
  193. package/docs/i18n/RELEASE.fr.md +26 -1
  194. package/docs/i18n/RELEASE.ja.md +26 -1
  195. package/docs/i18n/RELEASE.ko.md +26 -1
  196. package/docs/i18n/RELEASE.ru.md +26 -1
  197. package/docs/i18n/RELEASE.zh.md +26 -1
  198. package/docs/i18n/TROUBLESHOOTING.es.md +39 -0
  199. package/docs/i18n/TROUBLESHOOTING.fr.md +39 -0
  200. package/docs/i18n/TROUBLESHOOTING.ja.md +39 -0
  201. package/docs/i18n/TROUBLESHOOTING.ko.md +39 -0
  202. package/docs/i18n/TROUBLESHOOTING.ru.md +39 -0
  203. package/docs/i18n/TROUBLESHOOTING.zh.md +39 -0
  204. package/docs/i18n/USAGE.es.md +25 -0
  205. package/docs/i18n/USAGE.fr.md +25 -0
  206. package/docs/i18n/USAGE.ja.md +25 -0
  207. package/docs/i18n/USAGE.ko.md +25 -0
  208. package/docs/i18n/USAGE.ru.md +25 -0
  209. package/docs/i18n/USAGE.zh.md +25 -0
  210. package/docs/superpowers/plans/2026-05-13-phase1-streaming-pipeline.md +122 -0
  211. package/docs/superpowers/plans/2026-05-13-phase10-push-notifications.md +152 -0
  212. package/docs/superpowers/plans/2026-05-13-phase2-agent-adapters.md +242 -0
  213. package/docs/superpowers/plans/2026-05-13-phase6-smart-progress.md +172 -0
  214. package/docs/superpowers/plans/2026-05-13-phase7-voice-plan-mode.md +108 -0
  215. package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
  216. package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
  217. package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
  218. package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
  219. package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
  220. package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
  221. package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
  222. package/integrations/fireredtts2/mlx_llm.py +183 -0
  223. package/integrations/fireredtts2/synth.py +156 -0
  224. package/integrations/fireredtts2/synth_mlx.py +196 -0
  225. package/integrations/mlxaudio/synth.py +74 -0
  226. package/integrations/neuttsair/synth.py +104 -0
  227. package/integrations/omnivoice/synth.py +110 -0
  228. package/package.json +7 -1
  229. package/scripts/cli.mjs +88 -3
  230. package/scripts/doctor.mjs +115 -4
  231. package/scripts/install.mjs +20 -2
  232. package/scripts/install_fireredtts2.sh +109 -0
  233. package/scripts/install_mlxaudio.sh +34 -0
  234. package/scripts/install_mossttsnano.sh +46 -0
  235. package/scripts/postinstall.mjs +34 -0
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+
9
+
10
def repo_root() -> Path:
    """Return the third ancestor directory of this script.

    The script path is resolved first, so the result is absolute.
    """
    script_path = Path(__file__).resolve()
    return script_path.parents[2]
12
+
13
+
14
def resolve(root: Path, value: str | None) -> str | None:
    """Turn an optional path argument into a path string anchored at ``root``.

    Args:
        root: Directory that relative values are joined onto.
        value: Path text from the command line; ``~`` is expanded.

    Returns:
        ``None`` when ``value`` is empty or ``None``; otherwise the expanded
        path as a string, left as-is when absolute and prefixed with ``root``
        when relative.
    """
    if value:
        candidate = Path(value).expanduser()
        anchored = candidate if candidate.is_absolute() else root / candidate
        return str(anchored)
    return None
21
+
22
+
23
def read_text_arg(value: str | None) -> str:
    """Return transcript text given either inline text or a path to a text file.

    Args:
        value: Inline text, or a path (``~`` is expanded) to a UTF-8 text file.

    Returns:
        ``""`` for an empty/``None`` value; the stripped file contents when
        ``value`` names a regular file; otherwise ``value`` itself, stripped.
    """
    if not value:
        return ""
    # ``value`` is usually free-form text, so probing it as a path must never
    # fail: very long text raises OSError (name too long), text containing NUL
    # raises ValueError, and text starting with "~unknown" makes expanduser()
    # raise RuntimeError. Any of those simply means "not a file".
    try:
        candidate = Path(value).expanduser()
        # is_file() rather than exists(): a directory cannot be read as text.
        is_transcript_file = candidate.is_file()
    except (OSError, ValueError, RuntimeError):
        is_transcript_file = False
    if is_transcript_file:
        return candidate.read_text(encoding="utf-8").strip()
    return value.strip()
30
+
31
+
32
def parse_args() -> argparse.Namespace:
    """Parse the wrapper's command line (read from ``sys.argv``).

    ``--text`` and ``--output`` are required. ``--backbone``/``--codec`` also
    accept the ``--backbone-repo``/``--codec-repo`` spellings. All other
    options have defaults.
    """
    cli = argparse.ArgumentParser(description="NeuTTS-Air synthesis wrapper for VerbalCoding")
    for required_flag in ("--text", "--output"):
        cli.add_argument(required_flag, required=True)

    cli.add_argument("--backbone", "--backbone-repo", dest="backbone", default="neuphonic/neutts-air-q4-gguf")
    cli.add_argument("--codec", "--codec-repo", dest="codec", default="neuphonic/neucodec")

    # Plain string options, registered in the same order as they appear in --help.
    string_options = (
        ("--backbone-device", "cpu"),
        ("--codec-device", "cpu"),
        ("--ref-audio", ""),
        ("--ref-text", ""),
        ("--ref-text-file", ""),
        ("--language", "en"),
    )
    for flag, fallback in string_options:
        cli.add_argument(flag, default=fallback)

    cli.add_argument("--sample-rate", type=int, default=24000)
    cli.add_argument("--cache-ref", action="store_true")
    return cli.parse_args()
47
+
48
+
49
def main() -> int:
    """Synthesize ``--text`` with NeuTTS-Air and write the audio to ``--output``.

    Returns:
        Process exit code: ``0`` on success, ``127`` when soundfile, torch or
        neutts cannot be imported, ``66`` when the reference audio is missing,
        ``1`` when loading, encoding or generation fails.
    """
    args = parse_args()
    root = repo_root()
    # A vendored checkout, when present, is put first on the import path.
    vendor = root / "vendor" / "neutts-air"
    if vendor.exists():
        sys.path.insert(0, str(vendor))
    try:
        import soundfile as sf
        import torch
        from neutts import NeuTTS
    except Exception as exc:
        print(f"NeuTTS-Air dependencies are missing: {exc}", file=sys.stderr, flush=True)
        return 127

    ref_audio = resolve(root, args.ref_audio)
    if not ref_audio or not Path(ref_audio).exists():
        print(f"NeuTTS-Air reference audio not found: {ref_audio}", file=sys.stderr, flush=True)
        return 66

    # --ref-text-file is only ever a file: try it as given, then relative to the
    # repo root (the same anchoring --ref-audio gets). A missing or unreadable
    # file is reported instead of its path being used as the transcript.
    ref_text = ""
    if args.ref_text_file:
        try:
            transcript_path = Path(args.ref_text_file).expanduser()
            if not transcript_path.is_file() and not transcript_path.is_absolute():
                transcript_path = root / transcript_path
            if transcript_path.is_file():
                ref_text = transcript_path.read_text(encoding="utf-8").strip()
            else:
                print(
                    f"[neutts-air] reference transcript file not found: {args.ref_text_file}",
                    file=sys.stderr,
                    flush=True,
                )
        except (OSError, ValueError, RuntimeError) as exc:
            print(
                f"[neutts-air] could not read reference transcript file {args.ref_text_file}: {exc}",
                file=sys.stderr,
                flush=True,
            )
    if not ref_text:
        ref_text = read_text_arg(args.ref_text)
    if not ref_text:
        # Fall back to a short generic transcript; users should configure NEUTTSAIR_REF_TEXT
        # or NEUTTSAIR_REF_TEXT_FILE for best cloning quality.
        ref_text = "This is a reference voice sample."

    out = Path(args.output).expanduser()
    out.parent.mkdir(parents=True, exist_ok=True)
    # The encoded reference is cached next to the reference audio file.
    cache_path = Path(ref_audio).with_suffix(".neutts.pt")

    try:
        print(f"[neutts-air] loading backbone={args.backbone} codec={args.codec}", file=sys.stderr, flush=True)
        tts = NeuTTS(
            backbone_repo=args.backbone,
            backbone_device=args.backbone_device,
            codec_repo=args.codec,
            codec_device=args.codec_device,
        )
        ref_codes = None
        if args.cache_ref and cache_path.exists():
            print(f"[neutts-air] loading cached reference {cache_path}", file=sys.stderr, flush=True)
            try:
                # NOTE(review): weights_only=False unpickles arbitrary objects, so the
                # cache file must be trusted. Confirm what encode_reference returns
                # before tightening this to weights_only=True.
                ref_codes = torch.load(cache_path, map_location="cpu", weights_only=False)
            except Exception as exc:
                # The cache is only an optimisation; an unreadable one is re-encoded.
                print(f"[neutts-air] cached reference unusable ({exc}); re-encoding", file=sys.stderr, flush=True)
                ref_codes = None
        if ref_codes is None:
            print(f"[neutts-air] encoding reference {ref_audio}", file=sys.stderr, flush=True)
            ref_codes = tts.encode_reference(ref_audio)
            if args.cache_ref:
                try:
                    torch.save(ref_codes, cache_path)
                except Exception as exc:
                    # Failing to store the cache must not fail the synthesis itself.
                    print(f"[neutts-air] could not write reference cache {cache_path}: {exc}", file=sys.stderr, flush=True)
        print(f"[neutts-air] generating chars={len(args.text)}", file=sys.stderr, flush=True)
        wav = tts.infer(args.text, ref_codes, ref_text)
        sf.write(str(out), wav, args.sample_rate)
        print(f"[neutts-air] wrote {out}", file=sys.stderr, flush=True)
        return 0
    except Exception as exc:
        print(f"NeuTTS-Air synthesis failed: {exc}", file=sys.stderr, flush=True)
        return 1
101
+
102
+
103
+ if __name__ == "__main__":
104
+ raise SystemExit(main())
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ """Synthesize speech with k2-fsa OmniVoice for VerbalCoding.
3
+
4
+ The wrapper keeps the Node bridge independent from OmniVoice's Python runtime.
5
+ It accepts one text chunk and writes a 24 kHz WAV file.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import inspect
12
+ import sys
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+
17
+ def _torch_dtype(name: str):
18
+ import torch
19
+
20
+ normalized = (name or "").lower()
21
+ if normalized in {"auto", ""}:
22
+ return None
23
+ if normalized in {"float16", "fp16", "half"}:
24
+ return torch.float16
25
+ if normalized in {"bfloat16", "bf16"}:
26
+ return torch.bfloat16
27
+ if normalized in {"float32", "fp32"}:
28
+ return torch.float32
29
+ raise ValueError(f"Unsupported OmniVoice dtype: {name}")
30
+
31
+
32
+ def _filtered_call(fn, **kwargs: Any):
33
+ """Call fn with only supported kwargs when the signature is inspectable."""
34
+
35
+ try:
36
+ sig = inspect.signature(fn)
37
+ except (TypeError, ValueError):
38
+ return fn(**{k: v for k, v in kwargs.items() if v not in (None, "")})
39
+
40
+ accepts_kwargs = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values())
41
+ clean = {k: v for k, v in kwargs.items() if v not in (None, "")}
42
+ if accepts_kwargs:
43
+ return fn(**clean)
44
+ return fn(**{k: v for k, v in clean.items() if k in sig.parameters})
45
+
46
+
47
def synthesize(args: argparse.Namespace) -> None:
    """Generate speech for ``args.text`` with OmniVoice and write ``args.output``.

    The model is loaded from ``args.model`` on ``args.device``, and the audio
    is written with a sample rate of 24000.

    Raises:
        RuntimeError: if soundfile, torch or omnivoice cannot be imported, if
            the model returns an empty list, or if the output file is missing
            or empty after writing.
        ValueError: if ``args.dtype`` is not a recognised dtype name.
    """
    try:
        import soundfile as sf
        import torch
        from omnivoice import OmniVoice
    except Exception as exc:  # pragma: no cover - exercised in real install
        raise RuntimeError(
            "OmniVoice dependencies are missing. Install them in OMNIVOICE_PYTHON's environment: "
            "pip install torch torchaudio soundfile omnivoice"
        ) from exc

    requested_dtype = _torch_dtype(args.dtype)
    # Only pass a dtype when one was requested; "auto" leaves it out.
    load_kwargs = {
        "device_map": args.device,
        **({"dtype": requested_dtype} if requested_dtype is not None else {}),
    }

    model = OmniVoice.from_pretrained(args.model, **load_kwargs)
    if hasattr(torch, "set_grad_enabled"):
        torch.set_grad_enabled(False)

    # model.generate only receives the options it declares and that are non-empty.
    generated = _filtered_call(
        model.generate,
        text=args.text,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
        language=args.language,
        speaker=args.speaker,
    )
    # Unwrap a tuple result, then a list result, down to the first item.
    if isinstance(generated, tuple):
        generated = generated[0]
    if isinstance(generated, list):
        if not generated:
            raise RuntimeError("OmniVoice returned no audio")
        generated = generated[0]

    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(out), generated, 24000)
    wrote_audio = out.exists() and out.stat().st_size > 0
    if not wrote_audio:
        raise RuntimeError(f"OmniVoice wrote empty output: {out}")
87
+
88
+
89
+ def main(argv: list[str] | None = None) -> int:
90
+ parser = argparse.ArgumentParser(description="VerbalCoding OmniVoice TTS wrapper")
91
+ parser.add_argument("--text", required=True)
92
+ parser.add_argument("--output", required=True)
93
+ parser.add_argument("--model", default="k2-fsa/OmniVoice")
94
+ parser.add_argument("--device", default="mps")
95
+ parser.add_argument("--dtype", default="float16")
96
+ parser.add_argument("--ref-audio", default="")
97
+ parser.add_argument("--ref-text", default="")
98
+ parser.add_argument("--language", default="ko")
99
+ parser.add_argument("--speaker", default="")
100
+ args = parser.parse_args(argv)
101
+ synthesize(args)
102
+ return 0
103
+
104
+
105
+ if __name__ == "__main__": # pragma: no cover
106
+ try:
107
+ raise SystemExit(main())
108
+ except Exception as exc:
109
+ print(f"OmniVoice synthesis failed: {exc}", file=sys.stderr)
110
+ raise SystemExit(1)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "verbalcoding",
3
- "version": "0.2.11",
3
+ "version": "0.2.13",
4
4
  "description": "Discord voice bridge for CLI coding agents.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -35,12 +35,18 @@
35
35
  "scripts/*.mjs",
36
36
  "scripts/*.sh",
37
37
  "integrations/openvoice/*.py",
38
+ "integrations/omnivoice/*.py",
39
+ "integrations/fireredtts2/*.py",
40
+ "integrations/mlxaudio/*.py",
41
+ "integrations/neuttsair/*.py",
38
42
  "run.sh",
39
43
  ".env.example",
40
44
  "README.md",
45
+ "README.*.md",
41
46
  "LICENSE"
42
47
  ],
43
48
  "scripts": {
49
+ "postinstall": "node scripts/postinstall.mjs",
44
50
  "start": "node app-node/main.mjs",
45
51
  "setup": "node scripts/install.mjs",
46
52
  "doctor": "node scripts/doctor.mjs",
package/scripts/cli.mjs CHANGED
@@ -10,7 +10,17 @@ import {
10
10
  normalizeInstanceAnswers,
11
11
  parseKeyValueEnv,
12
12
  renderInstanceSetupSummary,
13
+ SUPPORTED_TTS_BACKENDS,
13
14
  } from '../app-node/install_config.mjs';
15
+ import {
16
+ applyTtsVoiceSelectionToEnv,
17
+ defaultTtsVoiceConfig,
18
+ effectiveTtsVoiceSelection,
19
+ readTtsVoiceConfig,
20
+ updateTtsVoiceConfig,
21
+ writeTtsVoiceConfig,
22
+ } from '../app-node/tts_voice_config.mjs';
23
+ import { normalizeTtsBackendName } from '../app-node/tts_settings.mjs';
14
24
  import { ensureHermesProfile, validateProfileName } from '../app-node/hermes_profiles.mjs';
15
25
  import { checkInstanceConfigs } from '../app-node/instance_doctor.mjs';
16
26
  import { healInstanceProfileFromEnv } from '../app-node/instance_profile_lifecycle.mjs';
@@ -38,6 +48,8 @@ Usage:
38
48
  vc language <ko|en|auto>
39
49
  vc language status
40
50
  vc restart auto <on|off|status>
51
+ vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}>
52
+ vc tts status
41
53
  vc bot invite <client-id> [--guild <guild-id>]
42
54
  vc instance list
43
55
  vc instance setup [name] [--start]
@@ -48,14 +60,16 @@ Usage:
48
60
  vc doctor
49
61
 
50
62
  Examples:
51
- npx verbalcoding setup --yes
52
- vc setup --yes
53
- vc setup token
63
+ npx verbalcoding setup
64
+ vc setup
65
+ vc setup --yes # automation/non-interactive starter config
66
+ vc setup token # later token update
54
67
  vc setup channels "General,Team Voice"
55
68
  vc start
56
69
  vc language en
57
70
  vc language ko
58
71
  vc language auto
72
+ vc tts backend qwen3
59
73
  vc restart auto off
60
74
  vc bot invite 123456789012345678
61
75
  `;
@@ -96,6 +110,56 @@ function printLanguageStatus(values) {
96
110
  console.log(`TTS voice: ${s.ttsVoice}`);
97
111
  }
98
112
 
113
/**
 * Locate the TTS voice config file.
 *
 * The location comes from `TTS_VOICE_CONFIG` in `values`, then from the process
 * environment, then defaults to `config/tts-voices.json`. Relative locations
 * are joined onto ROOT.
 *
 * @param {Record<string, string>} [values] Parsed env values; read from the env file when omitted.
 * @returns {string} Path of the voice config file.
 */
function ttsVoiceConfigPath(values = readEnvFile()) {
  const fallback = path.join('config', 'tts-voices.json');
  const configured = values.TTS_VOICE_CONFIG || process.env.TTS_VOICE_CONFIG || fallback;
  if (path.isAbsolute(configured)) {
    return configured;
  }
  return path.join(ROOT, configured);
}
117
+
118
/**
 * Copy `updates` without the entries that carry no value.
 *
 * @param {Record<string, unknown>} updates Candidate key/value pairs.
 * @returns {Record<string, unknown>} New object without entries whose value is
 *   `null`, `undefined` or the empty string.
 */
function compactUpdates(updates) {
  const kept = [];
  for (const [key, value] of Object.entries(updates)) {
    if (value == null || value === '') continue;
    kept.push([key, value]);
  }
  return Object.fromEntries(kept);
}
121
+
122
/**
 * Print the effective TTS backend, voice type and voice to stdout.
 *
 * The selection is derived from the voice config file and the env file. The
 * voice line shows the voice's label, else its `voice` field, else `-`.
 */
function printTtsStatus() {
  const env = readEnvFile();
  const voiceConfig = readTtsVoiceConfig(ttsVoiceConfigPath(env), defaultTtsVoiceConfig());
  const selected = effectiveTtsVoiceSelection(voiceConfig, env);
  const voiceName = selected.voice?.label || selected.voice?.voice || '-';
  const lines = [
    `TTS backend: ${selected.backend}`,
    `TTS voice type: ${selected.voiceType}`,
    `TTS voice: ${voiceName}`,
  ];
  for (const line of lines) {
    console.log(line);
  }
}
130
+
131
/**
 * Switch the configured TTS backend from the command line.
 *
 * Writes the updated voice config file first, then upserts the resulting
 * TTS settings into the env file (empty values are left out), and prints the
 * new backend and voice type.
 *
 * @param {string} rawBackend Backend name as typed by the user.
 * @param {string} [rawVoiceType] Optional voice type to select.
 * @throws {Error} When `rawBackend` does not normalize to a backend name.
 */
function setTtsBackendFromCli(rawBackend, rawVoiceType = '') {
  const backend = normalizeTtsBackendName(rawBackend, '');
  if (!backend) {
    throw new Error(`Unknown TTS backend: ${rawBackend}. Supported: ${SUPPORTED_TTS_BACKENDS.join(', ')}`);
  }

  const env = readEnvFile();
  const configPath = ttsVoiceConfigPath(env);
  const nextConfig = updateTtsVoiceConfig(
    readTtsVoiceConfig(configPath, defaultTtsVoiceConfig()),
    { backend, voiceType: rawVoiceType },
  );
  writeTtsVoiceConfig(configPath, nextConfig);

  // Resolve the selection as if the env file already named the new backend.
  const envForSelection = {
    ...env,
    TTS_BACKEND: backend,
    TTS_VOICE_TYPE: rawVoiceType || env.TTS_VOICE_TYPE,
  };
  const selected = effectiveTtsVoiceSelection(nextConfig, envForSelection);
  const nextEnv = applyTtsVoiceSelectionToEnv(env, selected);

  // Voice and language keep their current env value when the selection sets none.
  const updates = {
    TTS_BACKEND: nextEnv.TTS_BACKEND,
    TTS_VOICE_TYPE: nextEnv.TTS_VOICE_TYPE,
    TTS_VOICE: nextEnv.TTS_VOICE || env.TTS_VOICE,
    VOICE_LANGUAGE: nextEnv.VOICE_LANGUAGE || env.VOICE_LANGUAGE,
  };
  const backendSpecificKeys = [
    'QWEN3TTS_MODE',
    'QWEN3TTS_SPEAKER',
    'QWEN3TTS_REF_AUDIO',
    'QWEN3TTS_INSTRUCT',
    'MLXAUDIO_VOICE',
    'FIREREDTTS2_PROMPT_AUDIO',
    'MOSSTTSNANO_MODE',
    'MOSSTTSNANO_PROMPT_AUDIO',
  ];
  for (const key of backendSpecificKeys) {
    updates[key] = nextEnv[key];
  }
  upsertEnvFile(ENV_PATH, compactUpdates(updates));

  console.log(`Updated ${ENV_PATH}`);
  console.log(`TTS backend: ${selected.backend}`);
  console.log(`TTS voice type: ${selected.voiceType}`);
  console.log('Restart the bridge for CLI changes to take effect; voice requests switch the running bridge immediately.');
}
162
+
99
163
  function printInstanceStatus(statuses) {
100
164
  if (statuses.length === 0) {
101
165
  console.log('No instance env files found in instances/*.env');
@@ -307,9 +371,30 @@ async function main(argv = process.argv.slice(2)) {
307
371
  }
308
372
  if (command === 'status') {
309
373
  printLanguageStatus(readEnvFile());
374
+ printTtsStatus();
310
375
  console.log(autoRestartStatusText(readEnvFile()));
311
376
  return;
312
377
  }
378
+ if (command === 'tts' || command === 'voice') {
379
+ if (!subcommand || subcommand === 'status') {
380
+ printTtsStatus();
381
+ return;
382
+ }
383
+ if (subcommand === 'backend' || subcommand === 'switch' || subcommand === 'set') {
384
+ const backend = argv[2];
385
+ const voiceType = argv.includes('--voice-type') ? argv[argv.indexOf('--voice-type') + 1] : '';
386
+ if (!backend || backend.startsWith('--')) {
387
+ console.error(`Use: vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}> [--voice-type <name>]`);
388
+ process.exitCode = 2;
389
+ return;
390
+ }
391
+ setTtsBackendFromCli(backend, voiceType);
392
+ return;
393
+ }
394
+ console.error('Use: vc tts status OR vc tts backend <name>');
395
+ process.exitCode = 2;
396
+ return;
397
+ }
313
398
  if (command === 'instance') {
314
399
  await handleInstanceCommand(argv);
315
400
  return;
@@ -5,6 +5,7 @@ import { spawnSync } from 'node:child_process';
5
5
  import { parseKeyValueEnv } from '../app-node/install_config.mjs';
6
6
  import { checkInstanceConfigs, formatInstanceDoctor } from '../app-node/instance_doctor.mjs';
7
7
  import { autoRestartVoiceBotEnabled } from '../app-node/restart_policy.mjs';
8
+ import { detectInstalledAgents, formatAgentDetectionReport } from '../app-node/agent_detect.mjs';
8
9
 
9
10
  const ROOT = path.resolve(path.dirname(new URL(import.meta.url).pathname), '..');
10
11
  const args = process.argv.slice(2);
@@ -19,12 +20,25 @@ function readEnvFile(file) {
19
20
  }
20
21
  }
21
22
 
23
/**
 * Drop env entries whose value still contains a shell variable reference.
 *
 * @param {Record<string, unknown>} env Parsed key/value pairs.
 * @returns {Record<string, unknown>} New object without the entries whose
 *   string value contains `$NAME` or `${...}`.
 */
function dropUnexpandedRefs(env) {
  // parseKeyValueEnv does no shell expansion, so values like
  //   PATH="$JAVA_HOME/bin:$PATH"
  // would otherwise clobber process.env.PATH with a literal "$VAR" string,
  // breaking every JS-level PATH lookup (e.g. agent_detect.defaultWhich).
  const unexpandedRef = /\$[A-Za-z_][A-Za-z0-9_]*|\$\{[^}]+\}/;
  return Object.entries(env).reduce((kept, [key, value]) => {
    const hasRef = typeof value === 'string' && unexpandedRef.test(value);
    if (!hasRef) {
      kept[key] = value;
    }
    return kept;
  }, {});
}
35
+
22
36
  function mergeEnv() {
23
37
  // Project .env intentionally wins over ~/.zshrc so local setup is reproducible.
24
38
  return {
25
39
  ...process.env,
26
- ...readEnvFile(path.join(process.env.HOME || '', '.zshrc')),
27
- ...readEnvFile(path.join(ROOT, '.env')),
40
+ ...dropUnexpandedRefs(readEnvFile(path.join(process.env.HOME || '', '.zshrc'))),
41
+ ...dropUnexpandedRefs(readEnvFile(path.join(ROOT, '.env'))),
28
42
  };
29
43
  }
30
44
 
@@ -104,10 +118,43 @@ function fixablePrerequisites(env) {
104
118
  const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
105
119
  if (!resolveCommand(edgeCommand, [path.join(ROOT, '.venv-tts', 'bin', 'edge-tts')])) missing.push('edge-tts');
106
120
  }
121
+ if (ttsBackend === 'fireredtts2') {
122
+ const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
123
+ const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
124
+ const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
125
+ if (!isExecutable(firePath) || !fs.existsSync(fireModel)) missing.push('FireRedTTS-2');
126
+ }
107
127
  if (backend === 'hermes' && !commandExists('hermes')) missing.push('hermes CLI');
108
128
  return missing;
109
129
  }
110
130
 
131
/**
 * Run the FireRedTTS-2 installer when that backend is selected but missing.
 *
 * Nothing happens unless `TTS_BACKEND` is `fireredtts2` and either the command
 * is not executable or the model directory does not exist. Auto-install can be
 * switched off with `VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2=0|false|no|off`
 * in the process environment.
 *
 * The project `.env` is pointed at the default install locations only after
 * the installer succeeds, so a failed run leaves existing `FIREREDTTS2_*`
 * settings untouched.
 *
 * @param {Record<string, string>} env Merged environment values.
 * @returns {boolean} `true` when the installer was launched (even if it
 *   failed), `false` when no install was attempted.
 */
function installFireRedTts2IfNeeded(env) {
  const ttsBackend = (env.TTS_BACKEND || 'edge').toLowerCase();
  if (ttsBackend !== 'fireredtts2') return false;

  const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
  const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
  const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
  if (isExecutable(firePath) && fs.existsSync(fireModel)) return false;

  const optOut = String(process.env.VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 || '1').toLowerCase();
  if (['0', 'false', 'no', 'off'].includes(optOut)) {
    console.log('Skipping FireRedTTS-2 auto-install because VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 is off.');
    return false;
  }

  console.log('VerbalCoding doctor: TTS_BACKEND=fireredtts2 but FireRedTTS-2 is missing; installing...');
  const result = spawnSync('bash', [path.join(ROOT, 'scripts', 'install_fireredtts2.sh'), '--yes'], {
    cwd: ROOT,
    stdio: 'inherit',
    env: process.env,
  });

  // spawnSync reports a launch failure via `error` and a signal kill via
  // `signal`; in both cases `status` is null, so check them before the status.
  if (result.error) {
    console.log(`FireRedTTS-2 installer could not be started: ${result.error.message}. Continuing with checks.`);
    return true;
  }
  if (result.status !== 0) {
    const outcome = result.signal
      ? `was terminated by signal ${result.signal}`
      : `exited with status ${result.status}`;
    console.log(`FireRedTTS-2 installer ${outcome}. Continuing with checks.`);
    return true;
  }

  upsertEnvFile(path.join(ROOT, '.env'), {
    FIREREDTTS2_COMMAND: './.local/bin/fireredtts2',
    FIREREDTTS2_PRETRAINED_DIR: 'pretrained_models/FireRedTTS2',
  });
  return true;
}
157
+
111
158
  function installHermesCliIfNeeded(env) {
112
159
  const backend = (env.AGENT_BACKEND || 'hermes').toLowerCase();
113
160
  if (backend !== 'hermes' || commandExists('hermes')) return false;
@@ -191,6 +238,11 @@ if (autoFixEnabled && missingBeforeFix.length > 0) {
191
238
  env = mergeEnv();
192
239
  }
193
240
  if (autoFixEnabled) {
241
+ const fireAttempted = installFireRedTts2IfNeeded(env);
242
+ if (fireAttempted) {
243
+ console.log('');
244
+ env = mergeEnv();
245
+ }
194
246
  const hermesAttempted = installHermesCliIfNeeded(env);
195
247
  if (hermesAttempted) {
196
248
  console.log('');
@@ -210,6 +262,16 @@ if (!autoFixEnabled) note('Automatic prerequisite bootstrap', 'off');
210
262
  if (autoFixAttempted) note('Automatic prerequisite bootstrap', 'attempted');
211
263
  console.log('');
212
264
 
265
+ try {
266
+ const detection = await detectInstalledAgents(env);
267
+ console.log(formatAgentDetectionReport(detection));
268
+ const selected = detection.find(r => r.backend === backend || r.backend === backend.replace(/-/g, ''));
269
+ if (selected && !selected.present) note(`Selected backend "${backend}"`, `binary ${selected.bin} not on PATH`);
270
+ console.log('');
271
+ } catch (e) {
272
+ note('Agent backend detection', `skipped: ${e?.message || e}`);
273
+ }
274
+
213
275
  const nodeCommand = commandExists('node');
214
276
  const npmCommand = commandExists('npm');
215
277
  const ffmpegCommand = commandExists('ffmpeg');
@@ -235,8 +297,8 @@ note('Progress/voice language', env.VOICE_LANGUAGE || env.WHISPER_CPP_LANGUAGE |
235
297
  note('Latency log path', env.LATENCY_LOG_PATH || './.logs/latency.jsonl');
236
298
  note('TTS voice fallback', env.TTS_VOICE || 'ko-KR-SunHiNeural');
237
299
 
238
- if (!['edge', 'openvoice', 'speechswift', 'supertonic'].includes(ttsBackend)) {
239
- ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, or supertonic') && ok;
300
+ if (!['edge', 'openvoice', 'speechswift', 'supertonic', 'omnivoice', 'qwen3tts', 'mlxaudio', 'fireredtts2', 'mossttsnano', 'neuttsair'].includes(ttsBackend)) {
301
+ ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, supertonic, omnivoice, qwen3tts, mlxaudio, fireredtts2, mossttsnano, or neuttsair') && ok;
240
302
  }
241
303
  if (ttsBackend === 'edge') {
242
304
  const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
@@ -261,6 +323,55 @@ if (ttsBackend === 'edge') {
261
323
  ok = check('supertonic CLI', commandExists(supertonicCommand), commandExists(supertonicCommand) || 'install with: python3 -m pip install supertonic') && ok;
262
324
  note('Supertonic voice/lang/steps', `${env.SUPERTONIC_VOICE || 'M1'} / ${env.SUPERTONIC_LANGUAGE || 'ko'} / ${env.SUPERTONIC_STEPS || '2'}`);
263
325
  note('Supertonic progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.SUPERTONIC_PROGRESS || '0').toLowerCase()) ? 'supertonic' : 'edge fallback');
326
+ } else if (ttsBackend === 'omnivoice') {
327
+ const omniPython = env.OMNIVOICE_PYTHON || path.join(ROOT, '.venv-omnivoice', 'bin', 'python');
328
+ const resolvedOmniPython = path.isAbsolute(omniPython) ? omniPython : path.resolve(ROOT, omniPython);
329
+ const refAudio = path.resolve(ROOT, env.OMNIVOICE_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
330
+ ok = check('OmniVoice Python', fs.existsSync(resolvedOmniPython) || commandExists(omniPython), fs.existsSync(resolvedOmniPython) ? path.relative(ROOT, resolvedOmniPython) : 'install with: python -m venv .venv-omnivoice && .venv-omnivoice/bin/pip install torch torchaudio soundfile omnivoice') && ok;
331
+ ok = check('OmniVoice reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
332
+ ok = check('OmniVoice synth wrapper help', spawnSync(fs.existsSync(resolvedOmniPython) ? resolvedOmniPython : 'python3', ['integrations/omnivoice/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/omnivoice/synth.py') && ok;
333
+ note('OmniVoice model/device', `${env.OMNIVOICE_MODEL || 'k2-fsa/OmniVoice'} / ${env.OMNIVOICE_DEVICE || 'mps'}`);
334
+ note('OmniVoice progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.OMNIVOICE_PROGRESS || '0').toLowerCase()) ? 'omnivoice' : 'edge fallback');
335
+ } else if (ttsBackend === 'qwen3tts') {
336
+ const qwenCommand = env.QWEN3TTS_COMMAND || 'audio';
337
+ ok = check('Qwen3 TTS audio CLI', commandExists(qwenCommand), commandExists(qwenCommand) || 'install speech-swift/audio first') && ok;
338
+ note('Qwen3 speaker', env.QWEN3TTS_SPEAKER || 'sohee');
339
+ note('Qwen3 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.QWEN3TTS_PROGRESS || '0').toLowerCase()) ? 'qwen3tts' : 'edge fallback');
340
+ } else if (ttsBackend === 'mlxaudio') {
341
+ const mlxPython = env.MLXAUDIO_PYTHON || './.venv-mlxaudio/bin/python';
342
+ const mlxPath = path.isAbsolute(mlxPython) ? mlxPython : path.resolve(ROOT, mlxPython);
343
+ ok = check('MLX Audio Python', isExecutable(mlxPath) || commandExists(mlxPython), isExecutable(mlxPath) ? path.relative(ROOT, mlxPath) : (commandExists(mlxPython) || 'install with: scripts/install_mlxaudio.sh --yes')) && ok;
344
+ ok = check('MLX Audio wrapper help', spawnSync(isExecutable(mlxPath) ? mlxPath : 'python3', ['integrations/mlxaudio/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/mlxaudio/synth.py') && ok;
345
+ note('MLX Audio model/voice', `${env.MLXAUDIO_MODEL || 'mlx-community/Qwen3-TTS-12Hz-1.7B-Base-8bit'} / ${env.MLXAUDIO_VOICE || 'Chelsie'}`);
346
+ note('MLX Audio progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MLXAUDIO_PROGRESS || '0').toLowerCase()) ? 'mlxaudio' : 'edge fallback');
347
+ } else if (ttsBackend === 'fireredtts2') {
348
+ const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
349
+ const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
350
+ const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
351
+ ok = check('FireRedTTS-2 wrapper', isExecutable(firePath), path.relative(ROOT, firePath) || firePath) && ok;
352
+ ok = check('FireRedTTS-2 model', fs.existsSync(fireModel), path.relative(ROOT, fireModel)) && ok;
353
+ ok = check('FireRedTTS-2 synth wrapper help', spawnSync(isExecutable(firePath) ? firePath : process.execPath, isExecutable(firePath) ? ['--help'] : ['integrations/fireredtts2/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/fireredtts2/synth.py') && ok;
354
+ note('FireRedTTS-2 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.FIREREDTTS2_PROGRESS || '0').toLowerCase()) ? 'fireredtts2' : 'edge fallback');
355
+ } else if (ttsBackend === 'mossttsnano') {
356
+ const mossCommand = env.MOSSTTSNANO_COMMAND || './.venv-mossttsnano/bin/python';
357
+ const mossPath = path.isAbsolute(mossCommand) ? mossCommand : path.resolve(ROOT, mossCommand);
358
+ const mossScript = path.resolve(ROOT, env.MOSSTTSNANO_SCRIPT || 'vendor/MOSS-TTS-Nano/infer.py');
359
+ ok = check('MOSS-TTS-Nano Python', isExecutable(mossPath) || commandExists(mossCommand), isExecutable(mossPath) ? path.relative(ROOT, mossPath) : (commandExists(mossCommand) || 'missing')) && ok;
360
+ ok = check('MOSS-TTS-Nano infer.py', fs.existsSync(mossScript), path.relative(ROOT, mossScript)) && ok;
361
+ note('MOSS checkpoint', env.MOSSTTSNANO_CHECKPOINT || 'OpenMOSS-Team/MOSS-TTS-Nano');
362
+ note('MOSS progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MOSSTTSNANO_PROGRESS || '0').toLowerCase()) ? 'mossttsnano' : 'edge fallback');
363
+ } else if (ttsBackend === 'neuttsair') {
364
+ const neuPython = env.NEUTTSAIR_PYTHON || './.venv-neuttsair/bin/python';
365
+ const neuPath = path.isAbsolute(neuPython) ? neuPython : path.resolve(ROOT, neuPython);
366
+ const neuScript = path.resolve(ROOT, env.NEUTTSAIR_SCRIPT || 'integrations/neuttsair/synth.py');
367
+ const refAudio = path.resolve(ROOT, env.NEUTTSAIR_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
368
+ ok = check('NeuTTS Air Python', isExecutable(neuPath) || commandExists(neuPython), isExecutable(neuPath) ? path.relative(ROOT, neuPath) : (commandExists(neuPython) || 'install with: python3 -m venv .venv-neuttsair && .venv-neuttsair/bin/pip install -e vendor/neutts-air')) && ok;
369
+ ok = check('NeuTTS Air wrapper', fs.existsSync(neuScript), path.relative(ROOT, neuScript)) && ok;
370
+ ok = check('NeuTTS Air reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
371
+ ok = check('NeuTTS Air synth wrapper help', spawnSync(isExecutable(neuPath) ? neuPath : 'python3', ['integrations/neuttsair/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/neuttsair/synth.py') && ok;
372
+ note('NeuTTS Air backbone/device', `${env.NEUTTSAIR_BACKBONE_REPO || env.NEUTTSAIR_BACKBONE || 'neuphonic/neutts-air-q4-gguf'} / ${env.NEUTTSAIR_BACKBONE_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
373
+ note('NeuTTS Air codec/device', `${env.NEUTTSAIR_CODEC_REPO || env.NEUTTSAIR_CODEC || 'neuphonic/neucodec'} / ${env.NEUTTSAIR_CODEC_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
374
+ note('NeuTTS Air progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.NEUTTSAIR_PROGRESS || '0').toLowerCase()) ? 'neuttsair' : 'edge fallback');
264
375
  }
265
376
 
266
377
  const backendCommand = {
@@ -4,6 +4,7 @@ import path from 'node:path';
4
4
  import readline from 'node:readline/promises';
5
5
  import { stdin as input, stdout as output } from 'node:process';
6
6
  import { buildEnvFile, normalizeInstallAnswers, renderInstallSummary, SUPPORTED_HARNESSES } from '../app-node/install_config.mjs';
7
+ import { detectInstalledAgents, pickDefaultBackend, formatAgentDetectionReport } from '../app-node/agent_detect.mjs';
7
8
 
8
9
  const ROOT = path.resolve(path.dirname(new URL(import.meta.url).pathname), '..');
9
10
 
@@ -138,7 +139,20 @@ async function main() {
138
139
  try {
139
140
  console.log('VerbalCoding installer');
140
141
  console.log(`Supported harnesses: ${SUPPORTED_HARNESSES.join(', ')}`);
141
- const harness = await ask('Harness/backend', 'hermes');
142
+ console.log('Discord setup: keep https://discord.com/developers/applications open.');
143
+ console.log('Create an application/bot, enable Message Content intent, then paste the bot token and application/client ID below.');
144
+ console.log('If you are not ready, press Enter to skip and run `vc setup token` / `vc setup channels` later.');
145
+ let detectionDefault = 'hermes';
146
+ try {
147
+ const detection = await detectInstalledAgents(process.env);
148
+ console.log('');
149
+ console.log(formatAgentDetectionReport(detection));
150
+ detectionDefault = pickDefaultBackend(detection, process.env.AGENT_BACKEND);
151
+ console.log('');
152
+ } catch (e) {
153
+ console.log(`(agent detection skipped: ${e?.message || e})`);
154
+ }
155
+ const harness = await ask('Harness/backend', detectionDefault);
142
156
  let agentCommand = '';
143
157
  let agentLabel = '';
144
158
  if (harness.toLowerCase() === 'custom') {
@@ -152,7 +166,7 @@ async function main() {
152
166
  const autoJoinVoiceChannels = await ask('Auto-join voice channel names', process.env.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general');
153
167
  const transcriptChannelId = await ask('Transcript text channel/thread ID', process.env.TRANSCRIPT_CHANNEL_ID || '');
154
168
  const language = await ask('Default voice language: ko/en/auto', process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko');
155
- const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic', process.env.TTS_BACKEND || 'edge');
169
+ const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic/omnivoice/qwen3tts/mlxaudio/fireredtts2/mossttsnano/neuttsair', process.env.TTS_BACKEND || 'edge');
156
170
  const edgeTtsCommand = await ask('Edge TTS command', process.env.EDGE_TTS_COMMAND || process.env.TTS_EDGE_COMMAND || 'edge-tts');
157
171
  const ttsVoice = await ask('TTS voice', process.env.TTS_VOICE || 'ko-KR-SunHiNeural');
158
172
  const ttsRate = await ask('TTS rate', process.env.TTS_RATE || '+10%');
@@ -165,6 +179,8 @@ async function main() {
165
179
  const openvoiceDir = await ask('OpenVoice repo dir', process.env.OPENVOICE_DIR || './vendor/OpenVoice');
166
180
  const openvoiceVenv = await ask('OpenVoice venv dir', process.env.OPENVOICE_VENV || './.venv-openvoice');
167
181
  const openvoiceRefAudio = await ask('OpenVoice reference audio path', process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
182
+ const omnivoicePython = await ask('OmniVoice Python', process.env.OMNIVOICE_PYTHON || './.venv-omnivoice/bin/python');
183
+ const omnivoiceRefAudio = await ask('OmniVoice reference audio path', process.env.OMNIVOICE_REF_AUDIO || process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
168
184
  const requireWake = (await ask('Require wake word? 1/0', process.env.REQUIRE_WAKE_WORD || '0')) === '1';
169
185
  const verboseProgress = (await ask('Verbose progress by default? 1/0', process.env.AGENT_VERBOSE_PROGRESS || process.env.VERBALCODING_VERBOSE_PROGRESS || '0')) === '1';
170
186
  const utteranceIdleMs = await ask('Utterance idle wait before STT, ms', process.env.UTTERANCE_IDLE_MS || '4500');
@@ -193,6 +209,8 @@ async function main() {
193
209
  openvoiceDir,
194
210
  openvoiceVenv,
195
211
  openvoiceRefAudio,
212
+ omnivoicePython,
213
+ omnivoiceRefAudio,
196
214
  requireWakeWord: requireWake,
197
215
  verboseProgress,
198
216
  utteranceIdleMs,