verbalcoding 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. package/.env.example +74 -4
  2. package/README.es.md +3 -1
  3. package/README.fr.md +3 -1
  4. package/README.ja.md +3 -1
  5. package/README.ko.md +4 -2
  6. package/README.md +4 -2
  7. package/README.ru.md +3 -1
  8. package/README.zh.md +3 -1
  9. package/app-node/agent_adapters.test.mjs +14 -0
  10. package/app-node/agent_routing.mjs +148 -0
  11. package/app-node/agent_routing.test.mjs +138 -0
  12. package/app-node/agent_turn.mjs +86 -0
  13. package/app-node/agent_turn.test.mjs +109 -0
  14. package/app-node/bridge_context.mjs +73 -0
  15. package/app-node/bridge_context.test.mjs +54 -0
  16. package/app-node/bridge_state.mjs +4 -0
  17. package/app-node/bridge_wireup.test.mjs +462 -0
  18. package/app-node/cli_install.test.mjs +31 -0
  19. package/app-node/cross_agent_routing.test.mjs +78 -0
  20. package/app-node/discord_command_router.mjs +204 -0
  21. package/app-node/discord_command_router.test.mjs +311 -0
  22. package/app-node/discord_voice_setup.mjs +251 -0
  23. package/app-node/discord_voice_setup.test.mjs +86 -0
  24. package/app-node/hermes_profiles.test.mjs +12 -1
  25. package/app-node/install_config.mjs +110 -3
  26. package/app-node/install_config.test.mjs +8 -0
  27. package/app-node/instance_doctor.test.mjs +9 -0
  28. package/app-node/instances.test.mjs +8 -1
  29. package/app-node/main.mjs +488 -1368
  30. package/app-node/mcp_tools.test.mjs +7 -0
  31. package/app-node/notification_handler.mjs +89 -0
  32. package/app-node/notification_handler.test.mjs +187 -0
  33. package/app-node/plan_dispatcher.mjs +215 -0
  34. package/app-node/plan_dispatcher.test.mjs +101 -0
  35. package/app-node/plan_mode.mjs +36 -7
  36. package/app-node/plan_mode.test.mjs +78 -0
  37. package/app-node/progress_handler.mjs +220 -0
  38. package/app-node/progress_handler.test.mjs +193 -0
  39. package/app-node/progress_speech.mjs +54 -32
  40. package/app-node/progress_speech.test.mjs +12 -3
  41. package/app-node/project_sessions.mjs +5 -2
  42. package/app-node/project_sessions.test.mjs +7 -0
  43. package/app-node/research_mode.mjs +282 -0
  44. package/app-node/research_mode.test.mjs +264 -0
  45. package/app-node/restart_notice.mjs +3 -0
  46. package/app-node/restart_notice.test.mjs +11 -0
  47. package/app-node/session_ontology.mjs +271 -0
  48. package/app-node/session_ontology.test.mjs +130 -0
  49. package/app-node/smart_progress.mjs +1 -1
  50. package/app-node/stream_sentencer.mjs +32 -2
  51. package/app-node/stream_sentencer.test.mjs +65 -0
  52. package/app-node/streaming_tts_queue.mjs +5 -1
  53. package/app-node/streaming_tts_queue.test.mjs +7 -1
  54. package/app-node/stt_whisper.mjs +24 -0
  55. package/app-node/stt_whisper.test.mjs +32 -0
  56. package/app-node/text_routing.mjs +4 -2
  57. package/app-node/tts_backends.mjs +537 -3
  58. package/app-node/tts_backends.test.mjs +454 -0
  59. package/app-node/tts_player.mjs +164 -0
  60. package/app-node/tts_player.test.mjs +202 -0
  61. package/app-node/tts_runtime.mjs +134 -0
  62. package/app-node/tts_runtime.test.mjs +89 -0
  63. package/app-node/tts_settings.mjs +150 -3
  64. package/app-node/tts_settings.test.mjs +204 -0
  65. package/app-node/tts_voice_config.mjs +136 -2
  66. package/app-node/tts_voice_config.test.mjs +94 -0
  67. package/app-node/utterance_router.mjs +216 -0
  68. package/app-node/utterance_router.test.mjs +236 -0
  69. package/app-node/voice_autojoin.mjs +37 -0
  70. package/app-node/voice_autojoin.test.mjs +59 -0
  71. package/app-node/voice_io.mjs +272 -0
  72. package/app-node/voice_io.test.mjs +102 -0
  73. package/app-node/voice_turn_runner.mjs +449 -0
  74. package/app-node/voice_turn_runner.test.mjs +289 -0
  75. package/docs/CONFIGURATION.md +12 -2
  76. package/docs/HARNESSES.md +58 -0
  77. package/docs/HARNESS_AIDER.md +50 -0
  78. package/docs/HARNESS_CLAUDE.md +56 -0
  79. package/docs/HARNESS_CODEX.md +56 -0
  80. package/docs/HARNESS_CURSOR.md +45 -0
  81. package/docs/HARNESS_GEMINI.md +45 -0
  82. package/docs/HARNESS_HERMES.md +57 -0
  83. package/docs/HARNESS_OPENCLAW.md +44 -0
  84. package/docs/HARNESS_OPENCODE.md +44 -0
  85. package/docs/README.md +1 -0
  86. package/docs/ROADMAP.md +20 -5
  87. package/docs/TTS_BACKENDS.md +227 -0
  88. package/docs/USAGE.md +22 -0
  89. package/docs/i18n/AGENTS.es.md +34 -0
  90. package/docs/i18n/AGENTS.fr.md +34 -0
  91. package/docs/i18n/AGENTS.ja.md +34 -0
  92. package/docs/i18n/AGENTS.ko.md +34 -0
  93. package/docs/i18n/AGENTS.ru.md +34 -0
  94. package/docs/i18n/AGENTS.zh.md +34 -0
  95. package/docs/i18n/HARNESSES.es.md +58 -0
  96. package/docs/i18n/HARNESSES.fr.md +58 -0
  97. package/docs/i18n/HARNESSES.ja.md +58 -0
  98. package/docs/i18n/HARNESSES.ko.md +58 -0
  99. package/docs/i18n/HARNESSES.ru.md +58 -0
  100. package/docs/i18n/HARNESSES.zh.md +58 -0
  101. package/docs/i18n/HARNESS_AIDER.es.md +48 -0
  102. package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
  103. package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
  104. package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
  105. package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
  106. package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
  107. package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
  108. package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
  109. package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
  110. package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
  111. package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
  112. package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
  113. package/docs/i18n/HARNESS_CODEX.es.md +55 -0
  114. package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
  115. package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
  116. package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
  117. package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
  118. package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
  119. package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
  120. package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
  121. package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
  122. package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
  123. package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
  124. package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
  125. package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
  126. package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
  127. package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
  128. package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
  129. package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
  130. package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
  131. package/docs/i18n/HARNESS_HERMES.es.md +54 -0
  132. package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
  133. package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
  134. package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
  135. package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
  136. package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
  137. package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
  138. package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
  139. package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
  140. package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
  141. package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
  142. package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
  143. package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
  144. package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
  145. package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
  146. package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
  147. package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
  148. package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
  149. package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
  150. package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
  151. package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
  152. package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
  153. package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
  154. package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
  155. package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
  156. package/integrations/fireredtts2/mlx_llm.py +183 -0
  157. package/integrations/fireredtts2/synth.py +156 -0
  158. package/integrations/fireredtts2/synth_mlx.py +196 -0
  159. package/integrations/mlxaudio/synth.py +74 -0
  160. package/integrations/neuttsair/synth.py +104 -0
  161. package/integrations/omnivoice/synth.py +110 -0
  162. package/package.json +6 -1
  163. package/scripts/cli.mjs +84 -0
  164. package/scripts/doctor.mjs +104 -4
  165. package/scripts/install.mjs +5 -1
  166. package/scripts/install_fireredtts2.sh +109 -0
  167. package/scripts/install_mlxaudio.sh +34 -0
  168. package/scripts/install_mossttsnano.sh +46 -0
  169. package/scripts/postinstall.mjs +34 -0
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+
9
+
10
def repo_root() -> Path:
    """Return the directory two levels above the folder holding this script.

    The script path is resolved first, so the result is absolute.
    """
    script_path = Path(__file__).resolve()
    return script_path.parents[2]
12
+
13
+
14
def resolve(root: Path, value: str | None) -> str | None:
    """Turn a possibly relative path argument into a path string.

    Args:
        root: Directory that relative paths are joined onto.
        value: Raw path argument; ``None`` or ``""`` means "not given".

    Returns:
        ``None`` when no value was given, otherwise the path as a string with
        ``~`` expanded and relative paths anchored at ``root``.
    """
    if not value:
        return None
    candidate = Path(value).expanduser()
    anchored = candidate if candidate.is_absolute() else root / candidate
    return str(anchored)
21
+
22
+
23
def read_text_arg(value: str | None) -> str:
    """Return the text for an argument that is either literal text or a file path.

    Args:
        value: Literal text, or the path of a UTF-8 text file.

    Returns:
        ``""`` when ``value`` is empty; the stripped file contents when
        ``value`` names an existing regular file; otherwise ``value`` stripped.

    Raises:
        OSError, UnicodeDecodeError: if the file exists but cannot be read or
            decoded as UTF-8.
    """
    if not value:
        return ""
    # A literal transcript is not necessarily a valid path: very long text makes
    # the filesystem probe raise OSError (name too long), and text starting with
    # "~name" can make expanduser() raise RuntimeError. Neither means "file
    # missing", so treat any probe failure as "this is literal text".
    try:
        p = Path(value).expanduser()
        is_file = p.is_file()
    except (OSError, RuntimeError, ValueError):
        is_file = False
    # is_file() rather than exists(): a directory that happens to share the
    # text's name must not be opened as a transcript file.
    if is_file:
        return p.read_text(encoding="utf-8").strip()
    return value.strip()
30
+
31
+
32
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the wrapper's command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses;
            passing a list allows programmatic use.

    Returns:
        Namespace with ``text``, ``output``, ``backbone``, ``codec``,
        ``backbone_device``, ``codec_device``, ``ref_audio``, ``ref_text``,
        ``ref_text_file``, ``language``, ``sample_rate`` and ``cache_ref``.
    """
    parser = argparse.ArgumentParser(description="NeuTTS-Air synthesis wrapper for VerbalCoding")
    parser.add_argument("--text", required=True)
    parser.add_argument("--output", required=True)
    # Both spellings of the repo options store into the same attribute.
    parser.add_argument("--backbone", "--backbone-repo", dest="backbone", default="neuphonic/neutts-air-q4-gguf")
    parser.add_argument("--codec", "--codec-repo", dest="codec", default="neuphonic/neucodec")
    parser.add_argument("--backbone-device", default="cpu")
    parser.add_argument("--codec-device", default="cpu")
    parser.add_argument("--ref-audio", default="")
    parser.add_argument("--ref-text", default="")
    parser.add_argument("--ref-text-file", default="")
    parser.add_argument("--language", default="en")
    parser.add_argument("--sample-rate", type=int, default=24000)
    parser.add_argument("--cache-ref", action="store_true")
    return parser.parse_args(argv)
47
+
48
+
49
def main() -> int:
    """Synthesize one utterance with NeuTTS-Air and write it to ``--output``.

    Returns:
        Process exit code:
        0   - audio was written;
        1   - preparing the output directory, loading the model, encoding the
              reference, or synthesis failed;
        66  - the reference audio file was not given or does not exist;
        127 - soundfile, torch, or neutts could not be imported.

    All diagnostics are written to stderr.
    """
    args = parse_args()
    root = repo_root()
    # If vendor/neutts-air exists under the repo root, put it first on sys.path
    # so that `neutts` can be imported from there.
    vendor = root / "vendor" / "neutts-air"
    if vendor.exists():
        sys.path.insert(0, str(vendor))
    try:
        import soundfile as sf
        import torch
        from neutts import NeuTTS
    except Exception as exc:
        print(f"NeuTTS-Air dependencies are missing: {exc}", file=sys.stderr, flush=True)
        return 127

    ref_audio = resolve(root, args.ref_audio)
    # A transcript file, when given and non-empty, wins over inline --ref-text.
    ref_text = read_text_arg(args.ref_text_file) or read_text_arg(args.ref_text)
    if not ref_audio or not Path(ref_audio).exists():
        print(f"NeuTTS-Air reference audio not found: {ref_audio}", file=sys.stderr, flush=True)
        return 66
    if not ref_text:
        # Fall back to a short generic transcript; users should configure NEUTTSAIR_REF_TEXT
        # or NEUTTSAIR_REF_TEXT_FILE for best cloning quality.
        ref_text = "This is a reference voice sample."

    out = Path(args.output).expanduser()
    # The encoded reference is cached next to the reference audio file.
    cache_path = Path(ref_audio).with_suffix(".neutts.pt")

    try:
        # Inside the try so an unwritable output directory is reported like any
        # other synthesis failure instead of escaping as a traceback.
        out.parent.mkdir(parents=True, exist_ok=True)
        print(f"[neutts-air] loading backbone={args.backbone} codec={args.codec}", file=sys.stderr, flush=True)
        tts = NeuTTS(
            backbone_repo=args.backbone,
            backbone_device=args.backbone_device,
            codec_repo=args.codec,
            codec_device=args.codec_device,
        )

        ref_codes = None
        if args.cache_ref and cache_path.exists():
            if cache_path.stat().st_mtime < Path(ref_audio).stat().st_mtime:
                # The reference audio was replaced after the cache was written;
                # the cached codes would describe the old recording.
                print(f"[neutts-air] cached reference {cache_path} is stale; re-encoding", file=sys.stderr, flush=True)
            else:
                print(f"[neutts-air] loading cached reference {cache_path}", file=sys.stderr, flush=True)
                try:
                    # NOTE(review): weights_only=False unpickles arbitrary objects, so a
                    # tampered cache file can execute code. Only safe while the cache
                    # directory is trusted; consider weights_only=True if the cached
                    # value is a plain tensor - confirm against neutts.
                    ref_codes = torch.load(cache_path, map_location="cpu", weights_only=False)
                except Exception as exc:
                    # A truncated or incompatible cache must not block synthesis.
                    print(f"[neutts-air] ignoring unreadable cache {cache_path}: {exc}", file=sys.stderr, flush=True)
                    ref_codes = None

        if ref_codes is None:
            print(f"[neutts-air] encoding reference {ref_audio}", file=sys.stderr, flush=True)
            ref_codes = tts.encode_reference(ref_audio)
            if args.cache_ref:
                try:
                    torch.save(ref_codes, cache_path)
                except Exception as exc:
                    # Caching is an optimization; e.g. a read-only sample
                    # directory should not fail the request.
                    print(f"[neutts-air] could not write cache {cache_path}: {exc}", file=sys.stderr, flush=True)

        print(f"[neutts-air] generating chars={len(args.text)}", file=sys.stderr, flush=True)
        wav = tts.infer(args.text, ref_codes, ref_text)
        sf.write(str(out), wav, args.sample_rate)
        print(f"[neutts-air] wrote {out}", file=sys.stderr, flush=True)
        return 0
    except Exception as exc:
        print(f"NeuTTS-Air synthesis failed: {exc}", file=sys.stderr, flush=True)
        return 1
101
+
102
+
103
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env python3
2
+ """Synthesize speech with k2-fsa OmniVoice for VerbalCoding.
3
+
4
+ The wrapper keeps the Node bridge independent from OmniVoice's Python runtime.
5
+ It accepts one text chunk and writes a 24 kHz WAV file.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import inspect
12
+ import sys
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+
17
+ def _torch_dtype(name: str):
18
+ import torch
19
+
20
+ normalized = (name or "").lower()
21
+ if normalized in {"auto", ""}:
22
+ return None
23
+ if normalized in {"float16", "fp16", "half"}:
24
+ return torch.float16
25
+ if normalized in {"bfloat16", "bf16"}:
26
+ return torch.bfloat16
27
+ if normalized in {"float32", "fp32"}:
28
+ return torch.float32
29
+ raise ValueError(f"Unsupported OmniVoice dtype: {name}")
30
+
31
+
32
+ def _filtered_call(fn, **kwargs: Any):
33
+ """Call fn with only supported kwargs when the signature is inspectable."""
34
+
35
+ try:
36
+ sig = inspect.signature(fn)
37
+ except (TypeError, ValueError):
38
+ return fn(**{k: v for k, v in kwargs.items() if v not in (None, "")})
39
+
40
+ accepts_kwargs = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values())
41
+ clean = {k: v for k, v in kwargs.items() if v not in (None, "")}
42
+ if accepts_kwargs:
43
+ return fn(**clean)
44
+ return fn(**{k: v for k, v in clean.items() if k in sig.parameters})
45
+
46
+
47
def synthesize(args: argparse.Namespace) -> None:
    """Generate speech for ``args.text`` with OmniVoice and write a 24 kHz WAV.

    Reads ``model``, ``device``, ``dtype``, ``text``, ``ref_audio``,
    ``ref_text``, ``language``, ``speaker`` and ``output`` from ``args``.

    Raises:
        RuntimeError: if soundfile, torch or omnivoice cannot be imported, the
            model returns an empty list, or the written file is missing/empty.
        ValueError: if ``args.dtype`` is not a supported dtype name.
    """
    try:
        import soundfile as sf
        import torch
        from omnivoice import OmniVoice
    except Exception as exc:  # pragma: no cover - exercised in real install
        raise RuntimeError(
            "OmniVoice dependencies are missing. Install them in OMNIVOICE_PYTHON's environment: "
            "pip install torch torchaudio soundfile omnivoice"
        ) from exc

    dtype = _torch_dtype(args.dtype)
    load_kwargs = {"device_map": args.device}
    # "auto"/empty dtype means: let from_pretrained pick its own default.
    if dtype is not None:
        load_kwargs["dtype"] = dtype

    model = OmniVoice.from_pretrained(args.model, **load_kwargs)
    # Inference only; no gradients are needed.
    if hasattr(torch, "set_grad_enabled"):
        torch.set_grad_enabled(False)

    # Empty options are dropped, and unsupported ones too when the signature
    # of generate() is inspectable.
    audio = _filtered_call(
        model.generate,
        text=args.text,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
        language=args.language,
        speaker=args.speaker,
    )
    # Unwrap (audio, ...) tuples and [audio, ...] lists down to the first item.
    if isinstance(audio, tuple):
        audio = audio[0]
    if isinstance(audio, list):
        if not audio:
            raise RuntimeError("OmniVoice returned no audio")
        audio = audio[0]

    # NOTE(review): the return type of model.generate is not visible here. If
    # it is a torch tensor, move it to host memory and widen half-precision
    # floats before handing it to soundfile; this is a no-op for data that is
    # already a host float32 array. Confirm against the OmniVoice API.
    if isinstance(audio, torch.Tensor):
        audio = audio.detach().cpu()
        if audio.is_floating_point():
            audio = audio.float()
        audio = audio.numpy()

    # expanduser() so "~/..." output paths behave like in the NeuTTS wrapper.
    out = Path(args.output).expanduser()
    out.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(out), audio, 24000)
    # Guard against a write that "succeeded" without producing usable data.
    if not out.exists() or out.stat().st_size <= 0:
        raise RuntimeError(f"OmniVoice wrote empty output: {out}")
87
+
88
+
89
+ def main(argv: list[str] | None = None) -> int:
90
+ parser = argparse.ArgumentParser(description="VerbalCoding OmniVoice TTS wrapper")
91
+ parser.add_argument("--text", required=True)
92
+ parser.add_argument("--output", required=True)
93
+ parser.add_argument("--model", default="k2-fsa/OmniVoice")
94
+ parser.add_argument("--device", default="mps")
95
+ parser.add_argument("--dtype", default="float16")
96
+ parser.add_argument("--ref-audio", default="")
97
+ parser.add_argument("--ref-text", default="")
98
+ parser.add_argument("--language", default="ko")
99
+ parser.add_argument("--speaker", default="")
100
+ args = parser.parse_args(argv)
101
+ synthesize(args)
102
+ return 0
103
+
104
+
105
# Script entry point: report any failure as one stderr line and exit status 1.
if __name__ == "__main__":  # pragma: no cover
    try:
        exit_status = main()
    except Exception as exc:
        print(f"OmniVoice synthesis failed: {exc}", file=sys.stderr)
        exit_status = 1
    raise SystemExit(exit_status)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "verbalcoding",
3
- "version": "0.2.12",
3
+ "version": "0.2.13",
4
4
  "description": "Discord voice bridge for CLI coding agents.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -35,6 +35,10 @@
35
35
  "scripts/*.mjs",
36
36
  "scripts/*.sh",
37
37
  "integrations/openvoice/*.py",
38
+ "integrations/omnivoice/*.py",
39
+ "integrations/fireredtts2/*.py",
40
+ "integrations/mlxaudio/*.py",
41
+ "integrations/neuttsair/*.py",
38
42
  "run.sh",
39
43
  ".env.example",
40
44
  "README.md",
@@ -42,6 +46,7 @@
42
46
  "LICENSE"
43
47
  ],
44
48
  "scripts": {
49
+ "postinstall": "node scripts/postinstall.mjs",
45
50
  "start": "node app-node/main.mjs",
46
51
  "setup": "node scripts/install.mjs",
47
52
  "doctor": "node scripts/doctor.mjs",
package/scripts/cli.mjs CHANGED
@@ -10,7 +10,17 @@ import {
10
10
  normalizeInstanceAnswers,
11
11
  parseKeyValueEnv,
12
12
  renderInstanceSetupSummary,
13
+ SUPPORTED_TTS_BACKENDS,
13
14
  } from '../app-node/install_config.mjs';
15
+ import {
16
+ applyTtsVoiceSelectionToEnv,
17
+ defaultTtsVoiceConfig,
18
+ effectiveTtsVoiceSelection,
19
+ readTtsVoiceConfig,
20
+ updateTtsVoiceConfig,
21
+ writeTtsVoiceConfig,
22
+ } from '../app-node/tts_voice_config.mjs';
23
+ import { normalizeTtsBackendName } from '../app-node/tts_settings.mjs';
14
24
  import { ensureHermesProfile, validateProfileName } from '../app-node/hermes_profiles.mjs';
15
25
  import { checkInstanceConfigs } from '../app-node/instance_doctor.mjs';
16
26
  import { healInstanceProfileFromEnv } from '../app-node/instance_profile_lifecycle.mjs';
@@ -38,6 +48,8 @@ Usage:
38
48
  vc language <ko|en|auto>
39
49
  vc language status
40
50
  vc restart auto <on|off|status>
51
+ vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}>
52
+ vc tts status
41
53
  vc bot invite <client-id> [--guild <guild-id>]
42
54
  vc instance list
43
55
  vc instance setup [name] [--start]
@@ -57,6 +69,7 @@ Examples:
57
69
  vc language en
58
70
  vc language ko
59
71
  vc language auto
72
+ vc tts backend qwen3
60
73
  vc restart auto off
61
74
  vc bot invite 123456789012345678
62
75
  `;
@@ -97,6 +110,56 @@ function printLanguageStatus(values) {
97
110
  console.log(`TTS voice: ${s.ttsVoice}`);
98
111
  }
99
112
 
113
/**
 * Locate the TTS voice configuration file.
 *
 * Precedence: TTS_VOICE_CONFIG from `values`, then from process.env, then
 * config/tts-voices.json. Relative locations are joined onto ROOT.
 *
 * @param {object} [values] Parsed env-file values; read from disk when omitted.
 * @returns {string} Path of the voice configuration file.
 */
function ttsVoiceConfigPath(values = readEnvFile()) {
  const location =
    values.TTS_VOICE_CONFIG || process.env.TTS_VOICE_CONFIG || path.join('config', 'tts-voices.json');
  if (path.isAbsolute(location)) {
    return location;
  }
  return path.join(ROOT, location);
}
+ }
117
+
118
/**
 * Copy `updates` without the entries whose value is null, undefined, or ''.
 * Other falsy values (0, false) are kept.
 *
 * @param {object} updates Candidate key/value pairs.
 * @returns {object} New object holding only the meaningful entries.
 */
function compactUpdates(updates) {
  const hasContent = ([, value]) => value !== null && value !== undefined && value !== '';
  const keptEntries = Object.entries(updates).filter(hasContent);
  return Object.fromEntries(keptEntries);
}
121
+
122
/**
 * Print the effective TTS backend, voice type, and voice to stdout.
 *
 * The selection is computed from the env file and the voice config file; the
 * voice line shows the voice's label, else its `voice` field, else '-'.
 */
function printTtsStatus() {
  const envValues = readEnvFile();
  const voiceConfig = readTtsVoiceConfig(ttsVoiceConfigPath(envValues), defaultTtsVoiceConfig());
  const { backend, voiceType, voice } = effectiveTtsVoiceSelection(voiceConfig, envValues);
  const voiceName = voice?.label || voice?.voice || '-';

  console.log(`TTS backend: ${backend}`);
  console.log(`TTS voice type: ${voiceType}`);
  console.log(`TTS voice: ${voiceName}`);
}
130
+
131
/**
 * Switch the TTS backend from the command line.
 *
 * Writes the updated voice config file, then upserts the resulting non-empty
 * TTS settings into the env file and prints a summary.
 *
 * @param {string} rawBackend Backend name as typed by the user.
 * @param {string} [rawVoiceType] Optional voice type; '' keeps the env value.
 * @throws {Error} When the backend name does not normalize to a known backend.
 */
function setTtsBackendFromCli(rawBackend, rawVoiceType = '') {
  const backend = normalizeTtsBackendName(rawBackend, '');
  if (!backend) {
    const supported = SUPPORTED_TTS_BACKENDS.join(', ');
    throw new Error(`Unknown TTS backend: ${rawBackend}. Supported: ${supported}`);
  }

  // Persist the choice in the voice config file first.
  const env = readEnvFile();
  const configPath = ttsVoiceConfigPath(env);
  const storedConfig = readTtsVoiceConfig(configPath, defaultTtsVoiceConfig());
  const nextConfig = updateTtsVoiceConfig(storedConfig, { backend, voiceType: rawVoiceType });
  writeTtsVoiceConfig(configPath, nextConfig);

  // Resolve the selection as if the env already carried the new backend.
  const overrides = { TTS_BACKEND: backend, TTS_VOICE_TYPE: rawVoiceType || env.TTS_VOICE_TYPE };
  const selected = effectiveTtsVoiceSelection(nextConfig, { ...env, ...overrides });
  const nextEnv = applyTtsVoiceSelectionToEnv(env, selected);

  // Backend-specific keys are copied as-is; compactUpdates drops the empty ones.
  const backendSpecificKeys = [
    'QWEN3TTS_MODE',
    'QWEN3TTS_SPEAKER',
    'QWEN3TTS_REF_AUDIO',
    'QWEN3TTS_INSTRUCT',
    'MLXAUDIO_VOICE',
    'FIREREDTTS2_PROMPT_AUDIO',
    'MOSSTTSNANO_MODE',
    'MOSSTTSNANO_PROMPT_AUDIO',
  ];
  const updates = {
    TTS_BACKEND: nextEnv.TTS_BACKEND,
    TTS_VOICE_TYPE: nextEnv.TTS_VOICE_TYPE,
    TTS_VOICE: nextEnv.TTS_VOICE || env.TTS_VOICE,
    VOICE_LANGUAGE: nextEnv.VOICE_LANGUAGE || env.VOICE_LANGUAGE,
    ...Object.fromEntries(backendSpecificKeys.map((key) => [key, nextEnv[key]])),
  };
  upsertEnvFile(ENV_PATH, compactUpdates(updates));

  console.log(`Updated ${ENV_PATH}`);
  console.log(`TTS backend: ${selected.backend}`);
  console.log(`TTS voice type: ${selected.voiceType}`);
  console.log('Restart the bridge for CLI changes to take effect; voice requests switch the running bridge immediately.');
}
162
+
100
163
  function printInstanceStatus(statuses) {
101
164
  if (statuses.length === 0) {
102
165
  console.log('No instance env files found in instances/*.env');
@@ -308,9 +371,30 @@ async function main(argv = process.argv.slice(2)) {
308
371
  }
309
372
  if (command === 'status') {
310
373
  printLanguageStatus(readEnvFile());
374
+ printTtsStatus();
311
375
  console.log(autoRestartStatusText(readEnvFile()));
312
376
  return;
313
377
  }
378
+ if (command === 'tts' || command === 'voice') {
379
+ if (!subcommand || subcommand === 'status') {
380
+ printTtsStatus();
381
+ return;
382
+ }
383
+ if (subcommand === 'backend' || subcommand === 'switch' || subcommand === 'set') {
384
+ const backend = argv[2];
385
+ const voiceType = argv.includes('--voice-type') ? argv[argv.indexOf('--voice-type') + 1] : '';
386
+ if (!backend || backend.startsWith('--')) {
387
+ console.error(`Use: vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}> [--voice-type <name>]`);
388
+ process.exitCode = 2;
389
+ return;
390
+ }
391
+ setTtsBackendFromCli(backend, voiceType);
392
+ return;
393
+ }
394
+ console.error('Use: vc tts status OR vc tts backend <name>');
395
+ process.exitCode = 2;
396
+ return;
397
+ }
314
398
  if (command === 'instance') {
315
399
  await handleInstanceCommand(argv);
316
400
  return;
@@ -20,12 +20,25 @@ function readEnvFile(file) {
20
20
  }
21
21
  }
22
22
 
23
/**
 * Return a copy of `env` without string values that still contain a shell
 * variable reference ($NAME or ${...}).
 *
 * parseKeyValueEnv does no shell expansion, so a line such as
 *   PATH="$JAVA_HOME/bin:$PATH"
 * would otherwise overwrite process.env.PATH with a literal "$VAR" string and
 * break every JS-level PATH lookup (e.g. agent_detect.defaultWhich).
 *
 * @param {object} env Parsed key/value pairs.
 * @returns {object} New object with the unexpanded entries removed.
 */
function dropUnexpandedRefs(env) {
  const shellReference = /\$[A-Za-z_][A-Za-z0-9_]*|\$\{[^}]+\}/;
  const kept = {};
  Object.entries(env).forEach(([name, raw]) => {
    const isUnexpanded = typeof raw === 'string' && shellReference.test(raw);
    if (!isUnexpanded) {
      kept[name] = raw;
    }
  });
  return kept;
}
35
+
23
36
  function mergeEnv() {
24
37
  // Project .env intentionally wins over ~/.zshrc so local setup is reproducible.
25
38
  return {
26
39
  ...process.env,
27
- ...readEnvFile(path.join(process.env.HOME || '', '.zshrc')),
28
- ...readEnvFile(path.join(ROOT, '.env')),
40
+ ...dropUnexpandedRefs(readEnvFile(path.join(process.env.HOME || '', '.zshrc'))),
41
+ ...dropUnexpandedRefs(readEnvFile(path.join(ROOT, '.env'))),
29
42
  };
30
43
  }
31
44
 
@@ -105,10 +118,43 @@ function fixablePrerequisites(env) {
105
118
  const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
106
119
  if (!resolveCommand(edgeCommand, [path.join(ROOT, '.venv-tts', 'bin', 'edge-tts')])) missing.push('edge-tts');
107
120
  }
121
+ if (ttsBackend === 'fireredtts2') {
122
+ const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
123
+ const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
124
+ const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
125
+ if (!isExecutable(firePath) || !fs.existsSync(fireModel)) missing.push('FireRedTTS-2');
126
+ }
108
127
  if (backend === 'hermes' && !commandExists('hermes')) missing.push('hermes CLI');
109
128
  return missing;
110
129
  }
111
130
 
131
/**
 * Run the FireRedTTS-2 installer when that backend is selected but its
 * wrapper or model directory is missing.
 *
 * @param {object} env Merged environment values.
 * @returns {boolean} true when the installer was launched (whether or not it
 *   succeeded), false when nothing was attempted.
 */
function installFireRedTts2IfNeeded(env) {
  const ttsBackend = (env.TTS_BACKEND || 'edge').toLowerCase();
  if (ttsBackend !== 'fireredtts2') return false;

  const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
  const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
  const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
  if (isExecutable(firePath) && fs.existsSync(fireModel)) return false;

  // Opt-out switch, read from the real process environment (not the merged env).
  if (['0', 'false', 'no', 'off'].includes(String(process.env.VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 || '1').toLowerCase())) {
    console.log('Skipping FireRedTTS-2 auto-install because VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 is off.');
    return false;
  }

  console.log('VerbalCoding doctor: TTS_BACKEND=fireredtts2 but FireRedTTS-2 is missing; installing...');
  const result = spawnSync('bash', [path.join(ROOT, 'scripts', 'install_fireredtts2.sh'), '--yes'], {
    cwd: ROOT,
    stdio: 'inherit',
    env: process.env,
  });
  if (result.status !== 0) {
    // status is null when bash could not be spawned or was killed by a signal;
    // surface that detail instead of printing only "status null".
    let reason = '';
    if (result.error) reason = ` (${result.error.message})`;
    else if (result.signal) reason = ` (signal ${result.signal})`;
    console.log(`FireRedTTS-2 installer exited with status ${result.status}${reason}. Continuing with checks.`);
    // Leave .env untouched: rewriting FIREREDTTS2_* with the default locations
    // after a failed install would discard any custom paths the user had set.
    return true;
  }

  // Point .env at the default locations.
  // NOTE(review): assumes the installer puts the wrapper and model there;
  // confirm against scripts/install_fireredtts2.sh.
  upsertEnvFile(path.join(ROOT, '.env'), {
    FIREREDTTS2_COMMAND: './.local/bin/fireredtts2',
    FIREREDTTS2_PRETRAINED_DIR: 'pretrained_models/FireRedTTS2',
  });
  return true;
}
157
+
112
158
  function installHermesCliIfNeeded(env) {
113
159
  const backend = (env.AGENT_BACKEND || 'hermes').toLowerCase();
114
160
  if (backend !== 'hermes' || commandExists('hermes')) return false;
@@ -192,6 +238,11 @@ if (autoFixEnabled && missingBeforeFix.length > 0) {
192
238
  env = mergeEnv();
193
239
  }
194
240
  if (autoFixEnabled) {
241
+ const fireAttempted = installFireRedTts2IfNeeded(env);
242
+ if (fireAttempted) {
243
+ console.log('');
244
+ env = mergeEnv();
245
+ }
195
246
  const hermesAttempted = installHermesCliIfNeeded(env);
196
247
  if (hermesAttempted) {
197
248
  console.log('');
@@ -246,8 +297,8 @@ note('Progress/voice language', env.VOICE_LANGUAGE || env.WHISPER_CPP_LANGUAGE |
246
297
  note('Latency log path', env.LATENCY_LOG_PATH || './.logs/latency.jsonl');
247
298
  note('TTS voice fallback', env.TTS_VOICE || 'ko-KR-SunHiNeural');
248
299
 
249
- if (!['edge', 'openvoice', 'speechswift', 'supertonic'].includes(ttsBackend)) {
250
- ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, or supertonic') && ok;
300
+ if (!['edge', 'openvoice', 'speechswift', 'supertonic', 'omnivoice', 'qwen3tts', 'mlxaudio', 'fireredtts2', 'mossttsnano', 'neuttsair'].includes(ttsBackend)) {
301
+ ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, supertonic, omnivoice, qwen3tts, mlxaudio, fireredtts2, mossttsnano, or neuttsair') && ok;
251
302
  }
252
303
  if (ttsBackend === 'edge') {
253
304
  const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
@@ -272,6 +323,55 @@ if (ttsBackend === 'edge') {
272
323
  ok = check('supertonic CLI', commandExists(supertonicCommand), commandExists(supertonicCommand) || 'install with: python3 -m pip install supertonic') && ok;
273
324
  note('Supertonic voice/lang/steps', `${env.SUPERTONIC_VOICE || 'M1'} / ${env.SUPERTONIC_LANGUAGE || 'ko'} / ${env.SUPERTONIC_STEPS || '2'}`);
274
325
  note('Supertonic progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.SUPERTONIC_PROGRESS || '0').toLowerCase()) ? 'supertonic' : 'edge fallback');
326
+ } else if (ttsBackend === 'omnivoice') {
327
+ const omniPython = env.OMNIVOICE_PYTHON || path.join(ROOT, '.venv-omnivoice', 'bin', 'python');
328
+ const resolvedOmniPython = path.isAbsolute(omniPython) ? omniPython : path.resolve(ROOT, omniPython);
329
+ const refAudio = path.resolve(ROOT, env.OMNIVOICE_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
330
+ ok = check('OmniVoice Python', fs.existsSync(resolvedOmniPython) || commandExists(omniPython), fs.existsSync(resolvedOmniPython) ? path.relative(ROOT, resolvedOmniPython) : 'install with: python -m venv .venv-omnivoice && .venv-omnivoice/bin/pip install torch torchaudio soundfile omnivoice') && ok;
331
+ ok = check('OmniVoice reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
332
+ ok = check('OmniVoice synth wrapper help', spawnSync(fs.existsSync(resolvedOmniPython) ? resolvedOmniPython : 'python3', ['integrations/omnivoice/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/omnivoice/synth.py') && ok;
333
+ note('OmniVoice model/device', `${env.OMNIVOICE_MODEL || 'k2-fsa/OmniVoice'} / ${env.OMNIVOICE_DEVICE || 'mps'}`);
334
+ note('OmniVoice progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.OMNIVOICE_PROGRESS || '0').toLowerCase()) ? 'omnivoice' : 'edge fallback');
335
+ } else if (ttsBackend === 'qwen3tts') {
336
+ const qwenCommand = env.QWEN3TTS_COMMAND || 'audio';
337
+ ok = check('Qwen3 TTS audio CLI', commandExists(qwenCommand), commandExists(qwenCommand) || 'install speech-swift/audio first') && ok;
338
+ note('Qwen3 speaker', env.QWEN3TTS_SPEAKER || 'sohee');
339
+ note('Qwen3 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.QWEN3TTS_PROGRESS || '0').toLowerCase()) ? 'qwen3tts' : 'edge fallback');
340
+ } else if (ttsBackend === 'mlxaudio') {
341
+ const mlxPython = env.MLXAUDIO_PYTHON || './.venv-mlxaudio/bin/python';
342
+ const mlxPath = path.isAbsolute(mlxPython) ? mlxPython : path.resolve(ROOT, mlxPython);
343
+ ok = check('MLX Audio Python', isExecutable(mlxPath) || commandExists(mlxPython), isExecutable(mlxPath) ? path.relative(ROOT, mlxPath) : (commandExists(mlxPython) || 'install with: scripts/install_mlxaudio.sh --yes')) && ok;
344
+ ok = check('MLX Audio wrapper help', spawnSync(isExecutable(mlxPath) ? mlxPath : 'python3', ['integrations/mlxaudio/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/mlxaudio/synth.py') && ok;
345
+ note('MLX Audio model/voice', `${env.MLXAUDIO_MODEL || 'mlx-community/Qwen3-TTS-12Hz-1.7B-Base-8bit'} / ${env.MLXAUDIO_VOICE || 'Chelsie'}`);
346
+ note('MLX Audio progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MLXAUDIO_PROGRESS || '0').toLowerCase()) ? 'mlxaudio' : 'edge fallback');
347
+ } else if (ttsBackend === 'fireredtts2') {
348
+ const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
349
+ const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
350
+ const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
351
+ ok = check('FireRedTTS-2 wrapper', isExecutable(firePath), path.relative(ROOT, firePath) || firePath) && ok;
352
+ ok = check('FireRedTTS-2 model', fs.existsSync(fireModel), path.relative(ROOT, fireModel)) && ok;
353
+ ok = check('FireRedTTS-2 synth wrapper help', spawnSync(isExecutable(firePath) ? firePath : process.execPath, isExecutable(firePath) ? ['--help'] : ['integrations/fireredtts2/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/fireredtts2/synth.py') && ok;
354
+ note('FireRedTTS-2 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.FIREREDTTS2_PROGRESS || '0').toLowerCase()) ? 'fireredtts2' : 'edge fallback');
355
+ } else if (ttsBackend === 'mossttsnano') {
356
+ const mossCommand = env.MOSSTTSNANO_COMMAND || './.venv-mossttsnano/bin/python';
357
+ const mossPath = path.isAbsolute(mossCommand) ? mossCommand : path.resolve(ROOT, mossCommand);
358
+ const mossScript = path.resolve(ROOT, env.MOSSTTSNANO_SCRIPT || 'vendor/MOSS-TTS-Nano/infer.py');
359
+ ok = check('MOSS-TTS-Nano Python', isExecutable(mossPath) || commandExists(mossCommand), isExecutable(mossPath) ? path.relative(ROOT, mossPath) : (commandExists(mossCommand) || 'missing')) && ok;
360
+ ok = check('MOSS-TTS-Nano infer.py', fs.existsSync(mossScript), path.relative(ROOT, mossScript)) && ok;
361
+ note('MOSS checkpoint', env.MOSSTTSNANO_CHECKPOINT || 'OpenMOSS-Team/MOSS-TTS-Nano');
362
+ note('MOSS progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MOSSTTSNANO_PROGRESS || '0').toLowerCase()) ? 'mossttsnano' : 'edge fallback');
363
+ } else if (ttsBackend === 'neuttsair') {
364
+ const neuPython = env.NEUTTSAIR_PYTHON || './.venv-neuttsair/bin/python';
365
+ const neuPath = path.isAbsolute(neuPython) ? neuPython : path.resolve(ROOT, neuPython);
366
+ const neuScript = path.resolve(ROOT, env.NEUTTSAIR_SCRIPT || 'integrations/neuttsair/synth.py');
367
+ const refAudio = path.resolve(ROOT, env.NEUTTSAIR_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
368
+ ok = check('NeuTTS Air Python', isExecutable(neuPath) || commandExists(neuPython), isExecutable(neuPath) ? path.relative(ROOT, neuPath) : (commandExists(neuPython) || 'install with: python3 -m venv .venv-neuttsair && .venv-neuttsair/bin/pip install -e vendor/neutts-air')) && ok;
369
+ ok = check('NeuTTS Air wrapper', fs.existsSync(neuScript), path.relative(ROOT, neuScript)) && ok;
370
+ ok = check('NeuTTS Air reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
371
+ ok = check('NeuTTS Air synth wrapper help', spawnSync(isExecutable(neuPath) ? neuPath : 'python3', ['integrations/neuttsair/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/neuttsair/synth.py') && ok;
372
+ note('NeuTTS Air backbone/device', `${env.NEUTTSAIR_BACKBONE_REPO || env.NEUTTSAIR_BACKBONE || 'neuphonic/neutts-air-q4-gguf'} / ${env.NEUTTSAIR_BACKBONE_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
373
+ note('NeuTTS Air codec/device', `${env.NEUTTSAIR_CODEC_REPO || env.NEUTTSAIR_CODEC || 'neuphonic/neucodec'} / ${env.NEUTTSAIR_CODEC_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
374
+ note('NeuTTS Air progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.NEUTTSAIR_PROGRESS || '0').toLowerCase()) ? 'neuttsair' : 'edge fallback');
275
375
  }
276
376
 
277
377
  const backendCommand = {
@@ -166,7 +166,7 @@ async function main() {
166
166
  const autoJoinVoiceChannels = await ask('Auto-join voice channel names', process.env.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general');
167
167
  const transcriptChannelId = await ask('Transcript text channel/thread ID', process.env.TRANSCRIPT_CHANNEL_ID || '');
168
168
  const language = await ask('Default voice language: ko/en/auto', process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko');
169
- const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic', process.env.TTS_BACKEND || 'edge');
169
+ const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic/omnivoice/qwen3tts/mlxaudio/fireredtts2/mossttsnano/neuttsair', process.env.TTS_BACKEND || 'edge');
170
170
  const edgeTtsCommand = await ask('Edge TTS command', process.env.EDGE_TTS_COMMAND || process.env.TTS_EDGE_COMMAND || 'edge-tts');
171
171
  const ttsVoice = await ask('TTS voice', process.env.TTS_VOICE || 'ko-KR-SunHiNeural');
172
172
  const ttsRate = await ask('TTS rate', process.env.TTS_RATE || '+10%');
@@ -179,6 +179,8 @@ async function main() {
179
179
  const openvoiceDir = await ask('OpenVoice repo dir', process.env.OPENVOICE_DIR || './vendor/OpenVoice');
180
180
  const openvoiceVenv = await ask('OpenVoice venv dir', process.env.OPENVOICE_VENV || './.venv-openvoice');
181
181
  const openvoiceRefAudio = await ask('OpenVoice reference audio path', process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
182
+ const omnivoicePython = await ask('OmniVoice Python', process.env.OMNIVOICE_PYTHON || './.venv-omnivoice/bin/python');
183
+ const omnivoiceRefAudio = await ask('OmniVoice reference audio path', process.env.OMNIVOICE_REF_AUDIO || process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
182
184
  const requireWake = (await ask('Require wake word? 1/0', process.env.REQUIRE_WAKE_WORD || '0')) === '1';
183
185
  const verboseProgress = (await ask('Verbose progress by default? 1/0', process.env.AGENT_VERBOSE_PROGRESS || process.env.VERBALCODING_VERBOSE_PROGRESS || '0')) === '1';
184
186
  const utteranceIdleMs = await ask('Utterance idle wait before STT, ms', process.env.UTTERANCE_IDLE_MS || '4500');
@@ -207,6 +209,8 @@ async function main() {
207
209
  openvoiceDir,
208
210
  openvoiceVenv,
209
211
  openvoiceRefAudio,
212
+ omnivoicePython,
213
+ omnivoiceRefAudio,
210
214
  requireWakeWord: requireWake,
211
215
  verboseProgress,
212
216
  utteranceIdleMs,
@@ -0,0 +1,109 @@
1
#!/usr/bin/env bash
#
# Installer for the FireRedTTS-2 TTS backend of VerbalCoding.
# Run with --help for the list of steps and supported flags.
set -euo pipefail

# Every relative path below is resolved from the parent of the directory
# that contains this script.
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

# Flags toggled by the command line (0 = off, 1 = on).
ASSUME_YES=0
SKIP_MODEL=0
SKIP_PIP=0

#######################################
# Print the usage text.
# Outputs: usage text on stdout
#######################################
usage() {
  cat <<'USAGE'
Usage: scripts/install_fireredtts2.sh [--yes] [--skip-model] [--skip-pip]

Installs FireRedTTS-2 for VerbalCoding:
- clones FireRedTeam/FireRedTTS2 under vendor/FireRedTTS2
- creates .venv-fireredtts2
- installs upstream Python dependencies
- downloads https://huggingface.co/FireRedTeam/FireRedTTS2 weights under pretrained_models/FireRedTTS2
- creates .local/bin/fireredtts2 wrapper used by TTS_BACKEND=fireredtts2

The model is large. Use --skip-model only if FIREREDTTS2_PRETRAINED_DIR points elsewhere.
USAGE
}

#######################################
# Parse command-line flags.
# Globals:   ASSUME_YES, SKIP_MODEL, SKIP_PIP (written)
# Arguments: the script's positional parameters
# Outputs:   usage on stdout for -h/--help (then exits 0);
#            a warning on stderr for each unrecognised argument
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      -y|--yes) ASSUME_YES=1 ;;
      --skip-model) SKIP_MODEL=1 ;;
      --skip-pip) SKIP_PIP=1 ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        # Unknown arguments used to be dropped silently, so a mistyped
        # --skip-model went unnoticed. Keep going, but say so.
        printf 'Warning: ignoring unknown argument: %s\n' "$arg" >&2
        ;;
    esac
  done
}

parse_args "$@"
32
+
33
# Print a progress line on stdout, prefixed with "==> ".
# Arguments: words of the message (joined with spaces).
log() {
  local message="$*"
  printf '==> %s\n' "$message"
}
34
# Print a diagnostic on stderr, prefixed with "Warning: ".
# Arguments: words of the message (joined with spaces).
warn() {
  local message="$*"
  printf 'Warning: %s\n' "$message" 1>&2
}
35
# Succeed when the named command can be resolved by the shell.
# Arguments: $1 - command name to look up
# Outputs:   nothing (lookup output is discarded)
has_cmd() {
  local tool="$1"
  command -v "$tool" >/dev/null 2>&1
}
36
#######################################
# Ask a yes/no question on stderr and read the reply from stdin.
# Globals:   ASSUME_YES (read) - "1" answers yes without prompting
# Arguments: $1 - question text
# Returns:   0 for y/yes (any letter case), 1 for anything else,
#            including an empty reply or end of input
#######################################
confirm() {
  # Keep the reply local so it cannot clobber a caller's variable.
  local answer=""
  if [ "$ASSUME_YES" = "1" ]; then return 0; fi
  printf '%s [y/N]: ' "$1" >&2
  # read fails at end of input; treat that as "no reply" instead of letting
  # errexit abort the script. A final line without a newline is still used.
  read -r answer || true
  case "$answer" in
    [yY]|[yY][eE][sS]) return 0 ;;
    *) return 1 ;;
  esac
}
42
+
43
# Preflight: get consent for the large download, create the working
# directories, and make sure git and a new enough Python are available.
if [ "$ASSUME_YES" != "1" ]; then
  # Exit status 2 marks "declined by the user", distinct from the
  # status 1 used for missing prerequisites below.
  confirm 'FireRedTTS-2 can download several GB of model/dependency data. Continue?' || exit 2
fi

mkdir -p vendor .local/bin pretrained_models

if ! has_cmd git; then
  warn 'git is required to install FireRedTTS-2.'
  exit 1
fi

# Use the first interpreter found on PATH, in this order of preference.
PYTHON_BIN=""
for candidate in python3.12 python3.11 python3; do
  if has_cmd "$candidate"; then
    PYTHON_BIN="$candidate"
    break
  fi
done
if [ -z "$PYTHON_BIN" ]; then
  warn 'Python >=3.11 is required to install FireRedTTS-2.'
  exit 1
fi

# %-formatting rather than an f-string, so that an old python3 still reports
# its version instead of dying with a SyntaxError before the check below.
PYTHON_VERSION="$("$PYTHON_BIN" -c 'import sys; print("%d.%d" % sys.version_info[:2])')"

# Compare numerically. The previous pattern list (3.11|3.12|3.13*) rejected
# 3.14 and later even though the requirement is stated as ">=3.11".
if ! "$PYTHON_BIN" -c 'import sys; sys.exit(0 if sys.version_info >= (3, 11) else 1)'; then
  warn "FireRedTTS-2 requires Python >=3.11; found $PYTHON_VERSION at $PYTHON_BIN"
  exit 1
fi
70
+
71
# Fetch the upstream sources unless a git checkout is already present.
if [[ -d vendor/FireRedTTS2/.git ]]; then
  log 'FireRedTTS-2 repo already exists'
else
  log 'Cloning FireRedTTS-2'
  git clone --depth 1 https://github.com/FireRedTeam/FireRedTTS2.git vendor/FireRedTTS2
fi

# Python environment and dependencies; skipped entirely with --skip-pip.
case "$SKIP_PIP" in
  1) ;;
  *)
    # Create the virtualenv only when its interpreter is missing.
    if [[ ! -x .venv-fireredtts2/bin/python ]]; then
      log 'Creating .venv-fireredtts2'
      "$PYTHON_BIN" -m venv .venv-fireredtts2
    fi

    log 'Installing FireRedTTS-2 Python dependencies'
    .venv-fireredtts2/bin/python -m pip install --upgrade pip setuptools wheel
    .venv-fireredtts2/bin/python -m pip install torch torchaudio huggingface_hub
    .venv-fireredtts2/bin/python -m pip install -e vendor/FireRedTTS2

    # Upstream requirements are installed only if the checkout ships them.
    if [[ -f vendor/FireRedTTS2/requirements.txt ]]; then
      .venv-fireredtts2/bin/python -m pip install -r vendor/FireRedTTS2/requirements.txt
    fi
    ;;
esac
91
+
92
# Download the pretrained weights unless --skip-model was given or the
# target directory already contains at least one entry.
if [ "$SKIP_MODEL" != "1" ]; then
  if [ -d pretrained_models/FireRedTTS2 ] && [ "$(find pretrained_models/FireRedTTS2 -mindepth 1 -maxdepth 1 2>/dev/null | head -n 1)" ]; then
    log 'FireRedTTS-2 pretrained model already exists'
  else
    log 'Downloading FireRedTTS-2 weights from https://huggingface.co/FireRedTeam/FireRedTTS2'
    if [ -x .venv-fireredtts2/bin/hf ]; then
      # Newer huggingface_hub releases ship "hf" as the CLI entry point.
      # --local-dir-use-symlinks is deprecated there, so it is not passed.
      .venv-fireredtts2/bin/hf download FireRedTeam/FireRedTTS2 --local-dir pretrained_models/FireRedTTS2
    elif [ -x .venv-fireredtts2/bin/huggingface-cli ]; then
      # Older huggingface_hub: keep the original invocation unchanged.
      .venv-fireredtts2/bin/huggingface-cli download FireRedTeam/FireRedTTS2 --local-dir pretrained_models/FireRedTTS2 --local-dir-use-symlinks False
    else
      # Typically reached with --skip-pip before the venv was ever created.
      warn 'No Hugging Face CLI found in .venv-fireredtts2; re-run without --skip-pip, or pass --skip-model.'
      exit 1
    fi
  fi
fi
100
+
101
# Generate the launcher that runs the synth script with the venv interpreter.
# The single-quoted arguments are written verbatim: "$0", "$ROOT" and "$@"
# must expand when the wrapper runs, not while this installer runs.
# shellcheck disable=SC2016
printf '%s\n' \
  '#!/usr/bin/env bash' \
  'ROOT="$(cd "$(dirname "$0")/../.." && pwd)"' \
  'exec "$ROOT/.venv-fireredtts2/bin/python" "$ROOT/integrations/fireredtts2/synth.py" "$@"' \
  > .local/bin/fireredtts2
chmod +x .local/bin/fireredtts2

log 'Installed .local/bin/fireredtts2 wrapper'
log 'Set FIREREDTTS2_COMMAND=./.local/bin/fireredtts2 and TTS_BACKEND=fireredtts2, then restart VerbalCoding.'