verbalcoding 0.2.12 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +74 -4
- package/README.es.md +3 -1
- package/README.fr.md +3 -1
- package/README.ja.md +3 -1
- package/README.ko.md +4 -2
- package/README.md +4 -2
- package/README.ru.md +3 -1
- package/README.zh.md +3 -1
- package/app-node/agent_adapters.test.mjs +14 -0
- package/app-node/agent_routing.mjs +148 -0
- package/app-node/agent_routing.test.mjs +138 -0
- package/app-node/agent_turn.mjs +86 -0
- package/app-node/agent_turn.test.mjs +109 -0
- package/app-node/bridge_context.mjs +73 -0
- package/app-node/bridge_context.test.mjs +54 -0
- package/app-node/bridge_state.mjs +4 -0
- package/app-node/bridge_wireup.test.mjs +462 -0
- package/app-node/cli_install.test.mjs +31 -0
- package/app-node/cross_agent_routing.test.mjs +78 -0
- package/app-node/discord_command_router.mjs +204 -0
- package/app-node/discord_command_router.test.mjs +311 -0
- package/app-node/discord_voice_setup.mjs +251 -0
- package/app-node/discord_voice_setup.test.mjs +86 -0
- package/app-node/hermes_profiles.test.mjs +12 -1
- package/app-node/install_config.mjs +110 -3
- package/app-node/install_config.test.mjs +8 -0
- package/app-node/instance_doctor.test.mjs +9 -0
- package/app-node/instances.test.mjs +8 -1
- package/app-node/main.mjs +488 -1368
- package/app-node/mcp_tools.test.mjs +7 -0
- package/app-node/notification_handler.mjs +89 -0
- package/app-node/notification_handler.test.mjs +187 -0
- package/app-node/plan_dispatcher.mjs +215 -0
- package/app-node/plan_dispatcher.test.mjs +101 -0
- package/app-node/plan_mode.mjs +36 -7
- package/app-node/plan_mode.test.mjs +78 -0
- package/app-node/progress_handler.mjs +220 -0
- package/app-node/progress_handler.test.mjs +193 -0
- package/app-node/progress_speech.mjs +54 -32
- package/app-node/progress_speech.test.mjs +12 -3
- package/app-node/project_sessions.mjs +5 -2
- package/app-node/project_sessions.test.mjs +7 -0
- package/app-node/research_mode.mjs +282 -0
- package/app-node/research_mode.test.mjs +264 -0
- package/app-node/restart_notice.mjs +3 -0
- package/app-node/restart_notice.test.mjs +11 -0
- package/app-node/session_ontology.mjs +271 -0
- package/app-node/session_ontology.test.mjs +130 -0
- package/app-node/smart_progress.mjs +1 -1
- package/app-node/stream_sentencer.mjs +32 -2
- package/app-node/stream_sentencer.test.mjs +65 -0
- package/app-node/streaming_tts_queue.mjs +5 -1
- package/app-node/streaming_tts_queue.test.mjs +7 -1
- package/app-node/stt_whisper.mjs +24 -0
- package/app-node/stt_whisper.test.mjs +32 -0
- package/app-node/text_routing.mjs +4 -2
- package/app-node/tts_backends.mjs +537 -3
- package/app-node/tts_backends.test.mjs +454 -0
- package/app-node/tts_player.mjs +164 -0
- package/app-node/tts_player.test.mjs +202 -0
- package/app-node/tts_runtime.mjs +134 -0
- package/app-node/tts_runtime.test.mjs +89 -0
- package/app-node/tts_settings.mjs +150 -3
- package/app-node/tts_settings.test.mjs +204 -0
- package/app-node/tts_voice_config.mjs +136 -2
- package/app-node/tts_voice_config.test.mjs +94 -0
- package/app-node/utterance_router.mjs +216 -0
- package/app-node/utterance_router.test.mjs +236 -0
- package/app-node/voice_autojoin.mjs +37 -0
- package/app-node/voice_autojoin.test.mjs +59 -0
- package/app-node/voice_io.mjs +272 -0
- package/app-node/voice_io.test.mjs +102 -0
- package/app-node/voice_turn_runner.mjs +449 -0
- package/app-node/voice_turn_runner.test.mjs +289 -0
- package/docs/CONFIGURATION.md +12 -2
- package/docs/HARNESSES.md +58 -0
- package/docs/HARNESS_AIDER.md +50 -0
- package/docs/HARNESS_CLAUDE.md +56 -0
- package/docs/HARNESS_CODEX.md +56 -0
- package/docs/HARNESS_CURSOR.md +45 -0
- package/docs/HARNESS_GEMINI.md +45 -0
- package/docs/HARNESS_HERMES.md +57 -0
- package/docs/HARNESS_OPENCLAW.md +44 -0
- package/docs/HARNESS_OPENCODE.md +44 -0
- package/docs/README.md +1 -0
- package/docs/ROADMAP.md +20 -5
- package/docs/TTS_BACKENDS.md +227 -0
- package/docs/USAGE.md +22 -0
- package/docs/i18n/AGENTS.es.md +34 -0
- package/docs/i18n/AGENTS.fr.md +34 -0
- package/docs/i18n/AGENTS.ja.md +34 -0
- package/docs/i18n/AGENTS.ko.md +34 -0
- package/docs/i18n/AGENTS.ru.md +34 -0
- package/docs/i18n/AGENTS.zh.md +34 -0
- package/docs/i18n/HARNESSES.es.md +58 -0
- package/docs/i18n/HARNESSES.fr.md +58 -0
- package/docs/i18n/HARNESSES.ja.md +58 -0
- package/docs/i18n/HARNESSES.ko.md +58 -0
- package/docs/i18n/HARNESSES.ru.md +58 -0
- package/docs/i18n/HARNESSES.zh.md +58 -0
- package/docs/i18n/HARNESS_AIDER.es.md +48 -0
- package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
- package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
- package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
- package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
- package/docs/i18n/HARNESS_CODEX.es.md +55 -0
- package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
- package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
- package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
- package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
- package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
- package/docs/i18n/HARNESS_HERMES.es.md +54 -0
- package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
- package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
- package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
- package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
- package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
- package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
- package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
- package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
- package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
- package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
- package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
- package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
- package/integrations/fireredtts2/mlx_llm.py +183 -0
- package/integrations/fireredtts2/synth.py +156 -0
- package/integrations/fireredtts2/synth_mlx.py +196 -0
- package/integrations/mlxaudio/synth.py +74 -0
- package/integrations/neuttsair/synth.py +104 -0
- package/integrations/omnivoice/synth.py +110 -0
- package/package.json +6 -1
- package/scripts/cli.mjs +84 -0
- package/scripts/doctor.mjs +104 -4
- package/scripts/install.mjs +5 -1
- package/scripts/install_fireredtts2.sh +109 -0
- package/scripts/install_mlxaudio.sh +34 -0
- package/scripts/install_mossttsnano.sh +46 -0
- package/scripts/postinstall.mjs +34 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def repo_root() -> Path:
    """Return the directory two levels above the folder that holds this script.

    With the script at ``integrations/neuttsair/synth.py`` that is the
    directory containing ``integrations``.
    """
    script_path = Path(__file__).resolve()
    return script_path.parents[2]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def resolve(root: Path, value: str | None) -> str | None:
|
|
15
|
+
if not value:
|
|
16
|
+
return None
|
|
17
|
+
p = Path(value).expanduser()
|
|
18
|
+
if not p.is_absolute():
|
|
19
|
+
p = root / p
|
|
20
|
+
return str(p)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def read_text_arg(value: str | None) -> str:
|
|
24
|
+
if not value:
|
|
25
|
+
return ""
|
|
26
|
+
p = Path(value).expanduser()
|
|
27
|
+
if p.exists():
|
|
28
|
+
return p.read_text(encoding="utf-8").strip()
|
|
29
|
+
return value.strip()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_args() -> argparse.Namespace:
    """Parse this wrapper's command line (read from ``sys.argv``).

    ``--text`` and ``--output`` are required. ``--backbone-repo`` and
    ``--codec-repo`` are accepted as aliases for ``--backbone`` and ``--codec``.
    """
    cli = argparse.ArgumentParser(description="NeuTTS-Air synthesis wrapper for VerbalCoding")
    for required_flag in ("--text", "--output"):
        cli.add_argument(required_flag, required=True)

    cli.add_argument("--backbone", "--backbone-repo", dest="backbone", default="neuphonic/neutts-air-q4-gguf")
    cli.add_argument("--codec", "--codec-repo", dest="codec", default="neuphonic/neucodec")

    # Plain string options, registered in the order they appear in --help.
    string_defaults = (
        ("--backbone-device", "cpu"),
        ("--codec-device", "cpu"),
        ("--ref-audio", ""),
        ("--ref-text", ""),
        ("--ref-text-file", ""),
        ("--language", "en"),
    )
    for flag, fallback in string_defaults:
        cli.add_argument(flag, default=fallback)

    cli.add_argument("--sample-rate", type=int, default=24000)
    cli.add_argument("--cache-ref", action="store_true")
    return cli.parse_args()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def main() -> int:
    """Run one NeuTTS-Air synthesis from the command line.

    Returns:
        The process exit status: 0 once the audio file is written, 127 when
        soundfile/torch/neutts cannot be imported, 66 when the reference audio
        is missing, and 1 when model loading, reference encoding, inference or
        writing the output fails.
    """
    args = parse_args()
    root = repo_root()
    # Put a vendored checkout, when present, at the front of the import path.
    vendor = root / "vendor" / "neutts-air"
    if vendor.exists():
        sys.path.insert(0, str(vendor))
    try:
        import soundfile as sf
        import torch
        from neutts import NeuTTS
    except Exception as exc:
        print(f"NeuTTS-Air dependencies are missing: {exc}", file=sys.stderr, flush=True)
        return 127

    ref_audio = resolve(root, args.ref_audio)
    ref_text = read_text_arg(args.ref_text_file) or read_text_arg(args.ref_text)
    if not ref_audio or not Path(ref_audio).exists():
        print(f"NeuTTS-Air reference audio not found: {ref_audio}", file=sys.stderr, flush=True)
        return 66
    if not ref_text:
        # Fall back to a short generic transcript; users should configure NEUTTSAIR_REF_TEXT
        # or NEUTTSAIR_REF_TEXT_FILE for best cloning quality.
        ref_text = "This is a reference voice sample."

    out = Path(args.output).expanduser()
    out.parent.mkdir(parents=True, exist_ok=True)
    # The cache sits next to the reference audio, e.g. voice.wav -> voice.neutts.pt.
    cache_path = Path(ref_audio).with_suffix(".neutts.pt")

    try:
        print(f"[neutts-air] loading backbone={args.backbone} codec={args.codec}", file=sys.stderr, flush=True)
        tts = NeuTTS(
            backbone_repo=args.backbone,
            backbone_device=args.backbone_device,
            codec_repo=args.codec,
            codec_device=args.codec_device,
        )

        ref_codes = None
        have_codes = False
        if args.cache_ref and cache_path.exists():
            if cache_path.stat().st_mtime < Path(ref_audio).stat().st_mtime:
                # The reference audio was replaced after the cache was written;
                # reusing the cache would keep cloning the previous voice.
                print(f"[neutts-air] cached reference {cache_path} is older than {ref_audio}; re-encoding", file=sys.stderr, flush=True)
            else:
                print(f"[neutts-air] loading cached reference {cache_path}", file=sys.stderr, flush=True)
                try:
                    # NOTE(review): weights_only=False unpickles arbitrary objects, so the
                    # cache file must be trusted. Switch to weights_only=True if
                    # encode_reference() is confirmed to return plain tensors.
                    ref_codes = torch.load(cache_path, map_location="cpu", weights_only=False)
                    have_codes = True
                except Exception as exc:
                    # A truncated or corrupt cache should not fail every run until
                    # someone deletes it by hand; re-encode instead.
                    print(f"[neutts-air] ignoring unreadable reference cache {cache_path}: {exc}", file=sys.stderr, flush=True)
        if not have_codes:
            print(f"[neutts-air] encoding reference {ref_audio}", file=sys.stderr, flush=True)
            ref_codes = tts.encode_reference(ref_audio)
            if args.cache_ref:
                try:
                    torch.save(ref_codes, cache_path)
                except Exception as exc:
                    # Caching is an optimization: a read-only voice directory
                    # must not abort an otherwise successful synthesis.
                    print(f"[neutts-air] could not write reference cache {cache_path}: {exc}", file=sys.stderr, flush=True)

        print(f"[neutts-air] generating chars={len(args.text)}", file=sys.stderr, flush=True)
        wav = tts.infer(args.text, ref_codes, ref_text)
        sf.write(str(out), wav, args.sample_rate)
        print(f"[neutts-air] wrote {out}", file=sys.stderr, flush=True)
        return 0
    except Exception as exc:
        print(f"NeuTTS-Air synthesis failed: {exc}", file=sys.stderr, flush=True)
        return 1
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
    # Hand main()'s integer status to the interpreter as the process exit code.
    sys.exit(main())
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Synthesize speech with k2-fsa OmniVoice for VerbalCoding.
|
|
3
|
+
|
|
4
|
+
The wrapper keeps the Node bridge independent from OmniVoice's Python runtime.
|
|
5
|
+
It accepts one text chunk and writes a 24 kHz WAV file.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import inspect
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _torch_dtype(name: str):
    """Translate a dtype name from the command line into a ``torch`` dtype.

    Matching is case-insensitive. An empty name or "auto" yields None, which
    the caller treats as "do not pass a dtype".

    Raises:
        ValueError: if the name is not one of the recognised aliases.
    """
    import torch

    key = (name or "").lower()
    if key in ("", "auto"):
        return None

    aliases = {
        "float16": torch.float16,
        "fp16": torch.float16,
        "half": torch.float16,
        "bfloat16": torch.bfloat16,
        "bf16": torch.bfloat16,
        "float32": torch.float32,
        "fp32": torch.float32,
    }
    resolved = aliases.get(key)
    if resolved is None:
        raise ValueError(f"Unsupported OmniVoice dtype: {name}")
    return resolved
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _filtered_call(fn, **kwargs: Any):
    """Invoke ``fn`` with only the keyword arguments it can take.

    Arguments whose value is None or "" are always dropped. When the signature
    can be inspected and has no ``**kwargs`` catch-all, arguments whose names
    are not parameters of ``fn`` are dropped as well.
    """
    provided = {key: val for key, val in kwargs.items() if val not in (None, "")}

    try:
        parameters = inspect.signature(fn).parameters
    except (TypeError, ValueError):
        # Signature not inspectable: pass every non-empty argument.
        return fn(**provided)

    for param in parameters.values():
        if param.kind == inspect.Parameter.VAR_KEYWORD:
            # fn accepts arbitrary keywords, so nothing needs filtering by name.
            return fn(**provided)

    accepted = {key: val for key, val in provided.items() if key in parameters}
    return fn(**accepted)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _to_soundfile_array(audio):
    """Coerce model output into a form ``soundfile.write`` can store.

    Torch tensors are detached, moved to the CPU and converted to a float32
    NumPy array, since soundfile cannot read accelerator memory or store
    float16 samples. Other float16 arrays are widened to float32.
    """
    if hasattr(audio, "detach") and hasattr(audio, "cpu"):
        audio = audio.detach().cpu().float().numpy()
    elif str(getattr(audio, "dtype", "")) == "float16" and hasattr(audio, "astype"):
        audio = audio.astype("float32")

    shape = getattr(audio, "shape", None)
    # soundfile expects (frames, channels).
    # NOTE(review): assumes a 2-D result with fewer rows than columns is
    # channels-first, e.g. (1, T) — confirm against OmniVoice's return type.
    if shape is not None and len(shape) == 2 and shape[0] < shape[1]:
        audio = audio.T
    return audio


def synthesize(args: argparse.Namespace) -> None:
    """Generate speech for ``args.text`` and write it to ``args.output`` at 24 kHz.

    Args:
        args: Parsed command-line options (text, output, model, device, dtype,
            ref_audio, ref_text, language, speaker).

    Raises:
        RuntimeError: if soundfile/torch/omnivoice cannot be imported, if the
            model returns an empty list, or if the output file is missing or
            empty after writing.
        ValueError: if ``args.dtype`` is not a recognised dtype name.
    """
    try:
        import soundfile as sf
        import torch
        from omnivoice import OmniVoice
    except Exception as exc:  # pragma: no cover - exercised in real install
        raise RuntimeError(
            "OmniVoice dependencies are missing. Install them in OMNIVOICE_PYTHON's environment: "
            "pip install torch torchaudio soundfile omnivoice"
        ) from exc

    dtype = _torch_dtype(args.dtype)
    load_kwargs = {"device_map": args.device}
    if dtype is not None:
        load_kwargs["dtype"] = dtype

    model = OmniVoice.from_pretrained(args.model, **load_kwargs)
    # Inference only: no gradients are needed.
    if hasattr(torch, "set_grad_enabled"):
        torch.set_grad_enabled(False)

    # Only the options model.generate actually accepts are forwarded.
    audio = _filtered_call(
        model.generate,
        text=args.text,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
        language=args.language,
        speaker=args.speaker,
    )
    # Unwrap a tuple result, then a list result, down to the first item.
    if isinstance(audio, tuple):
        audio = audio[0]
    if isinstance(audio, list):
        if not audio:
            raise RuntimeError("OmniVoice returned no audio")
        audio = audio[0]

    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(out), _to_soundfile_array(audio), 24000)
    if not out.exists() or out.stat().st_size <= 0:
        raise RuntimeError(f"OmniVoice wrote empty output: {out}")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def main(argv: list[str] | None = None) -> int:
|
|
90
|
+
parser = argparse.ArgumentParser(description="VerbalCoding OmniVoice TTS wrapper")
|
|
91
|
+
parser.add_argument("--text", required=True)
|
|
92
|
+
parser.add_argument("--output", required=True)
|
|
93
|
+
parser.add_argument("--model", default="k2-fsa/OmniVoice")
|
|
94
|
+
parser.add_argument("--device", default="mps")
|
|
95
|
+
parser.add_argument("--dtype", default="float16")
|
|
96
|
+
parser.add_argument("--ref-audio", default="")
|
|
97
|
+
parser.add_argument("--ref-text", default="")
|
|
98
|
+
parser.add_argument("--language", default="ko")
|
|
99
|
+
parser.add_argument("--speaker", default="")
|
|
100
|
+
args = parser.parse_args(argv)
|
|
101
|
+
synthesize(args)
|
|
102
|
+
return 0
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
if __name__ == "__main__":  # pragma: no cover
    # Turn any synthesis error into a one-line stderr message and exit status 1.
    # SystemExit raised by argparse is not an Exception and passes through.
    try:
        exit_code = main()
    except Exception as exc:
        print(f"OmniVoice synthesis failed: {exc}", file=sys.stderr)
        exit_code = 1
    sys.exit(exit_code)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "verbalcoding",
|
|
3
|
-
"version": "0.2.12",
|
|
3
|
+
"version": "0.2.13",
|
|
4
4
|
"description": "Discord voice bridge for CLI coding agents.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -35,6 +35,10 @@
|
|
|
35
35
|
"scripts/*.mjs",
|
|
36
36
|
"scripts/*.sh",
|
|
37
37
|
"integrations/openvoice/*.py",
|
|
38
|
+
"integrations/omnivoice/*.py",
|
|
39
|
+
"integrations/fireredtts2/*.py",
|
|
40
|
+
"integrations/mlxaudio/*.py",
|
|
41
|
+
"integrations/neuttsair/*.py",
|
|
38
42
|
"run.sh",
|
|
39
43
|
".env.example",
|
|
40
44
|
"README.md",
|
|
@@ -42,6 +46,7 @@
|
|
|
42
46
|
"LICENSE"
|
|
43
47
|
],
|
|
44
48
|
"scripts": {
|
|
49
|
+
"postinstall": "node scripts/postinstall.mjs",
|
|
45
50
|
"start": "node app-node/main.mjs",
|
|
46
51
|
"setup": "node scripts/install.mjs",
|
|
47
52
|
"doctor": "node scripts/doctor.mjs",
|
package/scripts/cli.mjs
CHANGED
|
@@ -10,7 +10,17 @@ import {
|
|
|
10
10
|
normalizeInstanceAnswers,
|
|
11
11
|
parseKeyValueEnv,
|
|
12
12
|
renderInstanceSetupSummary,
|
|
13
|
+
SUPPORTED_TTS_BACKENDS,
|
|
13
14
|
} from '../app-node/install_config.mjs';
|
|
15
|
+
import {
|
|
16
|
+
applyTtsVoiceSelectionToEnv,
|
|
17
|
+
defaultTtsVoiceConfig,
|
|
18
|
+
effectiveTtsVoiceSelection,
|
|
19
|
+
readTtsVoiceConfig,
|
|
20
|
+
updateTtsVoiceConfig,
|
|
21
|
+
writeTtsVoiceConfig,
|
|
22
|
+
} from '../app-node/tts_voice_config.mjs';
|
|
23
|
+
import { normalizeTtsBackendName } from '../app-node/tts_settings.mjs';
|
|
14
24
|
import { ensureHermesProfile, validateProfileName } from '../app-node/hermes_profiles.mjs';
|
|
15
25
|
import { checkInstanceConfigs } from '../app-node/instance_doctor.mjs';
|
|
16
26
|
import { healInstanceProfileFromEnv } from '../app-node/instance_profile_lifecycle.mjs';
|
|
@@ -38,6 +48,8 @@ Usage:
|
|
|
38
48
|
vc language <ko|en|auto>
|
|
39
49
|
vc language status
|
|
40
50
|
vc restart auto <on|off|status>
|
|
51
|
+
vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}>
|
|
52
|
+
vc tts status
|
|
41
53
|
vc bot invite <client-id> [--guild <guild-id>]
|
|
42
54
|
vc instance list
|
|
43
55
|
vc instance setup [name] [--start]
|
|
@@ -57,6 +69,7 @@ Examples:
|
|
|
57
69
|
vc language en
|
|
58
70
|
vc language ko
|
|
59
71
|
vc language auto
|
|
72
|
+
vc tts backend qwen3
|
|
60
73
|
vc restart auto off
|
|
61
74
|
vc bot invite 123456789012345678
|
|
62
75
|
`;
|
|
@@ -97,6 +110,56 @@ function printLanguageStatus(values) {
|
|
|
97
110
|
console.log(`TTS voice: ${s.ttsVoice}`);
|
|
98
111
|
}
|
|
99
112
|
|
|
113
|
+
/**
 * Locate the TTS voice config file.
 *
 * The path comes from `values.TTS_VOICE_CONFIG`, then from
 * `process.env.TTS_VOICE_CONFIG`, then defaults to `config/tts-voices.json`.
 * A relative path is resolved against ROOT.
 *
 * @param {object} [values] Parsed env-file values; read from disk when omitted.
 * @returns {string} Path to the voice config file.
 */
function ttsVoiceConfigPath(values = readEnvFile()) {
  const fallback = path.join('config', 'tts-voices.json');
  const configured = values.TTS_VOICE_CONFIG || process.env.TTS_VOICE_CONFIG || fallback;
  if (path.isAbsolute(configured)) return configured;
  return path.join(ROOT, configured);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Copy `updates` without the entries whose value is null, undefined or ''.
 * Other falsy values such as 0 and false are kept.
 *
 * @param {object} updates Key/value pairs to filter.
 * @returns {object} A new object holding only the meaningful entries.
 */
function compactUpdates(updates) {
  const isMeaningful = ([, value]) => !(value === null || value === undefined || value === '');
  const kept = Object.entries(updates).filter(isMeaningful);
  return Object.fromEntries(kept);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Print the effective TTS backend, voice type and voice to stdout.
 *
 * The selection is computed from the env file and the voice config file.
 * The voice line shows the voice's label, else its `voice` field, else '-'.
 */
function printTtsStatus() {
  const envValues = readEnvFile();
  const voiceConfig = readTtsVoiceConfig(ttsVoiceConfigPath(envValues), defaultTtsVoiceConfig());
  const { backend, voiceType, voice } = effectiveTtsVoiceSelection(voiceConfig, envValues);
  const voiceName = voice?.label || voice?.voice || '-';
  console.log(`TTS backend: ${backend}`);
  console.log(`TTS voice type: ${voiceType}`);
  console.log(`TTS voice: ${voiceName}`);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Switch the configured TTS backend from the command line.
 *
 * Writes the new backend (and optional voice type) to the voice config file,
 * then mirrors the resulting selection into the env file at ENV_PATH.
 * Empty values are not written.
 *
 * @param {string} rawBackend Backend name as typed by the user.
 * @param {string} [rawVoiceType] Optional voice type to select.
 * @throws {Error} When the backend name is not recognised.
 */
function setTtsBackendFromCli(rawBackend, rawVoiceType = '') {
  const backend = normalizeTtsBackendName(rawBackend, '');
  if (!backend) {
    throw new Error(`Unknown TTS backend: ${rawBackend}. Supported: ${SUPPORTED_TTS_BACKENDS.join(', ')}`);
  }

  const env = readEnvFile();
  const configPath = ttsVoiceConfigPath(env);
  const nextConfig = updateTtsVoiceConfig(
    readTtsVoiceConfig(configPath, defaultTtsVoiceConfig()),
    { backend, voiceType: rawVoiceType },
  );
  writeTtsVoiceConfig(configPath, nextConfig);

  // Compute the selection as if the env file already named the new backend.
  const envWithChoice = { ...env, TTS_BACKEND: backend, TTS_VOICE_TYPE: rawVoiceType || env.TTS_VOICE_TYPE };
  const selected = effectiveTtsVoiceSelection(nextConfig, envWithChoice);
  const nextEnv = applyTtsVoiceSelectionToEnv(env, selected);

  // Voice and language keep their current env value when the selection has none.
  const updates = {
    TTS_BACKEND: nextEnv.TTS_BACKEND,
    TTS_VOICE_TYPE: nextEnv.TTS_VOICE_TYPE,
    TTS_VOICE: nextEnv.TTS_VOICE || env.TTS_VOICE,
    VOICE_LANGUAGE: nextEnv.VOICE_LANGUAGE || env.VOICE_LANGUAGE,
  };
  const backendSpecificKeys = [
    'QWEN3TTS_MODE',
    'QWEN3TTS_SPEAKER',
    'QWEN3TTS_REF_AUDIO',
    'QWEN3TTS_INSTRUCT',
    'MLXAUDIO_VOICE',
    'FIREREDTTS2_PROMPT_AUDIO',
    'MOSSTTSNANO_MODE',
    'MOSSTTSNANO_PROMPT_AUDIO',
  ];
  for (const key of backendSpecificKeys) {
    updates[key] = nextEnv[key];
  }
  upsertEnvFile(ENV_PATH, compactUpdates(updates));

  console.log(`Updated ${ENV_PATH}`);
  console.log(`TTS backend: ${selected.backend}`);
  console.log(`TTS voice type: ${selected.voiceType}`);
  console.log('Restart the bridge for CLI changes to take effect; voice requests switch the running bridge immediately.');
}
|
|
162
|
+
|
|
100
163
|
function printInstanceStatus(statuses) {
|
|
101
164
|
if (statuses.length === 0) {
|
|
102
165
|
console.log('No instance env files found in instances/*.env');
|
|
@@ -308,9 +371,30 @@ async function main(argv = process.argv.slice(2)) {
|
|
|
308
371
|
}
|
|
309
372
|
if (command === 'status') {
|
|
310
373
|
printLanguageStatus(readEnvFile());
|
|
374
|
+
printTtsStatus();
|
|
311
375
|
console.log(autoRestartStatusText(readEnvFile()));
|
|
312
376
|
return;
|
|
313
377
|
}
|
|
378
|
+
if (command === 'tts' || command === 'voice') {
|
|
379
|
+
if (!subcommand || subcommand === 'status') {
|
|
380
|
+
printTtsStatus();
|
|
381
|
+
return;
|
|
382
|
+
}
|
|
383
|
+
if (subcommand === 'backend' || subcommand === 'switch' || subcommand === 'set') {
|
|
384
|
+
const backend = argv[2];
|
|
385
|
+
const voiceType = argv.includes('--voice-type') ? argv[argv.indexOf('--voice-type') + 1] : '';
|
|
386
|
+
if (!backend || backend.startsWith('--')) {
|
|
387
|
+
console.error(`Use: vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}> [--voice-type <name>]`);
|
|
388
|
+
process.exitCode = 2;
|
|
389
|
+
return;
|
|
390
|
+
}
|
|
391
|
+
setTtsBackendFromCli(backend, voiceType);
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
console.error('Use: vc tts status OR vc tts backend <name>');
|
|
395
|
+
process.exitCode = 2;
|
|
396
|
+
return;
|
|
397
|
+
}
|
|
314
398
|
if (command === 'instance') {
|
|
315
399
|
await handleInstanceCommand(argv);
|
|
316
400
|
return;
|
package/scripts/doctor.mjs
CHANGED
|
@@ -20,12 +20,25 @@ function readEnvFile(file) {
|
|
|
20
20
|
}
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
/**
 * Return a copy of `env` without string values that still contain a shell
 * variable reference (`$NAME` or `${...}`).
 *
 * parseKeyValueEnv performs no shell expansion, so a line such as
 *   PATH="$JAVA_HOME/bin:$PATH"
 * would otherwise replace process.env.PATH with the literal "$VAR" text and
 * break every JS-level PATH lookup (e.g. agent_detect.defaultWhich).
 *
 * @param {object} env Parsed key/value pairs.
 * @returns {object} The entries that are safe to merge.
 */
function dropUnexpandedRefs(env) {
  const shellRef = /\$[A-Za-z_][A-Za-z0-9_]*|\$\{[^}]+\}/;
  const out = {};
  Object.entries(env).forEach(([name, value]) => {
    const hasShellRef = typeof value === 'string' && shellRef.test(value);
    if (!hasShellRef) out[name] = value;
  });
  return out;
}
|
|
35
|
+
|
|
23
36
|
function mergeEnv() {
|
|
24
37
|
// Project .env intentionally wins over ~/.zshrc so local setup is reproducible.
|
|
25
38
|
return {
|
|
26
39
|
...process.env,
|
|
27
|
-
...readEnvFile(path.join(process.env.HOME || '', '.zshrc')),
|
|
28
|
-
...readEnvFile(path.join(ROOT, '.env')),
|
|
40
|
+
...dropUnexpandedRefs(readEnvFile(path.join(process.env.HOME || '', '.zshrc'))),
|
|
41
|
+
...dropUnexpandedRefs(readEnvFile(path.join(ROOT, '.env'))),
|
|
29
42
|
};
|
|
30
43
|
}
|
|
31
44
|
|
|
@@ -105,10 +118,43 @@ function fixablePrerequisites(env) {
|
|
|
105
118
|
const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
|
|
106
119
|
if (!resolveCommand(edgeCommand, [path.join(ROOT, '.venv-tts', 'bin', 'edge-tts')])) missing.push('edge-tts');
|
|
107
120
|
}
|
|
121
|
+
if (ttsBackend === 'fireredtts2') {
|
|
122
|
+
const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
|
|
123
|
+
const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
|
|
124
|
+
const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
|
|
125
|
+
if (!isExecutable(firePath) || !fs.existsSync(fireModel)) missing.push('FireRedTTS-2');
|
|
126
|
+
}
|
|
108
127
|
if (backend === 'hermes' && !commandExists('hermes')) missing.push('hermes CLI');
|
|
109
128
|
return missing;
|
|
110
129
|
}
|
|
111
130
|
|
|
131
|
+
/**
 * Run the FireRedTTS-2 installer when that backend is selected but its
 * wrapper command or model directory is missing.
 *
 * Set VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 to 0/false/no/off to skip.
 * The .env file is pointed at the installed paths only after the installer
 * succeeds, so a failed run never overwrites existing settings.
 *
 * @param {object} env Merged environment values.
 * @returns {boolean} true when the installer was launched (even if it
 *   failed), false when nothing needed installing or installing is disabled.
 */
function installFireRedTts2IfNeeded(env) {
  const ttsBackend = (env.TTS_BACKEND || 'edge').toLowerCase();
  if (ttsBackend !== 'fireredtts2') return false;

  const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
  const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
  const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
  if (isExecutable(firePath) && fs.existsSync(fireModel)) return false;

  if (['0', 'false', 'no', 'off'].includes(String(process.env.VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 || '1').toLowerCase())) {
    console.log('Skipping FireRedTTS-2 auto-install because VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 is off.');
    return false;
  }

  console.log('VerbalCoding doctor: TTS_BACKEND=fireredtts2 but FireRedTTS-2 is missing; installing...');
  const result = spawnSync('bash', [path.join(ROOT, 'scripts', 'install_fireredtts2.sh'), '--yes'], {
    cwd: ROOT,
    stdio: 'inherit',
    env: process.env,
  });

  if (result.error) {
    // spawnSync sets `error` (and leaves status null) when bash could not be started.
    console.log(`FireRedTTS-2 installer could not be started: ${result.error.message}. Continuing with checks.`);
    return true;
  }
  if (result.status !== 0) {
    const detail = result.status === null && result.signal ? ` (signal ${result.signal})` : '';
    console.log(`FireRedTTS-2 installer exited with status ${result.status}${detail}. Continuing with checks.`);
    // Leave .env untouched: nothing was installed at the default paths, and
    // any values the user configured must survive a failed attempt.
    return true;
  }

  upsertEnvFile(path.join(ROOT, '.env'), {
    FIREREDTTS2_COMMAND: './.local/bin/fireredtts2',
    FIREREDTTS2_PRETRAINED_DIR: 'pretrained_models/FireRedTTS2',
  });
  return true;
}
|
|
157
|
+
|
|
112
158
|
function installHermesCliIfNeeded(env) {
|
|
113
159
|
const backend = (env.AGENT_BACKEND || 'hermes').toLowerCase();
|
|
114
160
|
if (backend !== 'hermes' || commandExists('hermes')) return false;
|
|
@@ -192,6 +238,11 @@ if (autoFixEnabled && missingBeforeFix.length > 0) {
|
|
|
192
238
|
env = mergeEnv();
|
|
193
239
|
}
|
|
194
240
|
if (autoFixEnabled) {
|
|
241
|
+
const fireAttempted = installFireRedTts2IfNeeded(env);
|
|
242
|
+
if (fireAttempted) {
|
|
243
|
+
console.log('');
|
|
244
|
+
env = mergeEnv();
|
|
245
|
+
}
|
|
195
246
|
const hermesAttempted = installHermesCliIfNeeded(env);
|
|
196
247
|
if (hermesAttempted) {
|
|
197
248
|
console.log('');
|
|
@@ -246,8 +297,8 @@ note('Progress/voice language', env.VOICE_LANGUAGE || env.WHISPER_CPP_LANGUAGE |
|
|
|
246
297
|
note('Latency log path', env.LATENCY_LOG_PATH || './.logs/latency.jsonl');
|
|
247
298
|
note('TTS voice fallback', env.TTS_VOICE || 'ko-KR-SunHiNeural');
|
|
248
299
|
|
|
249
|
-
if (!['edge', 'openvoice', 'speechswift', 'supertonic'].includes(ttsBackend)) {
|
|
250
|
-
ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, or supertonic') && ok;
|
|
300
|
+
if (!['edge', 'openvoice', 'speechswift', 'supertonic', 'omnivoice', 'qwen3tts', 'mlxaudio', 'fireredtts2', 'mossttsnano', 'neuttsair'].includes(ttsBackend)) {
|
|
301
|
+
ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, supertonic, omnivoice, qwen3tts, mlxaudio, fireredtts2, mossttsnano, or neuttsair') && ok;
|
|
251
302
|
}
|
|
252
303
|
if (ttsBackend === 'edge') {
|
|
253
304
|
const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
|
|
@@ -272,6 +323,55 @@ if (ttsBackend === 'edge') {
|
|
|
272
323
|
ok = check('supertonic CLI', commandExists(supertonicCommand), commandExists(supertonicCommand) || 'install with: python3 -m pip install supertonic') && ok;
|
|
273
324
|
note('Supertonic voice/lang/steps', `${env.SUPERTONIC_VOICE || 'M1'} / ${env.SUPERTONIC_LANGUAGE || 'ko'} / ${env.SUPERTONIC_STEPS || '2'}`);
|
|
274
325
|
note('Supertonic progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.SUPERTONIC_PROGRESS || '0').toLowerCase()) ? 'supertonic' : 'edge fallback');
|
|
326
|
+
} else if (ttsBackend === 'omnivoice') {
|
|
327
|
+
const omniPython = env.OMNIVOICE_PYTHON || path.join(ROOT, '.venv-omnivoice', 'bin', 'python');
|
|
328
|
+
const resolvedOmniPython = path.isAbsolute(omniPython) ? omniPython : path.resolve(ROOT, omniPython);
|
|
329
|
+
const refAudio = path.resolve(ROOT, env.OMNIVOICE_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
330
|
+
ok = check('OmniVoice Python', fs.existsSync(resolvedOmniPython) || commandExists(omniPython), fs.existsSync(resolvedOmniPython) ? path.relative(ROOT, resolvedOmniPython) : 'install with: python -m venv .venv-omnivoice && .venv-omnivoice/bin/pip install torch torchaudio soundfile omnivoice') && ok;
|
|
331
|
+
ok = check('OmniVoice reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
|
|
332
|
+
ok = check('OmniVoice synth wrapper help', spawnSync(fs.existsSync(resolvedOmniPython) ? resolvedOmniPython : 'python3', ['integrations/omnivoice/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/omnivoice/synth.py') && ok;
|
|
333
|
+
note('OmniVoice model/device', `${env.OMNIVOICE_MODEL || 'k2-fsa/OmniVoice'} / ${env.OMNIVOICE_DEVICE || 'mps'}`);
|
|
334
|
+
note('OmniVoice progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.OMNIVOICE_PROGRESS || '0').toLowerCase()) ? 'omnivoice' : 'edge fallback');
|
|
335
|
+
} else if (ttsBackend === 'qwen3tts') {
|
|
336
|
+
const qwenCommand = env.QWEN3TTS_COMMAND || 'audio';
|
|
337
|
+
ok = check('Qwen3 TTS audio CLI', commandExists(qwenCommand), commandExists(qwenCommand) || 'install speech-swift/audio first') && ok;
|
|
338
|
+
note('Qwen3 speaker', env.QWEN3TTS_SPEAKER || 'sohee');
|
|
339
|
+
note('Qwen3 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.QWEN3TTS_PROGRESS || '0').toLowerCase()) ? 'qwen3tts' : 'edge fallback');
|
|
340
|
+
} else if (ttsBackend === 'mlxaudio') {
|
|
341
|
+
const mlxPython = env.MLXAUDIO_PYTHON || './.venv-mlxaudio/bin/python';
|
|
342
|
+
const mlxPath = path.isAbsolute(mlxPython) ? mlxPython : path.resolve(ROOT, mlxPython);
|
|
343
|
+
ok = check('MLX Audio Python', isExecutable(mlxPath) || commandExists(mlxPython), isExecutable(mlxPath) ? path.relative(ROOT, mlxPath) : (commandExists(mlxPython) || 'install with: scripts/install_mlxaudio.sh --yes')) && ok;
|
|
344
|
+
ok = check('MLX Audio wrapper help', spawnSync(isExecutable(mlxPath) ? mlxPath : 'python3', ['integrations/mlxaudio/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/mlxaudio/synth.py') && ok;
|
|
345
|
+
note('MLX Audio model/voice', `${env.MLXAUDIO_MODEL || 'mlx-community/Qwen3-TTS-12Hz-1.7B-Base-8bit'} / ${env.MLXAUDIO_VOICE || 'Chelsie'}`);
|
|
346
|
+
note('MLX Audio progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MLXAUDIO_PROGRESS || '0').toLowerCase()) ? 'mlxaudio' : 'edge fallback');
|
|
347
|
+
} else if (ttsBackend === 'fireredtts2') {
|
|
348
|
+
const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
|
|
349
|
+
const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
|
|
350
|
+
const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
|
|
351
|
+
ok = check('FireRedTTS-2 wrapper', isExecutable(firePath), path.relative(ROOT, firePath) || firePath) && ok;
|
|
352
|
+
ok = check('FireRedTTS-2 model', fs.existsSync(fireModel), path.relative(ROOT, fireModel)) && ok;
|
|
353
|
+
// When the generated wrapper is missing, probe synth.py with python3 like the other backends do:
// synth.py is a Python script, so launching it with the Node binary (process.execPath) can never succeed.
ok = check('FireRedTTS-2 synth wrapper help', spawnSync(isExecutable(firePath) ? firePath : 'python3', isExecutable(firePath) ? ['--help'] : ['integrations/fireredtts2/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/fireredtts2/synth.py') && ok;
|
|
354
|
+
note('FireRedTTS-2 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.FIREREDTTS2_PROGRESS || '0').toLowerCase()) ? 'fireredtts2' : 'edge fallback');
|
|
355
|
+
} else if (ttsBackend === 'mossttsnano') {
|
|
356
|
+
const mossCommand = env.MOSSTTSNANO_COMMAND || './.venv-mossttsnano/bin/python';
|
|
357
|
+
const mossPath = path.isAbsolute(mossCommand) ? mossCommand : path.resolve(ROOT, mossCommand);
|
|
358
|
+
const mossScript = path.resolve(ROOT, env.MOSSTTSNANO_SCRIPT || 'vendor/MOSS-TTS-Nano/infer.py');
|
|
359
|
+
ok = check('MOSS-TTS-Nano Python', isExecutable(mossPath) || commandExists(mossCommand), isExecutable(mossPath) ? path.relative(ROOT, mossPath) : (commandExists(mossCommand) || 'missing')) && ok;
|
|
360
|
+
ok = check('MOSS-TTS-Nano infer.py', fs.existsSync(mossScript), path.relative(ROOT, mossScript)) && ok;
|
|
361
|
+
note('MOSS checkpoint', env.MOSSTTSNANO_CHECKPOINT || 'OpenMOSS-Team/MOSS-TTS-Nano');
|
|
362
|
+
note('MOSS progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MOSSTTSNANO_PROGRESS || '0').toLowerCase()) ? 'mossttsnano' : 'edge fallback');
|
|
363
|
+
} else if (ttsBackend === 'neuttsair') {
|
|
364
|
+
const neuPython = env.NEUTTSAIR_PYTHON || './.venv-neuttsair/bin/python';
|
|
365
|
+
const neuPath = path.isAbsolute(neuPython) ? neuPython : path.resolve(ROOT, neuPython);
|
|
366
|
+
const neuScript = path.resolve(ROOT, env.NEUTTSAIR_SCRIPT || 'integrations/neuttsair/synth.py');
|
|
367
|
+
const refAudio = path.resolve(ROOT, env.NEUTTSAIR_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
368
|
+
ok = check('NeuTTS Air Python', isExecutable(neuPath) || commandExists(neuPython), isExecutable(neuPath) ? path.relative(ROOT, neuPath) : (commandExists(neuPython) || 'install with: python3 -m venv .venv-neuttsair && .venv-neuttsair/bin/pip install -e vendor/neutts-air')) && ok;
|
|
369
|
+
ok = check('NeuTTS Air wrapper', fs.existsSync(neuScript), path.relative(ROOT, neuScript)) && ok;
|
|
370
|
+
ok = check('NeuTTS Air reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
|
|
371
|
+
ok = check('NeuTTS Air synth wrapper help', spawnSync(isExecutable(neuPath) ? neuPath : 'python3', ['integrations/neuttsair/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/neuttsair/synth.py') && ok;
|
|
372
|
+
note('NeuTTS Air backbone/device', `${env.NEUTTSAIR_BACKBONE_REPO || env.NEUTTSAIR_BACKBONE || 'neuphonic/neutts-air-q4-gguf'} / ${env.NEUTTSAIR_BACKBONE_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
|
|
373
|
+
note('NeuTTS Air codec/device', `${env.NEUTTSAIR_CODEC_REPO || env.NEUTTSAIR_CODEC || 'neuphonic/neucodec'} / ${env.NEUTTSAIR_CODEC_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
|
|
374
|
+
note('NeuTTS Air progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.NEUTTSAIR_PROGRESS || '0').toLowerCase()) ? 'neuttsair' : 'edge fallback');
|
|
275
375
|
}
|
|
276
376
|
|
|
277
377
|
const backendCommand = {
|
package/scripts/install.mjs
CHANGED
|
@@ -166,7 +166,7 @@ async function main() {
|
|
|
166
166
|
const autoJoinVoiceChannels = await ask('Auto-join voice channel names', process.env.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general');
|
|
167
167
|
const transcriptChannelId = await ask('Transcript text channel/thread ID', process.env.TRANSCRIPT_CHANNEL_ID || '');
|
|
168
168
|
const language = await ask('Default voice language: ko/en/auto', process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko');
|
|
169
|
-
const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic', process.env.TTS_BACKEND || 'edge');
|
|
169
|
+
const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic/omnivoice/qwen3tts/mlxaudio/fireredtts2/mossttsnano/neuttsair', process.env.TTS_BACKEND || 'edge');
|
|
170
170
|
const edgeTtsCommand = await ask('Edge TTS command', process.env.EDGE_TTS_COMMAND || process.env.TTS_EDGE_COMMAND || 'edge-tts');
|
|
171
171
|
const ttsVoice = await ask('TTS voice', process.env.TTS_VOICE || 'ko-KR-SunHiNeural');
|
|
172
172
|
const ttsRate = await ask('TTS rate', process.env.TTS_RATE || '+10%');
|
|
@@ -179,6 +179,8 @@ async function main() {
|
|
|
179
179
|
const openvoiceDir = await ask('OpenVoice repo dir', process.env.OPENVOICE_DIR || './vendor/OpenVoice');
|
|
180
180
|
const openvoiceVenv = await ask('OpenVoice venv dir', process.env.OPENVOICE_VENV || './.venv-openvoice');
|
|
181
181
|
const openvoiceRefAudio = await ask('OpenVoice reference audio path', process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
182
|
+
const omnivoicePython = await ask('OmniVoice Python', process.env.OMNIVOICE_PYTHON || './.venv-omnivoice/bin/python');
|
|
183
|
+
const omnivoiceRefAudio = await ask('OmniVoice reference audio path', process.env.OMNIVOICE_REF_AUDIO || process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
182
184
|
const requireWake = (await ask('Require wake word? 1/0', process.env.REQUIRE_WAKE_WORD || '0')) === '1';
|
|
183
185
|
const verboseProgress = (await ask('Verbose progress by default? 1/0', process.env.AGENT_VERBOSE_PROGRESS || process.env.VERBALCODING_VERBOSE_PROGRESS || '0')) === '1';
|
|
184
186
|
const utteranceIdleMs = await ask('Utterance idle wait before STT, ms', process.env.UTTERANCE_IDLE_MS || '4500');
|
|
@@ -207,6 +209,8 @@ async function main() {
|
|
|
207
209
|
openvoiceDir,
|
|
208
210
|
openvoiceVenv,
|
|
209
211
|
openvoiceRefAudio,
|
|
212
|
+
omnivoicePython,
|
|
213
|
+
omnivoiceRefAudio,
|
|
210
214
|
requireWakeWord: requireWake,
|
|
211
215
|
verboseProgress,
|
|
212
216
|
utteranceIdleMs,
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Strict mode: stop on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ROOT is the parent of the directory that holds this script; every relative
# path used below is resolved from there.
script_dir="$(dirname "$0")"
ROOT="$(cd "$script_dir/.." && pwd)"
cd "$ROOT"
|
|
6
|
+
|
|
7
|
+
# Flags controlled from the command line (0 = off, 1 = on).
ASSUME_YES=0
SKIP_MODEL=0
SKIP_PIP=0

# Print the command-line help text to stdout.
usage() {
  cat <<'USAGE'
Usage: scripts/install_fireredtts2.sh [--yes] [--skip-model] [--skip-pip]

Installs FireRedTTS-2 for VerbalCoding:
- clones FireRedTeam/FireRedTTS2 under vendor/FireRedTTS2
- creates .venv-fireredtts2
- installs upstream Python dependencies
- downloads https://huggingface.co/FireRedTeam/FireRedTTS2 weights under pretrained_models/FireRedTTS2
- creates .local/bin/fireredtts2 wrapper used by TTS_BACKEND=fireredtts2

The model is large. Use --skip-model only if FIREREDTTS2_PRETRAINED_DIR points elsewhere.
USAGE
}

#######################################
# Translate command-line flags into the ASSUME_YES / SKIP_MODEL / SKIP_PIP
# globals.
# Arguments: the script's arguments ("$@")
# Outputs:   help text on stdout for -h/--help; a diagnostic plus the help
#            text on stderr for an unrecognised argument
# Exits:     0 after printing help, 2 on an unrecognised argument (a typo
#            such as --skip-models must not be silently ignored and trigger
#            a multi-GB download)
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      -y|--yes) ASSUME_YES=1 ;;
      --skip-model) SKIP_MODEL=1 ;;
      --skip-pip) SKIP_PIP=1 ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        printf 'Unknown argument: %s\n' "$arg" >&2
        usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
|
|
32
|
+
|
|
33
|
+
# Write a progress line to stdout: "==> " followed by all arguments joined by spaces.
log() {
  local message="$*"
  printf '==> %s\n' "$message"
}
|
|
34
|
+
# Write a diagnostic to stderr: "Warning: " followed by all arguments joined by spaces.
warn() {
  local message="$*"
  printf 'Warning: %s\n' "$message" >&2
}
|
|
35
|
+
# Succeed (status 0) when "$1" resolves to something the shell can run; prints nothing.
has_cmd() {
  local name="$1"
  command -v "$name" &>/dev/null
}
|
|
36
|
+
#######################################
# Ask a yes/no question and report the answer through the exit status.
# Globals:   ASSUME_YES (read) - when "1" the question is skipped
# Arguments: $1 - question text; " [y/N]: " is appended
# Outputs:   the prompt on stderr
# Returns:   0 when ASSUME_YES is 1 or the reply is y, Y, yes or YES;
#            1 for any other reply, including an empty line or end of input
#######################################
confirm() {
  # Keep the reply local so it does not leak into the script's global scope.
  local answer=''
  if [ "$ASSUME_YES" = "1" ]; then return 0; fi
  printf '%s [y/N]: ' "$1" >&2
  # read returns non-zero at end of input; whatever it managed to read (possibly
  # nothing) is still judged below, so the status is deliberately ignored.
  read -r answer || true
  case "$answer" in
    y|Y|yes|YES) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
42
|
+
|
|
43
|
+
# Interactive runs must acknowledge the download size first; declining exits with status 2.
if [[ "$ASSUME_YES" != "1" ]] && ! confirm 'FireRedTTS-2 can download several GB of model/dependency data. Continue?'; then
  exit 2
fi

# Directories the later steps write into.
install_dirs=(vendor .local/bin pretrained_models)
mkdir -p "${install_dirs[@]}"

# git is needed for the clone step.
has_cmd git || {
  warn 'git is required to install FireRedTTS-2.'
  exit 1
}
|
|
53
|
+
# Pick the interpreter used to create the virtualenv.
# Candidates are probed in preference order and the first one that is both
# installed and accepted by the version gate is used. python3.13 is probed last
# because the gate accepts 3.13 but a plain "python3" may resolve to a release
# the gate rejects; an unsupported candidate no longer ends the search.
# Results: PYTHON_BIN (command name) and PYTHON_VERSION ("major.minor").
# NOTE(review): the gate accepts 3.11, 3.12 and 3.13 only, while the messages
# below say ">=3.11" - confirm which of the two is intended.
PYTHON_BIN=""
PYTHON_VERSION=""
first_rejected=""
for candidate in python3.12 python3.11 python3 python3.13; do
  has_cmd "$candidate" || continue
  # A candidate that cannot report its version is treated as unsupported.
  candidate_version="$("$candidate" -c 'import sys; print("%d.%d" % sys.version_info[:2])' 2>/dev/null)" \
    || candidate_version="unknown"
  case "$candidate_version" in
    3.11|3.12|3.13*)
      PYTHON_BIN="$candidate"
      PYTHON_VERSION="$candidate_version"
      break
      ;;
    *)
      # Remember the first rejected interpreter for the error message.
      if [ -z "$first_rejected" ]; then
        first_rejected="$candidate_version at $candidate"
      fi
      ;;
  esac
done

if [ -z "$PYTHON_BIN" ]; then
  if [ -n "$first_rejected" ]; then
    warn "FireRedTTS-2 requires Python >=3.11; found $first_rejected"
  else
    warn 'Python >=3.11 is required to install FireRedTTS-2.'
  fi
  exit 1
fi
|
|
70
|
+
|
|
71
|
+
# Fetch the upstream sources once; an existing checkout is left untouched.
fire_repo_dir=vendor/FireRedTTS2
if [[ -d "$fire_repo_dir/.git" ]]; then
  log 'FireRedTTS-2 repo already exists'
else
  log 'Cloning FireRedTTS-2'
  git clone --depth 1 https://github.com/FireRedTeam/FireRedTTS2.git "$fire_repo_dir"
fi
|
|
77
|
+
|
|
78
|
+
# Create the virtualenv if needed and install the Python packages (skipped with --skip-pip).
if [[ "$SKIP_PIP" != "1" ]]; then
  fire_venv_python=.venv-fireredtts2/bin/python
  fire_requirements=vendor/FireRedTTS2/requirements.txt

  # Run "pip install" inside the virtualenv with the given arguments.
  fire_pip_install() { "$fire_venv_python" -m pip install "$@"; }

  if [[ ! -x "$fire_venv_python" ]]; then
    log 'Creating .venv-fireredtts2'
    "$PYTHON_BIN" -m venv .venv-fireredtts2
  fi

  log 'Installing FireRedTTS-2 Python dependencies'
  fire_pip_install --upgrade pip setuptools wheel
  fire_pip_install torch torchaudio huggingface_hub
  fire_pip_install -e vendor/FireRedTTS2
  if [[ -f "$fire_requirements" ]]; then
    fire_pip_install -r "$fire_requirements"
  fi
fi
|
|
91
|
+
|
|
92
|
+
# Download the pretrained weights unless --skip-model was given or the target
# directory already contains at least one entry.
if [ "$SKIP_MODEL" != "1" ]; then
  if [ -d pretrained_models/FireRedTTS2 ] && [ "$(find pretrained_models/FireRedTTS2 -mindepth 1 -maxdepth 1 2>/dev/null | head -n 1)" ]; then
    log 'FireRedTTS-2 pretrained model already exists'
  else
    # The downloader lives in the virtualenv. With --skip-pip on a tree where the
    # environment was never created it does not exist, so fail with an actionable
    # message instead of a bare "No such file or directory".
    if [ ! -x .venv-fireredtts2/bin/huggingface-cli ]; then
      warn 'Cannot download FireRedTTS-2 weights: .venv-fireredtts2/bin/huggingface-cli was not found. Install the Python dependencies first (re-run without --skip-pip) or pass --skip-model.'
      exit 1
    fi
    log 'Downloading FireRedTTS-2 weights from https://huggingface.co/FireRedTeam/FireRedTTS2'
    .venv-fireredtts2/bin/huggingface-cli download FireRedTeam/FireRedTTS2 --local-dir pretrained_models/FireRedTTS2 --local-dir-use-symlinks False
  fi
fi
|
|
100
|
+
|
|
101
|
+
# Generate the launcher script. The quoted heredoc delimiter keeps $ROOT, $0 and
# "$@" literal so they are expanded when the wrapper runs, not now.
fire_wrapper=.local/bin/fireredtts2
cat <<'SH' > "$fire_wrapper"
#!/usr/bin/env bash
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
exec "$ROOT/.venv-fireredtts2/bin/python" "$ROOT/integrations/fireredtts2/synth.py" "$@"
SH
chmod +x "$fire_wrapper"

# Closing hints for the user.
log 'Installed .local/bin/fireredtts2 wrapper'
log 'Set FIREREDTTS2_COMMAND=./.local/bin/fireredtts2 and TTS_BACKEND=fireredtts2, then restart VerbalCoding.'
|