verbalcoding 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +98 -2
- package/README.es.md +134 -0
- package/README.fr.md +134 -0
- package/README.ja.md +134 -0
- package/README.ko.md +134 -0
- package/README.md +118 -74
- package/README.ru.md +134 -0
- package/README.zh.md +133 -0
- package/app-node/agent_adapters.mjs +37 -5
- package/app-node/agent_adapters.test.mjs +27 -1
- package/app-node/agent_detect.mjs +73 -0
- package/app-node/agent_detect.test.mjs +77 -0
- package/app-node/agent_routing.mjs +148 -0
- package/app-node/agent_routing.test.mjs +138 -0
- package/app-node/agent_turn.mjs +86 -0
- package/app-node/agent_turn.test.mjs +109 -0
- package/app-node/bridge_context.mjs +73 -0
- package/app-node/bridge_context.test.mjs +54 -0
- package/app-node/bridge_state.mjs +4 -0
- package/app-node/bridge_wireup.test.mjs +462 -0
- package/app-node/cli_install.test.mjs +31 -0
- package/app-node/cross_agent_routing.test.mjs +78 -0
- package/app-node/discord_command_router.mjs +204 -0
- package/app-node/discord_command_router.test.mjs +311 -0
- package/app-node/discord_voice_setup.mjs +251 -0
- package/app-node/discord_voice_setup.test.mjs +86 -0
- package/app-node/hermes_profiles.test.mjs +12 -1
- package/app-node/install_config.mjs +113 -3
- package/app-node/install_config.test.mjs +8 -0
- package/app-node/instance_doctor.test.mjs +9 -0
- package/app-node/instances.test.mjs +8 -1
- package/app-node/main.mjs +513 -1058
- package/app-node/mcp_tools.test.mjs +7 -0
- package/app-node/notification_handler.mjs +89 -0
- package/app-node/notification_handler.test.mjs +187 -0
- package/app-node/notify.mjs +73 -0
- package/app-node/notify.test.mjs +68 -0
- package/app-node/plan_dispatcher.mjs +215 -0
- package/app-node/plan_dispatcher.test.mjs +101 -0
- package/app-node/plan_mode.mjs +203 -0
- package/app-node/plan_mode.test.mjs +231 -0
- package/app-node/progress_handler.mjs +220 -0
- package/app-node/progress_handler.test.mjs +193 -0
- package/app-node/progress_speech.mjs +54 -32
- package/app-node/progress_speech.test.mjs +12 -3
- package/app-node/project_sessions.mjs +5 -2
- package/app-node/project_sessions.test.mjs +7 -0
- package/app-node/research_mode.mjs +282 -0
- package/app-node/research_mode.test.mjs +264 -0
- package/app-node/restart_notice.mjs +3 -0
- package/app-node/restart_notice.test.mjs +11 -0
- package/app-node/session_ontology.mjs +271 -0
- package/app-node/session_ontology.test.mjs +130 -0
- package/app-node/smart_progress.mjs +94 -0
- package/app-node/smart_progress.test.mjs +66 -0
- package/app-node/stream_sentencer.mjs +91 -0
- package/app-node/stream_sentencer.test.mjs +129 -0
- package/app-node/streaming_tts_queue.mjs +52 -0
- package/app-node/streaming_tts_queue.test.mjs +64 -0
- package/app-node/stt_whisper.mjs +24 -0
- package/app-node/stt_whisper.test.mjs +32 -0
- package/app-node/text_routing.mjs +22 -0
- package/app-node/text_routing.test.mjs +23 -1
- package/app-node/tts_backends.mjs +537 -3
- package/app-node/tts_backends.test.mjs +454 -0
- package/app-node/tts_player.mjs +164 -0
- package/app-node/tts_player.test.mjs +202 -0
- package/app-node/tts_runtime.mjs +134 -0
- package/app-node/tts_runtime.test.mjs +89 -0
- package/app-node/tts_settings.mjs +150 -3
- package/app-node/tts_settings.test.mjs +204 -0
- package/app-node/tts_voice_config.mjs +136 -2
- package/app-node/tts_voice_config.test.mjs +94 -0
- package/app-node/utterance_router.mjs +216 -0
- package/app-node/utterance_router.test.mjs +236 -0
- package/app-node/voice_autojoin.mjs +37 -0
- package/app-node/voice_autojoin.test.mjs +59 -0
- package/app-node/voice_io.mjs +272 -0
- package/app-node/voice_io.test.mjs +102 -0
- package/app-node/voice_turn_runner.mjs +449 -0
- package/app-node/voice_turn_runner.test.mjs +289 -0
- package/docs/CONFIGURATION.md +79 -96
- package/docs/FRESH_INSTALL.md +105 -63
- package/docs/HARNESSES.md +58 -0
- package/docs/HARNESS_AIDER.md +50 -0
- package/docs/HARNESS_CLAUDE.md +56 -0
- package/docs/HARNESS_CODEX.md +56 -0
- package/docs/HARNESS_CURSOR.md +45 -0
- package/docs/HARNESS_GEMINI.md +45 -0
- package/docs/HARNESS_HERMES.md +57 -0
- package/docs/HARNESS_OPENCLAW.md +44 -0
- package/docs/HARNESS_OPENCODE.md +44 -0
- package/docs/HERMES_VOICE.md +65 -0
- package/docs/MULTI_INSTANCE.md +16 -0
- package/docs/README.md +50 -0
- package/docs/RELEASE.md +42 -19
- package/docs/ROADMAP.md +53 -0
- package/docs/TROUBLESHOOTING.md +126 -0
- package/docs/TTS_BACKENDS.md +227 -0
- package/docs/USAGE.md +94 -40
- package/docs/assets/figures/verbalcoding-flow.svg +1 -1
- package/docs/i18n/AGENTS.es.md +34 -0
- package/docs/i18n/AGENTS.fr.md +34 -0
- package/docs/i18n/AGENTS.ja.md +34 -0
- package/docs/i18n/AGENTS.ko.md +34 -0
- package/docs/i18n/AGENTS.ru.md +34 -0
- package/docs/i18n/AGENTS.zh.md +34 -0
- package/docs/i18n/CONFIGURATION.es.md +25 -0
- package/docs/i18n/CONFIGURATION.fr.md +25 -0
- package/docs/i18n/CONFIGURATION.ja.md +25 -0
- package/docs/i18n/CONFIGURATION.ko.md +25 -0
- package/docs/i18n/CONFIGURATION.ru.md +25 -0
- package/docs/i18n/CONFIGURATION.zh.md +25 -0
- package/docs/i18n/FRESH_INSTALL.es.md +27 -2
- package/docs/i18n/FRESH_INSTALL.fr.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ja.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ko.md +27 -2
- package/docs/i18n/FRESH_INSTALL.ru.md +27 -2
- package/docs/i18n/FRESH_INSTALL.zh.md +27 -2
- package/docs/i18n/HARNESSES.es.md +58 -0
- package/docs/i18n/HARNESSES.fr.md +58 -0
- package/docs/i18n/HARNESSES.ja.md +58 -0
- package/docs/i18n/HARNESSES.ko.md +58 -0
- package/docs/i18n/HARNESSES.ru.md +58 -0
- package/docs/i18n/HARNESSES.zh.md +58 -0
- package/docs/i18n/HARNESS_AIDER.es.md +48 -0
- package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
- package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
- package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
- package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
- package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
- package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
- package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
- package/docs/i18n/HARNESS_CODEX.es.md +55 -0
- package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
- package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
- package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
- package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
- package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
- package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
- package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
- package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
- package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
- package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
- package/docs/i18n/HARNESS_HERMES.es.md +54 -0
- package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
- package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
- package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
- package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
- package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
- package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
- package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
- package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
- package/docs/i18n/HERMES_VOICE.es.md +46 -0
- package/docs/i18n/HERMES_VOICE.fr.md +46 -0
- package/docs/i18n/HERMES_VOICE.ja.md +46 -0
- package/docs/i18n/HERMES_VOICE.ko.md +65 -0
- package/docs/i18n/HERMES_VOICE.ru.md +46 -0
- package/docs/i18n/HERMES_VOICE.zh.md +46 -0
- package/docs/i18n/MULTI_INSTANCE.es.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.fr.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ja.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ko.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.ru.md +25 -0
- package/docs/i18n/MULTI_INSTANCE.zh.md +25 -0
- package/docs/i18n/README.es.md +20 -134
- package/docs/i18n/README.fr.md +20 -134
- package/docs/i18n/README.ja.md +20 -134
- package/docs/i18n/README.ko.md +20 -133
- package/docs/i18n/README.ru.md +20 -134
- package/docs/i18n/README.zh.md +20 -133
- package/docs/i18n/RELEASE.es.md +26 -1
- package/docs/i18n/RELEASE.fr.md +26 -1
- package/docs/i18n/RELEASE.ja.md +26 -1
- package/docs/i18n/RELEASE.ko.md +26 -1
- package/docs/i18n/RELEASE.ru.md +26 -1
- package/docs/i18n/RELEASE.zh.md +26 -1
- package/docs/i18n/TROUBLESHOOTING.es.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.fr.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ja.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ko.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.ru.md +39 -0
- package/docs/i18n/TROUBLESHOOTING.zh.md +39 -0
- package/docs/i18n/USAGE.es.md +25 -0
- package/docs/i18n/USAGE.fr.md +25 -0
- package/docs/i18n/USAGE.ja.md +25 -0
- package/docs/i18n/USAGE.ko.md +25 -0
- package/docs/i18n/USAGE.ru.md +25 -0
- package/docs/i18n/USAGE.zh.md +25 -0
- package/docs/superpowers/plans/2026-05-13-phase1-streaming-pipeline.md +122 -0
- package/docs/superpowers/plans/2026-05-13-phase10-push-notifications.md +152 -0
- package/docs/superpowers/plans/2026-05-13-phase2-agent-adapters.md +242 -0
- package/docs/superpowers/plans/2026-05-13-phase6-smart-progress.md +172 -0
- package/docs/superpowers/plans/2026-05-13-phase7-voice-plan-mode.md +108 -0
- package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
- package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
- package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
- package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
- package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
- package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
- package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
- package/integrations/fireredtts2/mlx_llm.py +183 -0
- package/integrations/fireredtts2/synth.py +156 -0
- package/integrations/fireredtts2/synth_mlx.py +196 -0
- package/integrations/mlxaudio/synth.py +74 -0
- package/integrations/neuttsair/synth.py +104 -0
- package/integrations/omnivoice/synth.py +110 -0
- package/package.json +7 -1
- package/scripts/cli.mjs +88 -3
- package/scripts/doctor.mjs +115 -4
- package/scripts/install.mjs +20 -2
- package/scripts/install_fireredtts2.sh +109 -0
- package/scripts/install_mlxaudio.sh +34 -0
- package/scripts/install_mossttsnano.sh +46 -0
- package/scripts/postinstall.mjs +34 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def repo_root() -> Path:
    """Return the directory two levels above the folder that contains this script."""
    script_path = Path(__file__).resolve()
    # parents[0] is the script's own folder, so parents[2] sits two levels above it.
    return script_path.parents[2]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def resolve(root: Path, value: str | None) -> str | None:
    """Turn an optional path argument into a path string anchored at ``root``.

    Empty or missing values give ``None``. A leading ``~`` is expanded first;
    a path that is still relative afterwards is joined onto ``root``, while an
    absolute path is returned as-is.
    """
    if value:
        candidate = Path(value).expanduser()
        anchored = candidate if candidate.is_absolute() else root / candidate
        return str(anchored)
    return None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def read_text_arg(value: str | None) -> str:
    """Return the text for an argument that may be literal text or a path to a text file.

    Args:
        value: Either the text itself or the path of a UTF-8 file holding it.

    Returns:
        ``""`` when ``value`` is empty or ``None``; the stripped file contents
        when ``value`` names an existing regular file; otherwise ``value``
        itself, stripped.

    Raises:
        OSError, UnicodeDecodeError: if ``value`` names a regular file that
            cannot be read or decoded.
    """
    if not value:
        return ""
    try:
        candidate = Path(value).expanduser()
        is_file = candidate.is_file()
    except (OSError, ValueError, RuntimeError):
        # A literal transcript is not necessarily a usable path: long text can
        # fail the stat with ENAMETOOLONG, and "~name" may not expand. Treat any
        # such failure as "this is plain text" instead of crashing.
        is_file = False
    if is_file:
        # Only regular files are read; a value that names a directory is kept as text.
        return candidate.read_text(encoding="utf-8").strip()
    return value.strip()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_args() -> argparse.Namespace:
    """Parse the command line of the NeuTTS-Air wrapper (reads ``sys.argv``).

    ``--text`` and ``--output`` are mandatory. ``--backbone`` / ``--codec`` also
    accept the ``--backbone-repo`` / ``--codec-repo`` spellings.
    """
    cli = argparse.ArgumentParser(description="NeuTTS-Air synthesis wrapper for VerbalCoding")
    for required_flag in ("--text", "--output"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--backbone", "--backbone-repo", dest="backbone", default="neuphonic/neutts-air-q4-gguf")
    cli.add_argument("--codec", "--codec-repo", dest="codec", default="neuphonic/neucodec")
    for device_flag in ("--backbone-device", "--codec-device"):
        cli.add_argument(device_flag, default="cpu")
    for reference_flag in ("--ref-audio", "--ref-text", "--ref-text-file"):
        cli.add_argument(reference_flag, default="")
    cli.add_argument("--language", default="en")
    cli.add_argument("--sample-rate", type=int, default=24000)
    cli.add_argument("--cache-ref", action="store_true")
    return cli.parse_args()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def main() -> int:
    """Synthesize ``--text`` with NeuTTS-Air using a reference voice and write it to ``--output``.

    Returns:
        A process exit status: 0 on success, 127 when soundfile/torch/neutts
        cannot be imported, 66 when the reference audio is unset or does not
        exist, 1 when model loading, reference encoding, inference or writing
        raises. All diagnostics are printed to stderr.
    """
    args = parse_args()
    root = repo_root()
    vendor = root / "vendor" / "neutts-air"
    if vendor.exists():
        # Put the vendored checkout first on sys.path before importing neutts below.
        sys.path.insert(0, str(vendor))
    try:
        # Heavy dependencies are imported lazily so a missing one becomes exit code 127
        # instead of an import-time traceback.
        import soundfile as sf
        import torch
        from neutts import NeuTTS
    except Exception as exc:
        print(f"NeuTTS-Air dependencies are missing: {exc}", file=sys.stderr, flush=True)
        return 127

    # Relative --ref-audio paths are anchored at the repo root.
    ref_audio = resolve(root, args.ref_audio)
    # --ref-text-file wins over --ref-text when both yield text.
    ref_text = read_text_arg(args.ref_text_file) or read_text_arg(args.ref_text)
    if not ref_audio or not Path(ref_audio).exists():
        print(f"NeuTTS-Air reference audio not found: {ref_audio}", file=sys.stderr, flush=True)
        return 66
    if not ref_text:
        # Fall back to a short generic transcript; users should configure NEUTTSAIR_REF_TEXT
        # or NEUTTSAIR_REF_TEXT_FILE for best cloning quality.
        ref_text = "This is a reference voice sample."

    out = Path(args.output).expanduser()
    out.parent.mkdir(parents=True, exist_ok=True)
    # The cache lives next to the reference audio, with its extension swapped for ".neutts.pt".
    cache_path = Path(ref_audio).with_suffix(".neutts.pt")

    try:
        print(f"[neutts-air] loading backbone={args.backbone} codec={args.codec}", file=sys.stderr, flush=True)
        tts = NeuTTS(
            backbone_repo=args.backbone,
            backbone_device=args.backbone_device,
            codec_repo=args.codec,
            codec_device=args.codec_device,
        )
        if args.cache_ref and cache_path.exists():
            print(f"[neutts-air] loading cached reference {cache_path}", file=sys.stderr, flush=True)
            # NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects,
            # so the cache file beside the reference audio must come from a trusted source.
            ref_codes = torch.load(cache_path, map_location="cpu", weights_only=False)
        else:
            print(f"[neutts-air] encoding reference {ref_audio}", file=sys.stderr, flush=True)
            ref_codes = tts.encode_reference(ref_audio)
            if args.cache_ref:
                # Persist the freshly encoded reference for later --cache-ref runs.
                torch.save(ref_codes, cache_path)
        print(f"[neutts-air] generating chars={len(args.text)}", file=sys.stderr, flush=True)
        wav = tts.infer(args.text, ref_codes, ref_text)
        # The file is written with the --sample-rate value (default 24000); nothing here
        # checks it against the rate the model actually produced.
        sf.write(str(out), wav, args.sample_rate)
        print(f"[neutts-air] wrote {out}", file=sys.stderr, flush=True)
        return 0
    except Exception as exc:
        print(f"NeuTTS-Air synthesis failed: {exc}", file=sys.stderr, flush=True)
        return 1
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
|
|
104
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Synthesize speech with k2-fsa OmniVoice for VerbalCoding.
|
|
3
|
+
|
|
4
|
+
The wrapper keeps the Node bridge independent from OmniVoice's Python runtime.
|
|
5
|
+
It accepts one text chunk and writes a 24 kHz WAV file.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import inspect
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _torch_dtype(name: str):
|
|
18
|
+
import torch
|
|
19
|
+
|
|
20
|
+
normalized = (name or "").lower()
|
|
21
|
+
if normalized in {"auto", ""}:
|
|
22
|
+
return None
|
|
23
|
+
if normalized in {"float16", "fp16", "half"}:
|
|
24
|
+
return torch.float16
|
|
25
|
+
if normalized in {"bfloat16", "bf16"}:
|
|
26
|
+
return torch.bfloat16
|
|
27
|
+
if normalized in {"float32", "fp32"}:
|
|
28
|
+
return torch.float32
|
|
29
|
+
raise ValueError(f"Unsupported OmniVoice dtype: {name}")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _filtered_call(fn, **kwargs: Any):
|
|
33
|
+
"""Call fn with only supported kwargs when the signature is inspectable."""
|
|
34
|
+
|
|
35
|
+
try:
|
|
36
|
+
sig = inspect.signature(fn)
|
|
37
|
+
except (TypeError, ValueError):
|
|
38
|
+
return fn(**{k: v for k, v in kwargs.items() if v not in (None, "")})
|
|
39
|
+
|
|
40
|
+
accepts_kwargs = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values())
|
|
41
|
+
clean = {k: v for k, v in kwargs.items() if v not in (None, "")}
|
|
42
|
+
if accepts_kwargs:
|
|
43
|
+
return fn(**clean)
|
|
44
|
+
return fn(**{k: v for k, v in clean.items() if k in sig.parameters})
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def synthesize(args: argparse.Namespace) -> None:
    """Load an OmniVoice model, generate speech for ``args.text`` and write it to ``args.output``.

    Reads ``model``, ``device``, ``dtype``, ``text``, ``ref_audio``, ``ref_text``,
    ``language``, ``speaker`` and ``output`` from ``args``.

    Raises:
        RuntimeError: if soundfile/torch/omnivoice cannot be imported, if the
            model returns an empty list, or if the output file is missing or
            empty after writing.
        ValueError: if ``args.dtype`` is not a recognised dtype name.
    """
    try:
        import soundfile as sf
        import torch
        from omnivoice import OmniVoice
    except Exception as exc:  # pragma: no cover - exercised in real install
        raise RuntimeError(
            "OmniVoice dependencies are missing. Install them in OMNIVOICE_PYTHON's environment: "
            "pip install torch torchaudio soundfile omnivoice"
        ) from exc

    dtype = _torch_dtype(args.dtype)
    load_kwargs = {"device_map": args.device}
    if dtype is not None:
        # Only pass a dtype when one was requested; "auto"/empty leaves it to from_pretrained.
        load_kwargs["dtype"] = dtype

    model = OmniVoice.from_pretrained(args.model, **load_kwargs)
    if hasattr(torch, "set_grad_enabled"):
        # Turns autograd off for the rest of the process, not just for this call.
        torch.set_grad_enabled(False)

    # Empty/None options are dropped, as are options model.generate does not declare.
    audio = _filtered_call(
        model.generate,
        text=args.text,
        ref_audio=args.ref_audio,
        ref_text=args.ref_text,
        language=args.language,
        speaker=args.speaker,
    )
    # Unwrap the result: take the first element of a tuple, then the first item of a list.
    if isinstance(audio, tuple):
        audio = audio[0]
    if isinstance(audio, list):
        if not audio:
            raise RuntimeError("OmniVoice returned no audio")
        audio = audio[0]

    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    # The sample rate is fixed at 24000 here — TODO confirm it matches the model's output rate.
    sf.write(str(out), audio, 24000)
    if not out.exists() or out.stat().st_size <= 0:
        raise RuntimeError(f"OmniVoice wrote empty output: {out}")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def main(argv: list[str] | None = None) -> int:
|
|
90
|
+
parser = argparse.ArgumentParser(description="VerbalCoding OmniVoice TTS wrapper")
|
|
91
|
+
parser.add_argument("--text", required=True)
|
|
92
|
+
parser.add_argument("--output", required=True)
|
|
93
|
+
parser.add_argument("--model", default="k2-fsa/OmniVoice")
|
|
94
|
+
parser.add_argument("--device", default="mps")
|
|
95
|
+
parser.add_argument("--dtype", default="float16")
|
|
96
|
+
parser.add_argument("--ref-audio", default="")
|
|
97
|
+
parser.add_argument("--ref-text", default="")
|
|
98
|
+
parser.add_argument("--language", default="ko")
|
|
99
|
+
parser.add_argument("--speaker", default="")
|
|
100
|
+
args = parser.parse_args(argv)
|
|
101
|
+
synthesize(args)
|
|
102
|
+
return 0
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
if __name__ == "__main__": # pragma: no cover
|
|
106
|
+
try:
|
|
107
|
+
raise SystemExit(main())
|
|
108
|
+
except Exception as exc:
|
|
109
|
+
print(f"OmniVoice synthesis failed: {exc}", file=sys.stderr)
|
|
110
|
+
raise SystemExit(1)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "verbalcoding",
|
|
3
|
-
"version": "0.2.11",
|
|
3
|
+
"version": "0.2.13",
|
|
4
4
|
"description": "Discord voice bridge for CLI coding agents.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -35,12 +35,18 @@
|
|
|
35
35
|
"scripts/*.mjs",
|
|
36
36
|
"scripts/*.sh",
|
|
37
37
|
"integrations/openvoice/*.py",
|
|
38
|
+
"integrations/omnivoice/*.py",
|
|
39
|
+
"integrations/fireredtts2/*.py",
|
|
40
|
+
"integrations/mlxaudio/*.py",
|
|
41
|
+
"integrations/neuttsair/*.py",
|
|
38
42
|
"run.sh",
|
|
39
43
|
".env.example",
|
|
40
44
|
"README.md",
|
|
45
|
+
"README.*.md",
|
|
41
46
|
"LICENSE"
|
|
42
47
|
],
|
|
43
48
|
"scripts": {
|
|
49
|
+
"postinstall": "node scripts/postinstall.mjs",
|
|
44
50
|
"start": "node app-node/main.mjs",
|
|
45
51
|
"setup": "node scripts/install.mjs",
|
|
46
52
|
"doctor": "node scripts/doctor.mjs",
|
package/scripts/cli.mjs
CHANGED
|
@@ -10,7 +10,17 @@ import {
|
|
|
10
10
|
normalizeInstanceAnswers,
|
|
11
11
|
parseKeyValueEnv,
|
|
12
12
|
renderInstanceSetupSummary,
|
|
13
|
+
SUPPORTED_TTS_BACKENDS,
|
|
13
14
|
} from '../app-node/install_config.mjs';
|
|
15
|
+
import {
|
|
16
|
+
applyTtsVoiceSelectionToEnv,
|
|
17
|
+
defaultTtsVoiceConfig,
|
|
18
|
+
effectiveTtsVoiceSelection,
|
|
19
|
+
readTtsVoiceConfig,
|
|
20
|
+
updateTtsVoiceConfig,
|
|
21
|
+
writeTtsVoiceConfig,
|
|
22
|
+
} from '../app-node/tts_voice_config.mjs';
|
|
23
|
+
import { normalizeTtsBackendName } from '../app-node/tts_settings.mjs';
|
|
14
24
|
import { ensureHermesProfile, validateProfileName } from '../app-node/hermes_profiles.mjs';
|
|
15
25
|
import { checkInstanceConfigs } from '../app-node/instance_doctor.mjs';
|
|
16
26
|
import { healInstanceProfileFromEnv } from '../app-node/instance_profile_lifecycle.mjs';
|
|
@@ -38,6 +48,8 @@ Usage:
|
|
|
38
48
|
vc language <ko|en|auto>
|
|
39
49
|
vc language status
|
|
40
50
|
vc restart auto <on|off|status>
|
|
51
|
+
vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}>
|
|
52
|
+
vc tts status
|
|
41
53
|
vc bot invite <client-id> [--guild <guild-id>]
|
|
42
54
|
vc instance list
|
|
43
55
|
vc instance setup [name] [--start]
|
|
@@ -48,14 +60,16 @@ Usage:
|
|
|
48
60
|
vc doctor
|
|
49
61
|
|
|
50
62
|
Examples:
|
|
51
|
-
npx verbalcoding setup
|
|
52
|
-
vc setup
|
|
53
|
-
vc setup
|
|
63
|
+
npx verbalcoding setup
|
|
64
|
+
vc setup
|
|
65
|
+
vc setup --yes # automation/non-interactive starter config
|
|
66
|
+
vc setup token # later token update
|
|
54
67
|
vc setup channels "General,Team Voice"
|
|
55
68
|
vc start
|
|
56
69
|
vc language en
|
|
57
70
|
vc language ko
|
|
58
71
|
vc language auto
|
|
72
|
+
vc tts backend qwen3
|
|
59
73
|
vc restart auto off
|
|
60
74
|
vc bot invite 123456789012345678
|
|
61
75
|
`;
|
|
@@ -96,6 +110,56 @@ function printLanguageStatus(values) {
|
|
|
96
110
|
console.log(`TTS voice: ${s.ttsVoice}`);
|
|
97
111
|
}
|
|
98
112
|
|
|
113
|
+
/**
 * Locate the TTS voice config file.
 *
 * The path comes from `TTS_VOICE_CONFIG` in `values`, then from the process
 * environment, then falls back to `config/tts-voices.json`. Relative paths are
 * resolved against ROOT; absolute paths are returned untouched.
 *
 * @param {Record<string, string>} [values] - Parsed env values; defaults to readEnvFile().
 * @returns {string} Path of the voice config file.
 */
function ttsVoiceConfigPath(values = readEnvFile()) {
  const fallback = path.join('config', 'tts-voices.json');
  const configured = values.TTS_VOICE_CONFIG || process.env.TTS_VOICE_CONFIG || fallback;
  if (path.isAbsolute(configured)) {
    return configured;
  }
  return path.join(ROOT, configured);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Return a copy of `updates` without the entries whose value is null,
 * undefined or the empty string. Other falsy values (0, false) are kept.
 *
 * @param {Record<string, unknown>} updates - Candidate key/value pairs.
 * @returns {Record<string, unknown>} The entries that carry a value.
 */
function compactUpdates(updates) {
  const kept = [];
  for (const entry of Object.entries(updates)) {
    const value = entry[1];
    if (value == null || value === '') {
      continue;
    }
    kept.push(entry);
  }
  return Object.fromEntries(kept);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Print the effective TTS backend, voice type and voice to stdout.
 *
 * The selection is computed from the voice config file and the env file; the
 * voice line shows the voice's label, else its `voice` field, else `-`.
 */
function printTtsStatus() {
  const envValues = readEnvFile();
  const voiceConfig = readTtsVoiceConfig(ttsVoiceConfigPath(envValues), defaultTtsVoiceConfig());
  const selection = effectiveTtsVoiceSelection(voiceConfig, envValues);
  console.log(`TTS backend: ${selection.backend}`);
  console.log(`TTS voice type: ${selection.voiceType}`);
  const voiceName = selection.voice?.label || selection.voice?.voice || '-';
  console.log(`TTS voice: ${voiceName}`);
}
|
|
130
|
+
|
|
131
|
+
/**
 * Switch the configured TTS backend from the command line.
 *
 * Writes the new backend (and optional voice type) to the voice config file,
 * then mirrors the resulting selection into the env file at ENV_PATH and
 * prints a short summary.
 *
 * @param {string} rawBackend - Backend name as typed by the user.
 * @param {string} [rawVoiceType] - Optional voice type; empty means "not given".
 * @throws {Error} When `rawBackend` does not normalize to a known backend.
 */
function setTtsBackendFromCli(rawBackend, rawVoiceType = '') {
  // An empty fallback makes an unrecognised name come back falsy so it can be rejected here.
  const backend = normalizeTtsBackendName(rawBackend, '');
  if (!backend) {
    throw new Error(`Unknown TTS backend: ${rawBackend}. Supported: ${SUPPORTED_TTS_BACKENDS.join(', ')}`);
  }
  const env = readEnvFile();
  const configPath = ttsVoiceConfigPath(env);
  const baseConfig = readTtsVoiceConfig(configPath, defaultTtsVoiceConfig());
  const nextConfig = updateTtsVoiceConfig(baseConfig, { backend, voiceType: rawVoiceType });
  // The voice config file is written first; the env file is updated further down.
  writeTtsVoiceConfig(configPath, nextConfig);
  // Recompute the selection as if the env already named the new backend; the voice type
  // from the command line wins over the one stored in the env file.
  const selected = effectiveTtsVoiceSelection(nextConfig, { ...env, TTS_BACKEND: backend, TTS_VOICE_TYPE: rawVoiceType || env.TTS_VOICE_TYPE });
  const nextEnv = applyTtsVoiceSelectionToEnv(env, selected);
  // compactUpdates drops null/undefined/empty values, so only keys that carry a value
  // are passed to upsertEnvFile.
  upsertEnvFile(ENV_PATH, compactUpdates({
    TTS_BACKEND: nextEnv.TTS_BACKEND,
    TTS_VOICE_TYPE: nextEnv.TTS_VOICE_TYPE,
    TTS_VOICE: nextEnv.TTS_VOICE || env.TTS_VOICE,
    VOICE_LANGUAGE: nextEnv.VOICE_LANGUAGE || env.VOICE_LANGUAGE,
    QWEN3TTS_MODE: nextEnv.QWEN3TTS_MODE,
    QWEN3TTS_SPEAKER: nextEnv.QWEN3TTS_SPEAKER,
    QWEN3TTS_REF_AUDIO: nextEnv.QWEN3TTS_REF_AUDIO,
    QWEN3TTS_INSTRUCT: nextEnv.QWEN3TTS_INSTRUCT,
    MLXAUDIO_VOICE: nextEnv.MLXAUDIO_VOICE,
    FIREREDTTS2_PROMPT_AUDIO: nextEnv.FIREREDTTS2_PROMPT_AUDIO,
    MOSSTTSNANO_MODE: nextEnv.MOSSTTSNANO_MODE,
    MOSSTTSNANO_PROMPT_AUDIO: nextEnv.MOSSTTSNANO_PROMPT_AUDIO,
  }));
  console.log(`Updated ${ENV_PATH}`);
  console.log(`TTS backend: ${selected.backend}`);
  console.log(`TTS voice type: ${selected.voiceType}`);
  console.log('Restart the bridge for CLI changes to take effect; voice requests switch the running bridge immediately.');
}
|
|
162
|
+
|
|
99
163
|
function printInstanceStatus(statuses) {
|
|
100
164
|
if (statuses.length === 0) {
|
|
101
165
|
console.log('No instance env files found in instances/*.env');
|
|
@@ -307,9 +371,30 @@ async function main(argv = process.argv.slice(2)) {
|
|
|
307
371
|
}
|
|
308
372
|
if (command === 'status') {
|
|
309
373
|
printLanguageStatus(readEnvFile());
|
|
374
|
+
printTtsStatus();
|
|
310
375
|
console.log(autoRestartStatusText(readEnvFile()));
|
|
311
376
|
return;
|
|
312
377
|
}
|
|
378
|
+
if (command === 'tts' || command === 'voice') {
|
|
379
|
+
if (!subcommand || subcommand === 'status') {
|
|
380
|
+
printTtsStatus();
|
|
381
|
+
return;
|
|
382
|
+
}
|
|
383
|
+
if (subcommand === 'backend' || subcommand === 'switch' || subcommand === 'set') {
|
|
384
|
+
const backend = argv[2];
|
|
385
|
+
const voiceType = argv.includes('--voice-type') ? argv[argv.indexOf('--voice-type') + 1] : '';
|
|
386
|
+
if (!backend || backend.startsWith('--')) {
|
|
387
|
+
console.error(`Use: vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}> [--voice-type <name>]`);
|
|
388
|
+
process.exitCode = 2;
|
|
389
|
+
return;
|
|
390
|
+
}
|
|
391
|
+
setTtsBackendFromCli(backend, voiceType);
|
|
392
|
+
return;
|
|
393
|
+
}
|
|
394
|
+
console.error('Use: vc tts status OR vc tts backend <name>');
|
|
395
|
+
process.exitCode = 2;
|
|
396
|
+
return;
|
|
397
|
+
}
|
|
313
398
|
if (command === 'instance') {
|
|
314
399
|
await handleInstanceCommand(argv);
|
|
315
400
|
return;
|
package/scripts/doctor.mjs
CHANGED
|
@@ -5,6 +5,7 @@ import { spawnSync } from 'node:child_process';
|
|
|
5
5
|
import { parseKeyValueEnv } from '../app-node/install_config.mjs';
|
|
6
6
|
import { checkInstanceConfigs, formatInstanceDoctor } from '../app-node/instance_doctor.mjs';
|
|
7
7
|
import { autoRestartVoiceBotEnabled } from '../app-node/restart_policy.mjs';
|
|
8
|
+
import { detectInstalledAgents, formatAgentDetectionReport } from '../app-node/agent_detect.mjs';
|
|
8
9
|
|
|
9
10
|
const ROOT = path.resolve(path.dirname(new URL(import.meta.url).pathname), '..');
|
|
10
11
|
const args = process.argv.slice(2);
|
|
@@ -19,12 +20,25 @@ function readEnvFile(file) {
|
|
|
19
20
|
}
|
|
20
21
|
}
|
|
21
22
|
|
|
23
|
+
/**
 * Copy `env`, leaving out every string value that still contains a shell
 * variable reference (`$NAME` or `${...}`).
 *
 * @param {Record<string, unknown>} env - Parsed key/value pairs.
 * @returns {Record<string, unknown>} The entries without unexpanded references.
 */
function dropUnexpandedRefs(env) {
  // parseKeyValueEnv performs no shell expansion. A line such as
  //   PATH="$JAVA_HOME/bin:$PATH"
  // would therefore overwrite process.env.PATH with the literal "$VAR" text and
  // break every JS-level PATH lookup (e.g. agent_detect.defaultWhich).
  const unexpandedRef = /\$[A-Za-z_][A-Za-z0-9_]*|\$\{[^}]+\}/;
  const hasUnexpandedRef = (value) => typeof value === 'string' && unexpandedRef.test(value);
  const out = {};
  Object.entries(env)
    .filter(([, value]) => !hasUnexpandedRef(value))
    .forEach(([key, value]) => {
      out[key] = value;
    });
  return out;
}
|
|
35
|
+
|
|
22
36
|
function mergeEnv() {
|
|
23
37
|
// Project .env intentionally wins over ~/.zshrc so local setup is reproducible.
|
|
24
38
|
return {
|
|
25
39
|
...process.env,
|
|
26
|
-
...readEnvFile(path.join(process.env.HOME || '', '.zshrc')),
|
|
27
|
-
...readEnvFile(path.join(ROOT, '.env')),
|
|
40
|
+
...dropUnexpandedRefs(readEnvFile(path.join(process.env.HOME || '', '.zshrc'))),
|
|
41
|
+
...dropUnexpandedRefs(readEnvFile(path.join(ROOT, '.env'))),
|
|
28
42
|
};
|
|
29
43
|
}
|
|
30
44
|
|
|
@@ -104,10 +118,43 @@ function fixablePrerequisites(env) {
|
|
|
104
118
|
const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
|
|
105
119
|
if (!resolveCommand(edgeCommand, [path.join(ROOT, '.venv-tts', 'bin', 'edge-tts')])) missing.push('edge-tts');
|
|
106
120
|
}
|
|
121
|
+
if (ttsBackend === 'fireredtts2') {
|
|
122
|
+
const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
|
|
123
|
+
const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
|
|
124
|
+
const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
|
|
125
|
+
if (!isExecutable(firePath) || !fs.existsSync(fireModel)) missing.push('FireRedTTS-2');
|
|
126
|
+
}
|
|
107
127
|
if (backend === 'hermes' && !commandExists('hermes')) missing.push('hermes CLI');
|
|
108
128
|
return missing;
|
|
109
129
|
}
|
|
110
130
|
|
|
131
|
+
/**
 * Run the FireRedTTS-2 installer when that backend is selected but not installed.
 *
 * "Installed" means the configured command is executable and the pretrained
 * model directory exists.
 *
 * @param {Record<string, string>} env - Merged environment values.
 * @returns {boolean} true when the installer was launched (whatever its exit
 *   status), false when nothing was attempted.
 */
function installFireRedTts2IfNeeded(env) {
  const ttsBackend = (env.TTS_BACKEND || 'edge').toLowerCase();
  if (ttsBackend !== 'fireredtts2') return false;
  const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
  // Relative command and model paths are resolved against the package root.
  const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
  const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
  if (isExecutable(firePath) && fs.existsSync(fireModel)) return false;
  // The opt-out switch is read from the real process environment, not from the merged `env`.
  if (['0', 'false', 'no', 'off'].includes(String(process.env.VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 || '1').toLowerCase())) {
    console.log('Skipping FireRedTTS-2 auto-install because VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 is off.');
    return false;
  }
  console.log('VerbalCoding doctor: TTS_BACKEND=fireredtts2 but FireRedTTS-2 is missing; installing...');
  // The installer inherits this process's stdio and environment (process.env, not `env`).
  const result = spawnSync('bash', [path.join(ROOT, 'scripts', 'install_fireredtts2.sh'), '--yes'], {
    cwd: ROOT,
    stdio: 'inherit',
    env: process.env,
  });
  if (result.status !== 0) {
    // A failed install is reported but does not abort the doctor run.
    console.log(`FireRedTTS-2 installer exited with status ${result.status}. Continuing with checks.`);
  }
  // NOTE(review): this runs even when the installer failed, and presumably replaces any
  // custom FIREREDTTS2_COMMAND / FIREREDTTS2_PRETRAINED_DIR already in .env — confirm intended.
  upsertEnvFile(path.join(ROOT, '.env'), {
    FIREREDTTS2_COMMAND: './.local/bin/fireredtts2',
    FIREREDTTS2_PRETRAINED_DIR: 'pretrained_models/FireRedTTS2',
  });
  return true;
}
|
|
157
|
+
|
|
111
158
|
function installHermesCliIfNeeded(env) {
|
|
112
159
|
const backend = (env.AGENT_BACKEND || 'hermes').toLowerCase();
|
|
113
160
|
if (backend !== 'hermes' || commandExists('hermes')) return false;
|
|
@@ -191,6 +238,11 @@ if (autoFixEnabled && missingBeforeFix.length > 0) {
|
|
|
191
238
|
env = mergeEnv();
|
|
192
239
|
}
|
|
193
240
|
if (autoFixEnabled) {
|
|
241
|
+
const fireAttempted = installFireRedTts2IfNeeded(env);
|
|
242
|
+
if (fireAttempted) {
|
|
243
|
+
console.log('');
|
|
244
|
+
env = mergeEnv();
|
|
245
|
+
}
|
|
194
246
|
const hermesAttempted = installHermesCliIfNeeded(env);
|
|
195
247
|
if (hermesAttempted) {
|
|
196
248
|
console.log('');
|
|
@@ -210,6 +262,16 @@ if (!autoFixEnabled) note('Automatic prerequisite bootstrap', 'off');
|
|
|
210
262
|
if (autoFixAttempted) note('Automatic prerequisite bootstrap', 'attempted');
|
|
211
263
|
console.log('');
|
|
212
264
|
|
|
265
|
+
// Print the agent detection report and flag the selected backend when its binary
// was not found. Any failure here is reported as a note instead of being thrown.
try {
  const agentReports = await detectInstalledAgents(env);
  console.log(formatAgentDetectionReport(agentReports));
  // Match the configured backend either verbatim or with its hyphens removed.
  const chosen = agentReports.find((report) => {
    if (report.backend === backend) return true;
    return report.backend === backend.replace(/-/g, '');
  });
  if (chosen && !chosen.present) {
    note(`Selected backend "${backend}"`, `binary ${chosen.bin} not on PATH`);
  }
  console.log('');
} catch (err) {
  note('Agent backend detection', `skipped: ${err?.message || err}`);
}
|
|
274
|
+
|
|
213
275
|
const nodeCommand = commandExists('node');
|
|
214
276
|
const npmCommand = commandExists('npm');
|
|
215
277
|
const ffmpegCommand = commandExists('ffmpeg');
|
|
@@ -235,8 +297,8 @@ note('Progress/voice language', env.VOICE_LANGUAGE || env.WHISPER_CPP_LANGUAGE |
|
|
|
235
297
|
note('Latency log path', env.LATENCY_LOG_PATH || './.logs/latency.jsonl');
|
|
236
298
|
note('TTS voice fallback', env.TTS_VOICE || 'ko-KR-SunHiNeural');
|
|
237
299
|
|
|
238
|
-
if (!['edge', 'openvoice', 'speechswift', 'supertonic'].includes(ttsBackend)) {
|
|
239
|
-
ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, or
|
|
300
|
+
// Record a failed check when TTS_BACKEND is not one of the backends handled below.
if (
  [
    'edge',
    'openvoice',
    'speechswift',
    'supertonic',
    'omnivoice',
    'qwen3tts',
    'mlxaudio',
    'fireredtts2',
    'mossttsnano',
    'neuttsair',
  ].indexOf(ttsBackend) === -1
) {
  ok = check(
    'TTS_BACKEND value',
    false,
    'must be edge, openvoice, speechswift, supertonic, omnivoice, qwen3tts, mlxaudio, fireredtts2, mossttsnano, or neuttsair',
  ) && ok;
}
|
|
241
303
|
if (ttsBackend === 'edge') {
|
|
242
304
|
const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
|
|
@@ -261,6 +323,55 @@ if (ttsBackend === 'edge') {
|
|
|
261
323
|
ok = check('supertonic CLI', commandExists(supertonicCommand), commandExists(supertonicCommand) || 'install with: python3 -m pip install supertonic') && ok;
|
|
262
324
|
note('Supertonic voice/lang/steps', `${env.SUPERTONIC_VOICE || 'M1'} / ${env.SUPERTONIC_LANGUAGE || 'ko'} / ${env.SUPERTONIC_STEPS || '2'}`);
|
|
263
325
|
note('Supertonic progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.SUPERTONIC_PROGRESS || '0').toLowerCase()) ? 'supertonic' : 'edge fallback');
|
|
326
|
+
} else if (ttsBackend === 'omnivoice') {
|
|
327
|
+
const omniPython = env.OMNIVOICE_PYTHON || path.join(ROOT, '.venv-omnivoice', 'bin', 'python');
|
|
328
|
+
const resolvedOmniPython = path.isAbsolute(omniPython) ? omniPython : path.resolve(ROOT, omniPython);
|
|
329
|
+
const refAudio = path.resolve(ROOT, env.OMNIVOICE_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
330
|
+
ok = check('OmniVoice Python', fs.existsSync(resolvedOmniPython) || commandExists(omniPython), fs.existsSync(resolvedOmniPython) ? path.relative(ROOT, resolvedOmniPython) : 'install with: python -m venv .venv-omnivoice && .venv-omnivoice/bin/pip install torch torchaudio soundfile omnivoice') && ok;
|
|
331
|
+
ok = check('OmniVoice reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
|
|
332
|
+
ok = check('OmniVoice synth wrapper help', spawnSync(fs.existsSync(resolvedOmniPython) ? resolvedOmniPython : 'python3', ['integrations/omnivoice/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/omnivoice/synth.py') && ok;
|
|
333
|
+
note('OmniVoice model/device', `${env.OMNIVOICE_MODEL || 'k2-fsa/OmniVoice'} / ${env.OMNIVOICE_DEVICE || 'mps'}`);
|
|
334
|
+
note('OmniVoice progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.OMNIVOICE_PROGRESS || '0').toLowerCase()) ? 'omnivoice' : 'edge fallback');
|
|
335
|
+
} else if (ttsBackend === 'qwen3tts') {
|
|
336
|
+
const qwenCommand = env.QWEN3TTS_COMMAND || 'audio';
|
|
337
|
+
ok = check('Qwen3 TTS audio CLI', commandExists(qwenCommand), commandExists(qwenCommand) || 'install speech-swift/audio first') && ok;
|
|
338
|
+
note('Qwen3 speaker', env.QWEN3TTS_SPEAKER || 'sohee');
|
|
339
|
+
note('Qwen3 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.QWEN3TTS_PROGRESS || '0').toLowerCase()) ? 'qwen3tts' : 'edge fallback');
|
|
340
|
+
} else if (ttsBackend === 'mlxaudio') {
|
|
341
|
+
const mlxPython = env.MLXAUDIO_PYTHON || './.venv-mlxaudio/bin/python';
|
|
342
|
+
const mlxPath = path.isAbsolute(mlxPython) ? mlxPython : path.resolve(ROOT, mlxPython);
|
|
343
|
+
ok = check('MLX Audio Python', isExecutable(mlxPath) || commandExists(mlxPython), isExecutable(mlxPath) ? path.relative(ROOT, mlxPath) : (commandExists(mlxPython) || 'install with: scripts/install_mlxaudio.sh --yes')) && ok;
|
|
344
|
+
ok = check('MLX Audio wrapper help', spawnSync(isExecutable(mlxPath) ? mlxPath : 'python3', ['integrations/mlxaudio/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/mlxaudio/synth.py') && ok;
|
|
345
|
+
note('MLX Audio model/voice', `${env.MLXAUDIO_MODEL || 'mlx-community/Qwen3-TTS-12Hz-1.7B-Base-8bit'} / ${env.MLXAUDIO_VOICE || 'Chelsie'}`);
|
|
346
|
+
note('MLX Audio progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MLXAUDIO_PROGRESS || '0').toLowerCase()) ? 'mlxaudio' : 'edge fallback');
|
|
347
|
+
} else if (ttsBackend === 'fireredtts2') {
|
|
348
|
+
const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
|
|
349
|
+
const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
|
|
350
|
+
const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
|
|
351
|
+
ok = check('FireRedTTS-2 wrapper', isExecutable(firePath), path.relative(ROOT, firePath) || firePath) && ok;
|
|
352
|
+
ok = check('FireRedTTS-2 model', fs.existsSync(fireModel), path.relative(ROOT, fireModel)) && ok;
|
|
353
|
+
ok = check('FireRedTTS-2 synth wrapper help', spawnSync(isExecutable(firePath) ? firePath : process.execPath, isExecutable(firePath) ? ['--help'] : ['integrations/fireredtts2/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/fireredtts2/synth.py') && ok;
|
|
354
|
+
note('FireRedTTS-2 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.FIREREDTTS2_PROGRESS || '0').toLowerCase()) ? 'fireredtts2' : 'edge fallback');
|
|
355
|
+
} else if (ttsBackend === 'mossttsnano') {
|
|
356
|
+
const mossCommand = env.MOSSTTSNANO_COMMAND || './.venv-mossttsnano/bin/python';
|
|
357
|
+
const mossPath = path.isAbsolute(mossCommand) ? mossCommand : path.resolve(ROOT, mossCommand);
|
|
358
|
+
const mossScript = path.resolve(ROOT, env.MOSSTTSNANO_SCRIPT || 'vendor/MOSS-TTS-Nano/infer.py');
|
|
359
|
+
ok = check('MOSS-TTS-Nano Python', isExecutable(mossPath) || commandExists(mossCommand), isExecutable(mossPath) ? path.relative(ROOT, mossPath) : (commandExists(mossCommand) || 'missing')) && ok;
|
|
360
|
+
ok = check('MOSS-TTS-Nano infer.py', fs.existsSync(mossScript), path.relative(ROOT, mossScript)) && ok;
|
|
361
|
+
note('MOSS checkpoint', env.MOSSTTSNANO_CHECKPOINT || 'OpenMOSS-Team/MOSS-TTS-Nano');
|
|
362
|
+
note('MOSS progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MOSSTTSNANO_PROGRESS || '0').toLowerCase()) ? 'mossttsnano' : 'edge fallback');
|
|
363
|
+
} else if (ttsBackend === 'neuttsair') {
|
|
364
|
+
const neuPython = env.NEUTTSAIR_PYTHON || './.venv-neuttsair/bin/python';
|
|
365
|
+
const neuPath = path.isAbsolute(neuPython) ? neuPython : path.resolve(ROOT, neuPython);
|
|
366
|
+
const neuScript = path.resolve(ROOT, env.NEUTTSAIR_SCRIPT || 'integrations/neuttsair/synth.py');
|
|
367
|
+
const refAudio = path.resolve(ROOT, env.NEUTTSAIR_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
368
|
+
ok = check('NeuTTS Air Python', isExecutable(neuPath) || commandExists(neuPython), isExecutable(neuPath) ? path.relative(ROOT, neuPath) : (commandExists(neuPython) || 'install with: python3 -m venv .venv-neuttsair && .venv-neuttsair/bin/pip install -e vendor/neutts-air')) && ok;
|
|
369
|
+
ok = check('NeuTTS Air wrapper', fs.existsSync(neuScript), path.relative(ROOT, neuScript)) && ok;
|
|
370
|
+
ok = check('NeuTTS Air reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
|
|
371
|
+
ok = check('NeuTTS Air synth wrapper help', spawnSync(isExecutable(neuPath) ? neuPath : 'python3', ['integrations/neuttsair/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/neuttsair/synth.py') && ok;
|
|
372
|
+
note('NeuTTS Air backbone/device', `${env.NEUTTSAIR_BACKBONE_REPO || env.NEUTTSAIR_BACKBONE || 'neuphonic/neutts-air-q4-gguf'} / ${env.NEUTTSAIR_BACKBONE_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
|
|
373
|
+
note('NeuTTS Air codec/device', `${env.NEUTTSAIR_CODEC_REPO || env.NEUTTSAIR_CODEC || 'neuphonic/neucodec'} / ${env.NEUTTSAIR_CODEC_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
|
|
374
|
+
note('NeuTTS Air progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.NEUTTSAIR_PROGRESS || '0').toLowerCase()) ? 'neuttsair' : 'edge fallback');
|
|
264
375
|
}
|
|
265
376
|
|
|
266
377
|
const backendCommand = {
|
package/scripts/install.mjs
CHANGED
|
@@ -4,6 +4,7 @@ import path from 'node:path';
|
|
|
4
4
|
import readline from 'node:readline/promises';
|
|
5
5
|
import { stdin as input, stdout as output } from 'node:process';
|
|
6
6
|
import { buildEnvFile, normalizeInstallAnswers, renderInstallSummary, SUPPORTED_HARNESSES } from '../app-node/install_config.mjs';
|
|
7
|
+
import { detectInstalledAgents, pickDefaultBackend, formatAgentDetectionReport } from '../app-node/agent_detect.mjs';
|
|
7
8
|
|
|
8
9
|
const ROOT = path.resolve(path.dirname(new URL(import.meta.url).pathname), '..');
|
|
9
10
|
|
|
@@ -138,7 +139,20 @@ async function main() {
|
|
|
138
139
|
try {
|
|
139
140
|
console.log('VerbalCoding installer');
|
|
140
141
|
console.log(`Supported harnesses: ${SUPPORTED_HARNESSES.join(', ')}`);
|
|
141
|
-
|
|
142
|
+
console.log('Discord setup: keep https://discord.com/developers/applications open.');
|
|
143
|
+
console.log('Create an application/bot, enable Message Content intent, then paste the bot token and application/client ID below.');
|
|
144
|
+
console.log('If you are not ready, press Enter to skip and run `vc setup token` / `vc setup channels` later.');
|
|
145
|
+
let detectionDefault = 'hermes';
|
|
146
|
+
try {
|
|
147
|
+
const detection = await detectInstalledAgents(process.env);
|
|
148
|
+
console.log('');
|
|
149
|
+
console.log(formatAgentDetectionReport(detection));
|
|
150
|
+
detectionDefault = pickDefaultBackend(detection, process.env.AGENT_BACKEND);
|
|
151
|
+
console.log('');
|
|
152
|
+
} catch (e) {
|
|
153
|
+
console.log(`(agent detection skipped: ${e?.message || e})`);
|
|
154
|
+
}
|
|
155
|
+
const harness = await ask('Harness/backend', detectionDefault);
|
|
142
156
|
let agentCommand = '';
|
|
143
157
|
let agentLabel = '';
|
|
144
158
|
if (harness.toLowerCase() === 'custom') {
|
|
@@ -152,7 +166,7 @@ async function main() {
|
|
|
152
166
|
const autoJoinVoiceChannels = await ask('Auto-join voice channel names', process.env.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general');
|
|
153
167
|
const transcriptChannelId = await ask('Transcript text channel/thread ID', process.env.TRANSCRIPT_CHANNEL_ID || '');
|
|
154
168
|
const language = await ask('Default voice language: ko/en/auto', process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko');
|
|
155
|
-
const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic', process.env.TTS_BACKEND || 'edge');
|
|
169
|
+
const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic/omnivoice/qwen3tts/mlxaudio/fireredtts2/mossttsnano/neuttsair', process.env.TTS_BACKEND || 'edge');
|
|
156
170
|
const edgeTtsCommand = await ask('Edge TTS command', process.env.EDGE_TTS_COMMAND || process.env.TTS_EDGE_COMMAND || 'edge-tts');
|
|
157
171
|
const ttsVoice = await ask('TTS voice', process.env.TTS_VOICE || 'ko-KR-SunHiNeural');
|
|
158
172
|
const ttsRate = await ask('TTS rate', process.env.TTS_RATE || '+10%');
|
|
@@ -165,6 +179,8 @@ async function main() {
|
|
|
165
179
|
const openvoiceDir = await ask('OpenVoice repo dir', process.env.OPENVOICE_DIR || './vendor/OpenVoice');
|
|
166
180
|
const openvoiceVenv = await ask('OpenVoice venv dir', process.env.OPENVOICE_VENV || './.venv-openvoice');
|
|
167
181
|
const openvoiceRefAudio = await ask('OpenVoice reference audio path', process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
182
|
+
const omnivoicePython = await ask('OmniVoice Python', process.env.OMNIVOICE_PYTHON || './.venv-omnivoice/bin/python');
|
|
183
|
+
const omnivoiceRefAudio = await ask('OmniVoice reference audio path', process.env.OMNIVOICE_REF_AUDIO || process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
|
|
168
184
|
const requireWake = (await ask('Require wake word? 1/0', process.env.REQUIRE_WAKE_WORD || '0')) === '1';
|
|
169
185
|
const verboseProgress = (await ask('Verbose progress by default? 1/0', process.env.AGENT_VERBOSE_PROGRESS || process.env.VERBALCODING_VERBOSE_PROGRESS || '0')) === '1';
|
|
170
186
|
const utteranceIdleMs = await ask('Utterance idle wait before STT, ms', process.env.UTTERANCE_IDLE_MS || '4500');
|
|
@@ -193,6 +209,8 @@ async function main() {
|
|
|
193
209
|
openvoiceDir,
|
|
194
210
|
openvoiceVenv,
|
|
195
211
|
openvoiceRefAudio,
|
|
212
|
+
omnivoicePython,
|
|
213
|
+
omnivoiceRefAudio,
|
|
196
214
|
requireWakeWord: requireWake,
|
|
197
215
|
verboseProgress,
|
|
198
216
|
utteranceIdleMs,
|