npm - verbalcoding - Versions diffs - 0.2.12 → 0.2.13 - Mend

verbalcoding 0.2.12 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

package/.env.example +74 -4
package/README.es.md +3 -1
package/README.fr.md +3 -1
package/README.ja.md +3 -1
package/README.ko.md +4 -2
package/README.md +4 -2
package/README.ru.md +3 -1
package/README.zh.md +3 -1
package/app-node/agent_adapters.test.mjs +14 -0
package/app-node/agent_routing.mjs +148 -0
package/app-node/agent_routing.test.mjs +138 -0
package/app-node/agent_turn.mjs +86 -0
package/app-node/agent_turn.test.mjs +109 -0
package/app-node/bridge_context.mjs +73 -0
package/app-node/bridge_context.test.mjs +54 -0
package/app-node/bridge_state.mjs +4 -0
package/app-node/bridge_wireup.test.mjs +462 -0
package/app-node/cli_install.test.mjs +31 -0
package/app-node/cross_agent_routing.test.mjs +78 -0
package/app-node/discord_command_router.mjs +204 -0
package/app-node/discord_command_router.test.mjs +311 -0
package/app-node/discord_voice_setup.mjs +251 -0
package/app-node/discord_voice_setup.test.mjs +86 -0
package/app-node/hermes_profiles.test.mjs +12 -1
package/app-node/install_config.mjs +110 -3
package/app-node/install_config.test.mjs +8 -0
package/app-node/instance_doctor.test.mjs +9 -0
package/app-node/instances.test.mjs +8 -1
package/app-node/main.mjs +488 -1368
package/app-node/mcp_tools.test.mjs +7 -0
package/app-node/notification_handler.mjs +89 -0
package/app-node/notification_handler.test.mjs +187 -0
package/app-node/plan_dispatcher.mjs +215 -0
package/app-node/plan_dispatcher.test.mjs +101 -0
package/app-node/plan_mode.mjs +36 -7
package/app-node/plan_mode.test.mjs +78 -0
package/app-node/progress_handler.mjs +220 -0
package/app-node/progress_handler.test.mjs +193 -0
package/app-node/progress_speech.mjs +54 -32
package/app-node/progress_speech.test.mjs +12 -3
package/app-node/project_sessions.mjs +5 -2
package/app-node/project_sessions.test.mjs +7 -0
package/app-node/research_mode.mjs +282 -0
package/app-node/research_mode.test.mjs +264 -0
package/app-node/restart_notice.mjs +3 -0
package/app-node/restart_notice.test.mjs +11 -0
package/app-node/session_ontology.mjs +271 -0
package/app-node/session_ontology.test.mjs +130 -0
package/app-node/smart_progress.mjs +1 -1
package/app-node/stream_sentencer.mjs +32 -2
package/app-node/stream_sentencer.test.mjs +65 -0
package/app-node/streaming_tts_queue.mjs +5 -1
package/app-node/streaming_tts_queue.test.mjs +7 -1
package/app-node/stt_whisper.mjs +24 -0
package/app-node/stt_whisper.test.mjs +32 -0
package/app-node/text_routing.mjs +4 -2
package/app-node/tts_backends.mjs +537 -3
package/app-node/tts_backends.test.mjs +454 -0
package/app-node/tts_player.mjs +164 -0
package/app-node/tts_player.test.mjs +202 -0
package/app-node/tts_runtime.mjs +134 -0
package/app-node/tts_runtime.test.mjs +89 -0
package/app-node/tts_settings.mjs +150 -3
package/app-node/tts_settings.test.mjs +204 -0
package/app-node/tts_voice_config.mjs +136 -2
package/app-node/tts_voice_config.test.mjs +94 -0
package/app-node/utterance_router.mjs +216 -0
package/app-node/utterance_router.test.mjs +236 -0
package/app-node/voice_autojoin.mjs +37 -0
package/app-node/voice_autojoin.test.mjs +59 -0
package/app-node/voice_io.mjs +272 -0
package/app-node/voice_io.test.mjs +102 -0
package/app-node/voice_turn_runner.mjs +449 -0
package/app-node/voice_turn_runner.test.mjs +289 -0
package/docs/CONFIGURATION.md +12 -2
package/docs/HARNESSES.md +58 -0
package/docs/HARNESS_AIDER.md +50 -0
package/docs/HARNESS_CLAUDE.md +56 -0
package/docs/HARNESS_CODEX.md +56 -0
package/docs/HARNESS_CURSOR.md +45 -0
package/docs/HARNESS_GEMINI.md +45 -0
package/docs/HARNESS_HERMES.md +57 -0
package/docs/HARNESS_OPENCLAW.md +44 -0
package/docs/HARNESS_OPENCODE.md +44 -0
package/docs/README.md +1 -0
package/docs/ROADMAP.md +20 -5
package/docs/TTS_BACKENDS.md +227 -0
package/docs/USAGE.md +22 -0
package/docs/i18n/AGENTS.es.md +34 -0
package/docs/i18n/AGENTS.fr.md +34 -0
package/docs/i18n/AGENTS.ja.md +34 -0
package/docs/i18n/AGENTS.ko.md +34 -0
package/docs/i18n/AGENTS.ru.md +34 -0
package/docs/i18n/AGENTS.zh.md +34 -0
package/docs/i18n/HARNESSES.es.md +58 -0
package/docs/i18n/HARNESSES.fr.md +58 -0
package/docs/i18n/HARNESSES.ja.md +58 -0
package/docs/i18n/HARNESSES.ko.md +58 -0
package/docs/i18n/HARNESSES.ru.md +58 -0
package/docs/i18n/HARNESSES.zh.md +58 -0
package/docs/i18n/HARNESS_AIDER.es.md +48 -0
package/docs/i18n/HARNESS_AIDER.fr.md +48 -0
package/docs/i18n/HARNESS_AIDER.ja.md +50 -0
package/docs/i18n/HARNESS_AIDER.ko.md +50 -0
package/docs/i18n/HARNESS_AIDER.ru.md +48 -0
package/docs/i18n/HARNESS_AIDER.zh.md +48 -0
package/docs/i18n/HARNESS_CLAUDE.es.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.fr.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.ja.md +56 -0
package/docs/i18n/HARNESS_CLAUDE.ko.md +56 -0
package/docs/i18n/HARNESS_CLAUDE.ru.md +55 -0
package/docs/i18n/HARNESS_CLAUDE.zh.md +56 -0
package/docs/i18n/HARNESS_CODEX.es.md +55 -0
package/docs/i18n/HARNESS_CODEX.fr.md +55 -0
package/docs/i18n/HARNESS_CODEX.ja.md +56 -0
package/docs/i18n/HARNESS_CODEX.ko.md +56 -0
package/docs/i18n/HARNESS_CODEX.ru.md +55 -0
package/docs/i18n/HARNESS_CODEX.zh.md +56 -0
package/docs/i18n/HARNESS_CURSOR.es.md +42 -0
package/docs/i18n/HARNESS_CURSOR.fr.md +42 -0
package/docs/i18n/HARNESS_CURSOR.ja.md +45 -0
package/docs/i18n/HARNESS_CURSOR.ko.md +45 -0
package/docs/i18n/HARNESS_CURSOR.ru.md +42 -0
package/docs/i18n/HARNESS_CURSOR.zh.md +42 -0
package/docs/i18n/HARNESS_GEMINI.es.md +44 -0
package/docs/i18n/HARNESS_GEMINI.fr.md +44 -0
package/docs/i18n/HARNESS_GEMINI.ja.md +45 -0
package/docs/i18n/HARNESS_GEMINI.ko.md +45 -0
package/docs/i18n/HARNESS_GEMINI.ru.md +44 -0
package/docs/i18n/HARNESS_GEMINI.zh.md +45 -0
package/docs/i18n/HARNESS_HERMES.es.md +54 -0
package/docs/i18n/HARNESS_HERMES.fr.md +54 -0
package/docs/i18n/HARNESS_HERMES.ja.md +57 -0
package/docs/i18n/HARNESS_HERMES.ko.md +57 -0
package/docs/i18n/HARNESS_HERMES.ru.md +54 -0
package/docs/i18n/HARNESS_HERMES.zh.md +57 -0
package/docs/i18n/HARNESS_OPENCLAW.es.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.fr.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.ja.md +44 -0
package/docs/i18n/HARNESS_OPENCLAW.ko.md +44 -0
package/docs/i18n/HARNESS_OPENCLAW.ru.md +41 -0
package/docs/i18n/HARNESS_OPENCLAW.zh.md +42 -0
package/docs/i18n/HARNESS_OPENCODE.es.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.fr.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.ja.md +44 -0
package/docs/i18n/HARNESS_OPENCODE.ko.md +44 -0
package/docs/i18n/HARNESS_OPENCODE.ru.md +41 -0
package/docs/i18n/HARNESS_OPENCODE.zh.md +44 -0
package/docs/superpowers/plans/2026-05-14-cross-agent-voice-transfer.md +625 -0
package/docs/superpowers/plans/2026-05-21-audio-overview-narrated-diffs.md +95 -0
package/docs/superpowers/plans/2026-05-21-autoresearch-ontology.md +83 -0
package/docs/superpowers/plans/2026-05-21-phase11-push-to-talk-wakeword-v2.md +77 -0
package/docs/superpowers/plans/2026-05-21-phase12-multi-user-voice.md +147 -0
package/docs/superpowers/plans/2026-05-21-phase14-verbalbench.md +136 -0
package/docs/superpowers/plans/2026-05-21-phase15-phone-companion.md +72 -0
package/integrations/fireredtts2/mlx_llm.py +183 -0
package/integrations/fireredtts2/synth.py +156 -0
package/integrations/fireredtts2/synth_mlx.py +196 -0
package/integrations/mlxaudio/synth.py +74 -0
package/integrations/neuttsair/synth.py +104 -0
package/integrations/omnivoice/synth.py +110 -0
package/package.json +6 -1
package/scripts/cli.mjs +84 -0
package/scripts/doctor.mjs +104 -4
package/scripts/install.mjs +5 -1
package/scripts/install_fireredtts2.sh +109 -0
package/scripts/install_mlxaudio.sh +34 -0
package/scripts/install_mossttsnano.sh +46 -0
package/scripts/postinstall.mjs +34 -0

package/integrations/neuttsair/synth.py ADDED Viewed

@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+import argparse
+import os
+import sys
+from pathlib import Path
+def repo_root() -> Path:
+    return Path(__file__).resolve().parents[2]
+def resolve(root: Path, value: str | None) -> str | None:
+    if not value:
+        return None
+    p = Path(value).expanduser()
+    if not p.is_absolute():
+        p = root / p
+    return str(p)
+def read_text_arg(value: str | None) -> str:
+    if not value:
+        return ""
+    p = Path(value).expanduser()
+    if p.exists():
+        return p.read_text(encoding="utf-8").strip()
+    return value.strip()
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="NeuTTS-Air synthesis wrapper for VerbalCoding")
+    parser.add_argument("--text", required=True)
+    parser.add_argument("--output", required=True)
+    parser.add_argument("--backbone", "--backbone-repo", dest="backbone", default="neuphonic/neutts-air-q4-gguf")
+    parser.add_argument("--codec", "--codec-repo", dest="codec", default="neuphonic/neucodec")
+    parser.add_argument("--backbone-device", default="cpu")
+    parser.add_argument("--codec-device", default="cpu")
+    parser.add_argument("--ref-audio", default="")
+    parser.add_argument("--ref-text", default="")
+    parser.add_argument("--ref-text-file", default="")
+    parser.add_argument("--language", default="en")
+    parser.add_argument("--sample-rate", type=int, default=24000)
+    parser.add_argument("--cache-ref", action="store_true")
+    return parser.parse_args()
+def main() -> int:
+    """Synthesize one utterance with NeuTTS-Air and return a process exit status.
+    Returns:
+        0 after the output file is written, 127 when soundfile/torch/neutts cannot
+        be imported, 66 when the reference audio is unset or missing, and 1 when
+        model loading, reference encoding, inference, or writing raises.
+    """
+    args = parse_args()
+    root = repo_root()
+    # Prefer a vendored checkout of neutts-air, when present, over an installed package.
+    vendor = root / "vendor" / "neutts-air"
+    if vendor.exists():
+        sys.path.insert(0, str(vendor))
+    try:
+        import soundfile as sf
+        import torch
+        from neutts import NeuTTS
+    except Exception as exc:
+        print(f"NeuTTS-Air dependencies are missing: {exc}", file=sys.stderr, flush=True)
+        return 127
+    ref_audio = resolve(root, args.ref_audio)
+    # The transcript file takes precedence; the inline text is used only when it yields nothing.
+    ref_text = read_text_arg(args.ref_text_file) or read_text_arg(args.ref_text)
+    if not ref_audio or not Path(ref_audio).exists():
+        print(f"NeuTTS-Air reference audio not found: {ref_audio}", file=sys.stderr, flush=True)
+        return 66
+    if not ref_text:
+        # Fall back to a short generic transcript; users should configure NEUTTSAIR_REF_TEXT
+        # or NEUTTSAIR_REF_TEXT_FILE for best cloning quality.
+        ref_text = "This is a reference voice sample."
+    out = Path(args.output).expanduser()
+    out.parent.mkdir(parents=True, exist_ok=True)
+    # The cache file sits next to the reference audio, with its suffix replaced.
+    # NOTE(review): the cache name depends only on the audio path, not on the codec
+    # or the audio contents, so a stale cache would be reused after either changes.
+    cache_path = Path(ref_audio).with_suffix(".neutts.pt")
+    # NOTE(review): args.language is parsed but never read in this function.
+    try:
+        print(f"[neutts-air] loading backbone={args.backbone} codec={args.codec}", file=sys.stderr, flush=True)
+        tts = NeuTTS(
+            backbone_repo=args.backbone,
+            backbone_device=args.backbone_device,
+            codec_repo=args.codec,
+            codec_device=args.codec_device,
+        )
+        if args.cache_ref and cache_path.exists():
+            print(f"[neutts-air] loading cached reference {cache_path}", file=sys.stderr, flush=True)
+            # NOTE(review): weights_only=False permits full pickle deserialization, which
+            # can execute code from the file; safe only if the cache file is trusted.
+            ref_codes = torch.load(cache_path, map_location="cpu", weights_only=False)
+        else:
+            print(f"[neutts-air] encoding reference {ref_audio}", file=sys.stderr, flush=True)
+            ref_codes = tts.encode_reference(ref_audio)
+            if args.cache_ref:
+                torch.save(ref_codes, cache_path)
+        print(f"[neutts-air] generating chars={len(args.text)}", file=sys.stderr, flush=True)
+        wav = tts.infer(args.text, ref_codes, ref_text)
+        # args.sample_rate is passed as the file's sample rate; no resampling happens here.
+        sf.write(str(out), wav, args.sample_rate)
+        print(f"[neutts-air] wrote {out}", file=sys.stderr, flush=True)
+        return 0
+    except Exception as exc:
+        print(f"NeuTTS-Air synthesis failed: {exc}", file=sys.stderr, flush=True)
+        return 1
+# Run as a script: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

package/integrations/omnivoice/synth.py ADDED Viewed

@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+"""Synthesize speech with k2-fsa OmniVoice for VerbalCoding.
+The wrapper keeps the Node bridge independent from OmniVoice's Python runtime.
+It accepts one text chunk and writes a 24 kHz WAV file.
+"""
+from __future__ import annotations
+import argparse
+import inspect
+import sys
+from pathlib import Path
+from typing import Any
+def _torch_dtype(name: str):
+    import torch
+    normalized = (name or "").lower()
+    if normalized in {"auto", ""}:
+        return None
+    if normalized in {"float16", "fp16", "half"}:
+        return torch.float16
+    if normalized in {"bfloat16", "bf16"}:
+        return torch.bfloat16
+    if normalized in {"float32", "fp32"}:
+        return torch.float32
+    raise ValueError(f"Unsupported OmniVoice dtype: {name}")
+def _filtered_call(fn, **kwargs: Any):
+    """Call fn with only supported kwargs when the signature is inspectable."""
+    try:
+        sig = inspect.signature(fn)
+    except (TypeError, ValueError):
+        return fn(**{k: v for k, v in kwargs.items() if v not in (None, "")})
+    accepts_kwargs = any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values())
+    clean = {k: v for k, v in kwargs.items() if v not in (None, "")}
+    if accepts_kwargs:
+        return fn(**clean)
+    return fn(**{k: v for k, v in clean.items() if k in sig.parameters})
+def synthesize(args: argparse.Namespace) -> None:
+    """Load the OmniVoice model, generate audio for args.text, and write it to args.output.
+    Raises:
+        RuntimeError: when soundfile/torch/omnivoice cannot be imported, when the
+            model returns an empty list, or when the output file is missing or empty.
+        ValueError: from _torch_dtype when args.dtype is not a recognized name.
+    """
+    try:
+        import soundfile as sf
+        import torch
+        from omnivoice import OmniVoice
+    except Exception as exc:  # pragma: no cover - exercised in real install
+        raise RuntimeError(
+            "OmniVoice dependencies are missing. Install them in OMNIVOICE_PYTHON's environment: "
+            "pip install torch torchaudio soundfile omnivoice"
+        ) from exc
+    dtype = _torch_dtype(args.dtype)
+    load_kwargs = {"device_map": args.device}
+    # Only pass dtype when one was requested; "auto" leaves the choice to from_pretrained.
+    if dtype is not None:
+        load_kwargs["dtype"] = dtype
+    model = OmniVoice.from_pretrained(args.model, **load_kwargs)
+    # Inference only: turn off gradient tracking when this torch build exposes the switch.
+    if hasattr(torch, "set_grad_enabled"):
+        torch.set_grad_enabled(False)
+    # _filtered_call drops empty/None options and any keyword generate() does not declare.
+    audio = _filtered_call(
+        model.generate,
+        text=args.text,
+        ref_audio=args.ref_audio,
+        ref_text=args.ref_text,
+        language=args.language,
+        speaker=args.speaker,
+    )
+    # generate() may return a tuple or a list; the first element is taken as the audio.
+    if isinstance(audio, tuple):
+        audio = audio[0]
+    if isinstance(audio, list):
+        if not audio:
+            raise RuntimeError("OmniVoice returned no audio")
+        audio = audio[0]
+    out = Path(args.output)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    # The sample rate is fixed at 24000 here.
+    # NOTE(review): assumes the model emits 24 kHz audio in a form soundfile accepts - confirm.
+    sf.write(str(out), audio, 24000)
+    # Guard against a write that produced no file or a zero-byte file.
+    if not out.exists() or out.stat().st_size <= 0:
+        raise RuntimeError(f"OmniVoice wrote empty output: {out}")
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="VerbalCoding OmniVoice TTS wrapper")
+    parser.add_argument("--text", required=True)
+    parser.add_argument("--output", required=True)
+    parser.add_argument("--model", default="k2-fsa/OmniVoice")
+    parser.add_argument("--device", default="mps")
+    parser.add_argument("--dtype", default="float16")
+    parser.add_argument("--ref-audio", default="")
+    parser.add_argument("--ref-text", default="")
+    parser.add_argument("--language", default="ko")
+    parser.add_argument("--speaker", default="")
+    args = parser.parse_args(argv)
+    synthesize(args)
+    return 0
+if __name__ == "__main__":  # pragma: no cover
+    try:
+        raise SystemExit(main())
+    except Exception as exc:
+        print(f"OmniVoice synthesis failed: {exc}", file=sys.stderr)
+        raise SystemExit(1)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "verbalcoding",
-  "version": "0.2.12",
+  "version": "0.2.13",
   "description": "Discord voice bridge for CLI coding agents.",
   "license": "MIT",
   "repository": {
@@ -35,6 +35,10 @@
     "scripts/*.mjs",
     "scripts/*.sh",
     "integrations/openvoice/*.py",
+    "integrations/omnivoice/*.py",
+    "integrations/fireredtts2/*.py",
+    "integrations/mlxaudio/*.py",
+    "integrations/neuttsair/*.py",
     "run.sh",
     ".env.example",
     "README.md",
@@ -42,6 +46,7 @@
     "LICENSE"
   ],
   "scripts": {
+    "postinstall": "node scripts/postinstall.mjs",
     "start": "node app-node/main.mjs",
     "setup": "node scripts/install.mjs",
     "doctor": "node scripts/doctor.mjs",

package/scripts/cli.mjs CHANGED Viewed

@@ -10,7 +10,17 @@ import {
   normalizeInstanceAnswers,
   parseKeyValueEnv,
   renderInstanceSetupSummary,
+  SUPPORTED_TTS_BACKENDS,
 } from '../app-node/install_config.mjs';
+import {
+  applyTtsVoiceSelectionToEnv,
+  defaultTtsVoiceConfig,
+  effectiveTtsVoiceSelection,
+  readTtsVoiceConfig,
+  updateTtsVoiceConfig,
+  writeTtsVoiceConfig,
+} from '../app-node/tts_voice_config.mjs';
+import { normalizeTtsBackendName } from '../app-node/tts_settings.mjs';
 import { ensureHermesProfile, validateProfileName } from '../app-node/hermes_profiles.mjs';
 import { checkInstanceConfigs } from '../app-node/instance_doctor.mjs';
 import { healInstanceProfileFromEnv } from '../app-node/instance_profile_lifecycle.mjs';
@@ -38,6 +48,8 @@ Usage:
   vc language <ko|en|auto>
   vc language status
   vc restart auto <on|off|status>
+  vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}>
+  vc tts status
   vc bot invite <client-id> [--guild <guild-id>]
   vc instance list
   vc instance setup [name] [--start]
@@ -57,6 +69,7 @@ Examples:
   vc language en
   vc language ko
   vc language auto
+  vc tts backend qwen3
   vc restart auto off
   vc bot invite 123456789012345678
 `;
@@ -97,6 +110,56 @@ function printLanguageStatus(values) {
   console.log(`TTS voice: ${s.ttsVoice}`);
 }
+function ttsVoiceConfigPath(values = readEnvFile()) {
+  const configured = values.TTS_VOICE_CONFIG || process.env.TTS_VOICE_CONFIG || path.join('config', 'tts-voices.json');
+  return path.isAbsolute(configured) ? configured : path.join(ROOT, configured);
+}
+function compactUpdates(updates) {
+  return Object.fromEntries(Object.entries(updates).filter(([, value]) => value != null && value !== ''));
+}
+function printTtsStatus() {
+  const env = readEnvFile();
+  const config = readTtsVoiceConfig(ttsVoiceConfigPath(env), defaultTtsVoiceConfig());
+  const selected = effectiveTtsVoiceSelection(config, env);
+  console.log(`TTS backend: ${selected.backend}`);
+  console.log(`TTS voice type: ${selected.voiceType}`);
+  console.log(`TTS voice: ${selected.voice?.label || selected.voice?.voice || '-'}`);
+}
+/**
+ * Switch the configured TTS backend from the command line.
+ *
+ * Normalizes the backend name, writes the updated voice config file, then upserts
+ * the derived TTS_* / backend-specific keys into the .env file and prints a summary.
+ * Throws an Error listing SUPPORTED_TTS_BACKENDS when the name does not normalize.
+ *
+ * rawBackend: backend name as typed by the user.
+ * rawVoiceType: optional voice type; '' means none was given.
+ */
+function setTtsBackendFromCli(rawBackend, rawVoiceType = '') {
+  const backend = normalizeTtsBackendName(rawBackend, '');
+  if (!backend) {
+    throw new Error(`Unknown TTS backend: ${rawBackend}. Supported: ${SUPPORTED_TTS_BACKENDS.join(', ')}`);
+  }
+  const env = readEnvFile();
+  const configPath = ttsVoiceConfigPath(env);
+  const baseConfig = readTtsVoiceConfig(configPath, defaultTtsVoiceConfig());
+  // The voice config file is written first; the .env update below follows it.
+  const nextConfig = updateTtsVoiceConfig(baseConfig, { backend, voiceType: rawVoiceType });
+  writeTtsVoiceConfig(configPath, nextConfig);
+  // Resolve the selection as if the new backend were already in .env; an explicit
+  // voice type overrides the one currently stored there.
+  const selected = effectiveTtsVoiceSelection(nextConfig, { ...env, TTS_BACKEND: backend, TTS_VOICE_TYPE: rawVoiceType || env.TTS_VOICE_TYPE });
+  const nextEnv = applyTtsVoiceSelectionToEnv(env, selected);
+  // compactUpdates drops null/undefined/'' values, so only keys that have a value are passed on.
+  // NOTE(review): a key that is empty for the new selection is therefore not sent to
+  // upsertEnvFile at all; presumably any previous value stays in .env - confirm.
+  upsertEnvFile(ENV_PATH, compactUpdates({
+    TTS_BACKEND: nextEnv.TTS_BACKEND,
+    TTS_VOICE_TYPE: nextEnv.TTS_VOICE_TYPE,
+    TTS_VOICE: nextEnv.TTS_VOICE || env.TTS_VOICE,
+    VOICE_LANGUAGE: nextEnv.VOICE_LANGUAGE || env.VOICE_LANGUAGE,
+    QWEN3TTS_MODE: nextEnv.QWEN3TTS_MODE,
+    QWEN3TTS_SPEAKER: nextEnv.QWEN3TTS_SPEAKER,
+    QWEN3TTS_REF_AUDIO: nextEnv.QWEN3TTS_REF_AUDIO,
+    QWEN3TTS_INSTRUCT: nextEnv.QWEN3TTS_INSTRUCT,
+    MLXAUDIO_VOICE: nextEnv.MLXAUDIO_VOICE,
+    FIREREDTTS2_PROMPT_AUDIO: nextEnv.FIREREDTTS2_PROMPT_AUDIO,
+    MOSSTTSNANO_MODE: nextEnv.MOSSTTSNANO_MODE,
+    MOSSTTSNANO_PROMPT_AUDIO: nextEnv.MOSSTTSNANO_PROMPT_AUDIO,
+  }));
+  console.log(`Updated ${ENV_PATH}`);
+  console.log(`TTS backend: ${selected.backend}`);
+  console.log(`TTS voice type: ${selected.voiceType}`);
+  console.log('Restart the bridge for CLI changes to take effect; voice requests switch the running bridge immediately.');
+}
 function printInstanceStatus(statuses) {
   if (statuses.length === 0) {
     console.log('No instance env files found in instances/*.env');
@@ -308,9 +371,30 @@ async function main(argv = process.argv.slice(2)) {
   }
   if (command === 'status') {
     printLanguageStatus(readEnvFile());
+    printTtsStatus();
     console.log(autoRestartStatusText(readEnvFile()));
     return;
   }
+  if (command === 'tts' || command === 'voice') {
+    if (!subcommand || subcommand === 'status') {
+      printTtsStatus();
+      return;
+    }
+    if (subcommand === 'backend' || subcommand === 'switch' || subcommand === 'set') {
+      const backend = argv[2];
+      const voiceType = argv.includes('--voice-type') ? argv[argv.indexOf('--voice-type') + 1] : '';
+      if (!backend || backend.startsWith('--')) {
+        console.error(`Use: vc tts backend <${SUPPORTED_TTS_BACKENDS.join('|')}> [--voice-type <name>]`);
+        process.exitCode = 2;
+        return;
+      }
+      setTtsBackendFromCli(backend, voiceType);
+      return;
+    }
+    console.error('Use: vc tts status OR vc tts backend <name>');
+    process.exitCode = 2;
+    return;
+  }
   if (command === 'instance') {
     await handleInstanceCommand(argv);
     return;

package/scripts/doctor.mjs CHANGED Viewed

@@ -20,12 +20,25 @@ function readEnvFile(file) {
   }
 }
+function dropUnexpandedRefs(env) {
+  // parseKeyValueEnv does no shell expansion, so values like
+  //   PATH="$JAVA_HOME/bin:$PATH"
+  // would otherwise clobber process.env.PATH with a literal "$VAR" string,
+  // breaking every JS-level PATH lookup (e.g. agent_detect.defaultWhich).
+  const out = {};
+  for (const [key, value] of Object.entries(env)) {
+    if (typeof value === 'string' && /\$[A-Za-z_][A-Za-z0-9_]*|\$\{[^}]+\}/.test(value)) continue;
+    out[key] = value;
+  }
+  return out;
+}
 function mergeEnv() {
   // Project .env intentionally wins over ~/.zshrc so local setup is reproducible.
   return {
     ...process.env,
-    ...readEnvFile(path.join(process.env.HOME || '', '.zshrc')),
-    ...readEnvFile(path.join(ROOT, '.env')),
+    ...dropUnexpandedRefs(readEnvFile(path.join(process.env.HOME || '', '.zshrc'))),
+    ...dropUnexpandedRefs(readEnvFile(path.join(ROOT, '.env'))),
   };
 }
@@ -105,10 +118,43 @@ function fixablePrerequisites(env) {
     const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
     if (!resolveCommand(edgeCommand, [path.join(ROOT, '.venv-tts', 'bin', 'edge-tts')])) missing.push('edge-tts');
   }
+  if (ttsBackend === 'fireredtts2') {
+    const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
+    const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
+    const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
+    if (!isExecutable(firePath) || !fs.existsSync(fireModel)) missing.push('FireRedTTS-2');
+  }
   if (backend === 'hermes' && !commandExists('hermes')) missing.push('hermes CLI');
   return missing;
 }
+/**
+ * Run the bundled FireRedTTS-2 installer when that backend is selected but not usable.
+ *
+ * Returns false without side effects when TTS_BACKEND is not "fireredtts2", when the
+ * wrapper is already executable and the model directory exists, or when
+ * VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 is 0/false/no/off. Otherwise runs
+ * scripts/install_fireredtts2.sh, records the default paths in .env, and returns
+ * true, whether or not the installer succeeded.
+ */
+function installFireRedTts2IfNeeded(env) {
+  const ttsBackend = (env.TTS_BACKEND || 'edge').toLowerCase();
+  if (ttsBackend !== 'fireredtts2') return false;
+  const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
+  const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
+  const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
+  if (isExecutable(firePath) && fs.existsSync(fireModel)) return false;
+  // The opt-out is read from process.env, not from the merged `env` argument.
+  if (['0', 'false', 'no', 'off'].includes(String(process.env.VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 || '1').toLowerCase())) {
+    console.log('Skipping FireRedTTS-2 auto-install because VERBALCODING_DOCTOR_INSTALL_FIREREDTTS2 is off.');
+    return false;
+  }
+  console.log('VerbalCoding doctor: TTS_BACKEND=fireredtts2 but FireRedTTS-2 is missing; installing...');
+  const result = spawnSync('bash', [path.join(ROOT, 'scripts', 'install_fireredtts2.sh'), '--yes'], {
+    cwd: ROOT,
+    stdio: 'inherit',
+    env: process.env,
+  });
+  // A failed install is reported but does not stop the doctor run.
+  if (result.status !== 0) {
+    console.log(`FireRedTTS-2 installer exited with status ${result.status}. Continuing with checks.`);
+  }
+  // NOTE(review): this runs even after a failed install and replaces any custom
+  // FIREREDTTS2_COMMAND / FIREREDTTS2_PRETRAINED_DIR with the defaults - confirm intended.
+  upsertEnvFile(path.join(ROOT, '.env'), {
+    FIREREDTTS2_COMMAND: './.local/bin/fireredtts2',
+    FIREREDTTS2_PRETRAINED_DIR: 'pretrained_models/FireRedTTS2',
+  });
+  return true;
+}
 function installHermesCliIfNeeded(env) {
   const backend = (env.AGENT_BACKEND || 'hermes').toLowerCase();
   if (backend !== 'hermes' || commandExists('hermes')) return false;
@@ -192,6 +238,11 @@ if (autoFixEnabled && missingBeforeFix.length > 0) {
   env = mergeEnv();
 }
 if (autoFixEnabled) {
+  const fireAttempted = installFireRedTts2IfNeeded(env);
+  if (fireAttempted) {
+    console.log('');
+    env = mergeEnv();
+  }
   const hermesAttempted = installHermesCliIfNeeded(env);
   if (hermesAttempted) {
     console.log('');
@@ -246,8 +297,8 @@ note('Progress/voice language', env.VOICE_LANGUAGE || env.WHISPER_CPP_LANGUAGE |
 note('Latency log path', env.LATENCY_LOG_PATH || './.logs/latency.jsonl');
 note('TTS voice fallback', env.TTS_VOICE || 'ko-KR-SunHiNeural');
-if (!['edge', 'openvoice', 'speechswift', 'supertonic'].includes(ttsBackend)) {
-  ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, or supertonic') && ok;
+if (!['edge', 'openvoice', 'speechswift', 'supertonic', 'omnivoice', 'qwen3tts', 'mlxaudio', 'fireredtts2', 'mossttsnano', 'neuttsair'].includes(ttsBackend)) {
+  ok = check('TTS_BACKEND value', false, 'must be edge, openvoice, speechswift, supertonic, omnivoice, qwen3tts, mlxaudio, fireredtts2, mossttsnano, or neuttsair') && ok;
 }
 if (ttsBackend === 'edge') {
   const edgeCommand = env.EDGE_TTS_COMMAND || env.TTS_EDGE_COMMAND || 'edge-tts';
@@ -272,6 +323,55 @@ if (ttsBackend === 'edge') {
   ok = check('supertonic CLI', commandExists(supertonicCommand), commandExists(supertonicCommand) || 'install with: python3 -m pip install supertonic') && ok;
   note('Supertonic voice/lang/steps', `${env.SUPERTONIC_VOICE || 'M1'} / ${env.SUPERTONIC_LANGUAGE || 'ko'} / ${env.SUPERTONIC_STEPS || '2'}`);
   note('Supertonic progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.SUPERTONIC_PROGRESS || '0').toLowerCase()) ? 'supertonic' : 'edge fallback');
+} else if (ttsBackend === 'omnivoice') {
+  const omniPython = env.OMNIVOICE_PYTHON || path.join(ROOT, '.venv-omnivoice', 'bin', 'python');
+  const resolvedOmniPython = path.isAbsolute(omniPython) ? omniPython : path.resolve(ROOT, omniPython);
+  const refAudio = path.resolve(ROOT, env.OMNIVOICE_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
+  ok = check('OmniVoice Python', fs.existsSync(resolvedOmniPython) || commandExists(omniPython), fs.existsSync(resolvedOmniPython) ? path.relative(ROOT, resolvedOmniPython) : 'install with: python -m venv .venv-omnivoice && .venv-omnivoice/bin/pip install torch torchaudio soundfile omnivoice') && ok;
+  ok = check('OmniVoice reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
+  ok = check('OmniVoice synth wrapper help', spawnSync(fs.existsSync(resolvedOmniPython) ? resolvedOmniPython : 'python3', ['integrations/omnivoice/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/omnivoice/synth.py') && ok;
+  note('OmniVoice model/device', `${env.OMNIVOICE_MODEL || 'k2-fsa/OmniVoice'} / ${env.OMNIVOICE_DEVICE || 'mps'}`);
+  note('OmniVoice progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.OMNIVOICE_PROGRESS || '0').toLowerCase()) ? 'omnivoice' : 'edge fallback');
+} else if (ttsBackend === 'qwen3tts') {
+  const qwenCommand = env.QWEN3TTS_COMMAND || 'audio';
+  ok = check('Qwen3 TTS audio CLI', commandExists(qwenCommand), commandExists(qwenCommand) || 'install speech-swift/audio first') && ok;
+  note('Qwen3 speaker', env.QWEN3TTS_SPEAKER || 'sohee');
+  note('Qwen3 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.QWEN3TTS_PROGRESS || '0').toLowerCase()) ? 'qwen3tts' : 'edge fallback');
+} else if (ttsBackend === 'mlxaudio') {
+  const mlxPython = env.MLXAUDIO_PYTHON || './.venv-mlxaudio/bin/python';
+  const mlxPath = path.isAbsolute(mlxPython) ? mlxPython : path.resolve(ROOT, mlxPython);
+  ok = check('MLX Audio Python', isExecutable(mlxPath) || commandExists(mlxPython), isExecutable(mlxPath) ? path.relative(ROOT, mlxPath) : (commandExists(mlxPython) || 'install with: scripts/install_mlxaudio.sh --yes')) && ok;
+  ok = check('MLX Audio wrapper help', spawnSync(isExecutable(mlxPath) ? mlxPath : 'python3', ['integrations/mlxaudio/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/mlxaudio/synth.py') && ok;
+  note('MLX Audio model/voice', `${env.MLXAUDIO_MODEL || 'mlx-community/Qwen3-TTS-12Hz-1.7B-Base-8bit'} / ${env.MLXAUDIO_VOICE || 'Chelsie'}`);
+  note('MLX Audio progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MLXAUDIO_PROGRESS || '0').toLowerCase()) ? 'mlxaudio' : 'edge fallback');
+} else if (ttsBackend === 'fireredtts2') {
+  const fireCommand = env.FIREREDTTS2_COMMAND || './.local/bin/fireredtts2';
+  const firePath = path.isAbsolute(fireCommand) ? fireCommand : path.resolve(ROOT, fireCommand);
+  const fireModel = path.resolve(ROOT, env.FIREREDTTS2_PRETRAINED_DIR || 'pretrained_models/FireRedTTS2');
+  ok = check('FireRedTTS-2 wrapper', isExecutable(firePath), path.relative(ROOT, firePath) || firePath) && ok;
+  ok = check('FireRedTTS-2 model', fs.existsSync(fireModel), path.relative(ROOT, fireModel)) && ok;
+  ok = check('FireRedTTS-2 synth wrapper help', spawnSync(isExecutable(firePath) ? firePath : process.execPath, isExecutable(firePath) ? ['--help'] : ['integrations/fireredtts2/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/fireredtts2/synth.py') && ok;
+  note('FireRedTTS-2 progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.FIREREDTTS2_PROGRESS || '0').toLowerCase()) ? 'fireredtts2' : 'edge fallback');
+} else if (ttsBackend === 'mossttsnano') {
+  const mossCommand = env.MOSSTTSNANO_COMMAND || './.venv-mossttsnano/bin/python';
+  const mossPath = path.isAbsolute(mossCommand) ? mossCommand : path.resolve(ROOT, mossCommand);
+  const mossScript = path.resolve(ROOT, env.MOSSTTSNANO_SCRIPT || 'vendor/MOSS-TTS-Nano/infer.py');
+  ok = check('MOSS-TTS-Nano Python', isExecutable(mossPath) || commandExists(mossCommand), isExecutable(mossPath) ? path.relative(ROOT, mossPath) : (commandExists(mossCommand) || 'missing')) && ok;
+  ok = check('MOSS-TTS-Nano infer.py', fs.existsSync(mossScript), path.relative(ROOT, mossScript)) && ok;
+  note('MOSS checkpoint', env.MOSSTTSNANO_CHECKPOINT || 'OpenMOSS-Team/MOSS-TTS-Nano');
+  note('MOSS progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.MOSSTTSNANO_PROGRESS || '0').toLowerCase()) ? 'mossttsnano' : 'edge fallback');
+} else if (ttsBackend === 'neuttsair') {
+  const neuPython = env.NEUTTSAIR_PYTHON || './.venv-neuttsair/bin/python';
+  const neuPath = path.isAbsolute(neuPython) ? neuPython : path.resolve(ROOT, neuPython);
+  const neuScript = path.resolve(ROOT, env.NEUTTSAIR_SCRIPT || 'integrations/neuttsair/synth.py');
+  const refAudio = path.resolve(ROOT, env.NEUTTSAIR_REF_AUDIO || env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
+  ok = check('NeuTTS Air Python', isExecutable(neuPath) || commandExists(neuPython), isExecutable(neuPath) ? path.relative(ROOT, neuPath) : (commandExists(neuPython) || 'install with: python3 -m venv .venv-neuttsair && .venv-neuttsair/bin/pip install -e vendor/neutts-air')) && ok;
+  ok = check('NeuTTS Air wrapper', fs.existsSync(neuScript), path.relative(ROOT, neuScript)) && ok;
+  ok = check('NeuTTS Air reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
+  ok = check('NeuTTS Air synth wrapper help', spawnSync(isExecutable(neuPath) ? neuPath : 'python3', ['integrations/neuttsair/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/neuttsair/synth.py') && ok;
+  note('NeuTTS Air backbone/device', `${env.NEUTTSAIR_BACKBONE_REPO || env.NEUTTSAIR_BACKBONE || 'neuphonic/neutts-air-q4-gguf'} / ${env.NEUTTSAIR_BACKBONE_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
+  note('NeuTTS Air codec/device', `${env.NEUTTSAIR_CODEC_REPO || env.NEUTTSAIR_CODEC || 'neuphonic/neucodec'} / ${env.NEUTTSAIR_CODEC_DEVICE || env.NEUTTSAIR_DEVICE || 'mps'}`);
+  note('NeuTTS Air progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.NEUTTSAIR_PROGRESS || '0').toLowerCase()) ? 'neuttsair' : 'edge fallback');
 }
 const backendCommand = {

package/scripts/install.mjs CHANGED Viewed

@@ -166,7 +166,7 @@ async function main() {
     const autoJoinVoiceChannels = await ask('Auto-join voice channel names', process.env.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general');
     const transcriptChannelId = await ask('Transcript text channel/thread ID', process.env.TRANSCRIPT_CHANNEL_ID || '');
     const language = await ask('Default voice language: ko/en/auto', process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko');
-    const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic', process.env.TTS_BACKEND || 'edge');
+    const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic/omnivoice/qwen3tts/mlxaudio/fireredtts2/mossttsnano/neuttsair', process.env.TTS_BACKEND || 'edge');
     const edgeTtsCommand = await ask('Edge TTS command', process.env.EDGE_TTS_COMMAND || process.env.TTS_EDGE_COMMAND || 'edge-tts');
     const ttsVoice = await ask('TTS voice', process.env.TTS_VOICE || 'ko-KR-SunHiNeural');
     const ttsRate = await ask('TTS rate', process.env.TTS_RATE || '+10%');
@@ -179,6 +179,8 @@ async function main() {
     const openvoiceDir = await ask('OpenVoice repo dir', process.env.OPENVOICE_DIR || './vendor/OpenVoice');
     const openvoiceVenv = await ask('OpenVoice venv dir', process.env.OPENVOICE_VENV || './.venv-openvoice');
     const openvoiceRefAudio = await ask('OpenVoice reference audio path', process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
+    const omnivoicePython = await ask('OmniVoice Python', process.env.OMNIVOICE_PYTHON || './.venv-omnivoice/bin/python');
+    const omnivoiceRefAudio = await ask('OmniVoice reference audio path', process.env.OMNIVOICE_REF_AUDIO || process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
     const requireWake = (await ask('Require wake word? 1/0', process.env.REQUIRE_WAKE_WORD || '0')) === '1';
     const verboseProgress = (await ask('Verbose progress by default? 1/0', process.env.AGENT_VERBOSE_PROGRESS || process.env.VERBALCODING_VERBOSE_PROGRESS || '0')) === '1';
     const utteranceIdleMs = await ask('Utterance idle wait before STT, ms', process.env.UTTERANCE_IDLE_MS || '4500');
@@ -207,6 +209,8 @@ async function main() {
       openvoiceDir,
       openvoiceVenv,
       openvoiceRefAudio,
+      omnivoicePython,
+      omnivoiceRefAudio,
       requireWakeWord: requireWake,
       verboseProgress,
       utteranceIdleMs,

package/scripts/install_fireredtts2.sh ADDED Viewed

@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+set -euo pipefail
+ROOT="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT"
+ASSUME_YES=0
+SKIP_MODEL=0
+SKIP_PIP=0
+for arg in "$@"; do
+  case "$arg" in
+    -y|--yes) ASSUME_YES=1 ;;
+    --skip-model) SKIP_MODEL=1 ;;
+    --skip-pip) SKIP_PIP=1 ;;
+    -h|--help)
+      cat <<'USAGE'
+Usage: scripts/install_fireredtts2.sh [--yes] [--skip-model] [--skip-pip]
+Installs FireRedTTS-2 for VerbalCoding:
+  - clones FireRedTeam/FireRedTTS2 under vendor/FireRedTTS2
+  - creates .venv-fireredtts2
+  - installs upstream Python dependencies
+  - downloads https://huggingface.co/FireRedTeam/FireRedTTS2 weights under pretrained_models/FireRedTTS2
+  - creates .local/bin/fireredtts2 wrapper used by TTS_BACKEND=fireredtts2
+The model is large. Use --skip-model only if FIREREDTTS2_PRETRAINED_DIR points elsewhere.
+USAGE
+      exit 0
+      ;;
+  esac
+done
+log() { printf '==> %s\n' "$*"; }
+warn() { printf 'Warning: %s\n' "$*" >&2; }
+has_cmd() { command -v "$1" >/dev/null 2>&1; }
+confirm() {
+  if [ "$ASSUME_YES" = "1" ]; then return 0; fi
+  printf '%s [y/N]: ' "$1" >&2
+  read -r answer
+  case "$answer" in y|Y|yes|YES) return 0 ;; *) return 1 ;; esac
+}
+# Stop with exit status 2 unless the user agrees to the large download.
+# confirm itself succeeds without prompting when ASSUME_YES is 1, so no
+# separate ASSUME_YES test is needed around the call.
+confirm 'FireRedTTS-2 can download several GB of model/dependency data. Continue?' || exit 2
+mkdir -p vendor .local/bin pretrained_models
+if ! has_cmd git; then
+  warn 'git is required to install FireRedTTS-2.'
+  exit 1
+fi
+# Pick the first interpreter found on PATH, trying python3.12, then python3.11,
+# then the generic python3.
+PYTHON_BIN=""
+for candidate in python3.12 python3.11 python3; do
+  if has_cmd "$candidate"; then PYTHON_BIN="$candidate"; break; fi
+done
+if [ -z "$PYTHON_BIN" ]; then
+  warn 'Python >=3.11 is required to install FireRedTTS-2.'
+  exit 1
+fi
+# Ask the interpreter for "major.minor". %-formatting is used instead of an
+# f-string so that an old interpreter still reports its version rather than
+# dying with a SyntaxError before the check below can explain the problem.
+PYTHON_VERSION="$("$PYTHON_BIN" -c 'import sys; print("%d.%d" % (sys.version_info[0], sys.version_info[1]))')"
+PYTHON_MAJOR="${PYTHON_VERSION%%.*}"
+PYTHON_MINOR="${PYTHON_VERSION#*.}"
+# Compare numerically so every release >=3.11 is accepted, matching the stated
+# requirement. A fixed list of minor versions would reject newer interpreters
+# while the error message claims ">=3.11".
+if [ "$PYTHON_MAJOR" -lt 3 ] || { [ "$PYTHON_MAJOR" -eq 3 ] && [ "$PYTHON_MINOR" -lt 11 ]; }; then
+  warn "FireRedTTS-2 requires Python >=3.11; found $PYTHON_VERSION at $PYTHON_BIN"
+  exit 1
+fi
+if [ ! -d vendor/FireRedTTS2/.git ]; then
+  log 'Cloning FireRedTTS-2'
+  git clone --depth 1 https://github.com/FireRedTeam/FireRedTTS2.git vendor/FireRedTTS2
+else
+  log 'FireRedTTS-2 repo already exists'
+fi
+# Python environment setup; skipped entirely when SKIP_PIP is 1.
+if [ "$SKIP_PIP" != "1" ]; then
+  venv_python=.venv-fireredtts2/bin/python
+  # Create the virtual environment only when its interpreter is not there yet.
+  if ! [ -x "$venv_python" ]; then
+    log 'Creating .venv-fireredtts2'
+    "$PYTHON_BIN" -m venv .venv-fireredtts2
+  fi
+  log 'Installing FireRedTTS-2 Python dependencies'
+  # Packaging tools first, then the base libraries, then the cloned project
+  # itself as an editable install.
+  "$venv_python" -m pip install --upgrade pip setuptools wheel
+  "$venv_python" -m pip install torch torchaudio huggingface_hub
+  "$venv_python" -m pip install -e vendor/FireRedTTS2
+  # Upstream's requirements file is installed as well when the checkout has one.
+  if [ -f vendor/FireRedTTS2/requirements.txt ]; then
+    "$venv_python" -m pip install -r vendor/FireRedTTS2/requirements.txt
+  fi
+fi
+# Model weights download; skipped entirely when SKIP_MODEL is 1.
+if [ "$SKIP_MODEL" != "1" ]; then
+  # Treat the model as present when the target directory exists and holds at
+  # least one entry (find prints a name, so the test string is non-empty).
+  if [ -d pretrained_models/FireRedTTS2 ] && [ "$(find pretrained_models/FireRedTTS2 -mindepth 1 -maxdepth 1 2>/dev/null | head -n 1)" ]; then
+    log 'FireRedTTS-2 pretrained model already exists'
+  else
+    # With --skip-pip and no previously prepared venv the downloader is absent.
+    # Say so explicitly instead of failing with a bare "No such file or directory".
+    if [ ! -x .venv-fireredtts2/bin/huggingface-cli ]; then
+      warn '.venv-fireredtts2/bin/huggingface-cli not found; install huggingface_hub into .venv-fireredtts2 (rerun without --skip-pip) or pass --skip-model.'
+      exit 1
+    fi
+    log 'Downloading FireRedTTS-2 weights from https://huggingface.co/FireRedTeam/FireRedTTS2'
+    # NOTE(review): --local-dir-use-symlinks appears to be deprecated in recent
+    # huggingface_hub releases; confirm it is still accepted by the installed version.
+    .venv-fireredtts2/bin/huggingface-cli download FireRedTeam/FireRedTTS2 --local-dir pretrained_models/FireRedTTS2 --local-dir-use-symlinks False
+  fi
+fi
+# Generate the launcher script. The quoted heredoc delimiter keeps $ROOT, $0 and
+# $@ literal, so they are expanded when the wrapper runs, not while it is written.
+# The wrapper locates the project root two levels above itself and replaces its
+# own process with the venv interpreter running the synth script.
+wrapper_path=.local/bin/fireredtts2
+cat <<'SH' > "$wrapper_path"
+#!/usr/bin/env bash
+ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+exec "$ROOT/.venv-fireredtts2/bin/python" "$ROOT/integrations/fireredtts2/synth.py" "$@"
+SH
+chmod +x "$wrapper_path"
+log 'Installed .local/bin/fireredtts2 wrapper'
+log 'Set FIREREDTTS2_COMMAND=./.local/bin/fireredtts2 and TTS_BACKEND=fireredtts2, then restart VerbalCoding.'