npm - verbalcoding - Versions diffs - 0.2.11 → 0.2.12 - Mend

verbalcoding 0.2.11 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/.env.example +27 -1
package/README.es.md +132 -0
package/README.fr.md +132 -0
package/README.ja.md +132 -0
package/README.ko.md +132 -0
package/README.md +116 -74
package/README.ru.md +132 -0
package/README.zh.md +131 -0
package/app-node/agent_adapters.mjs +37 -5
package/app-node/agent_adapters.test.mjs +13 -1
package/app-node/agent_detect.mjs +73 -0
package/app-node/agent_detect.test.mjs +77 -0
package/app-node/install_config.mjs +3 -0
package/app-node/main.mjs +339 -4
package/app-node/notify.mjs +73 -0
package/app-node/notify.test.mjs +68 -0
package/app-node/plan_mode.mjs +174 -0
package/app-node/plan_mode.test.mjs +153 -0
package/app-node/smart_progress.mjs +94 -0
package/app-node/smart_progress.test.mjs +66 -0
package/app-node/stream_sentencer.mjs +61 -0
package/app-node/stream_sentencer.test.mjs +64 -0
package/app-node/streaming_tts_queue.mjs +48 -0
package/app-node/streaming_tts_queue.test.mjs +58 -0
package/app-node/text_routing.mjs +20 -0
package/app-node/text_routing.test.mjs +23 -1
package/docs/CONFIGURATION.md +69 -96
package/docs/FRESH_INSTALL.md +105 -63
package/docs/HERMES_VOICE.md +65 -0
package/docs/MULTI_INSTANCE.md +16 -0
package/docs/README.md +49 -0
package/docs/RELEASE.md +42 -19
package/docs/ROADMAP.md +38 -0
package/docs/TROUBLESHOOTING.md +126 -0
package/docs/USAGE.md +72 -40
package/docs/assets/figures/verbalcoding-flow.svg +1 -1
package/docs/i18n/CONFIGURATION.es.md +25 -0
package/docs/i18n/CONFIGURATION.fr.md +25 -0
package/docs/i18n/CONFIGURATION.ja.md +25 -0
package/docs/i18n/CONFIGURATION.ko.md +25 -0
package/docs/i18n/CONFIGURATION.ru.md +25 -0
package/docs/i18n/CONFIGURATION.zh.md +25 -0
package/docs/i18n/FRESH_INSTALL.es.md +27 -2
package/docs/i18n/FRESH_INSTALL.fr.md +27 -2
package/docs/i18n/FRESH_INSTALL.ja.md +27 -2
package/docs/i18n/FRESH_INSTALL.ko.md +27 -2
package/docs/i18n/FRESH_INSTALL.ru.md +27 -2
package/docs/i18n/FRESH_INSTALL.zh.md +27 -2
package/docs/i18n/HERMES_VOICE.es.md +46 -0
package/docs/i18n/HERMES_VOICE.fr.md +46 -0
package/docs/i18n/HERMES_VOICE.ja.md +46 -0
package/docs/i18n/HERMES_VOICE.ko.md +65 -0
package/docs/i18n/HERMES_VOICE.ru.md +46 -0
package/docs/i18n/HERMES_VOICE.zh.md +46 -0
package/docs/i18n/MULTI_INSTANCE.es.md +25 -0
package/docs/i18n/MULTI_INSTANCE.fr.md +25 -0
package/docs/i18n/MULTI_INSTANCE.ja.md +25 -0
package/docs/i18n/MULTI_INSTANCE.ko.md +25 -0
package/docs/i18n/MULTI_INSTANCE.ru.md +25 -0
package/docs/i18n/MULTI_INSTANCE.zh.md +25 -0
package/docs/i18n/README.es.md +20 -134
package/docs/i18n/README.fr.md +20 -134
package/docs/i18n/README.ja.md +20 -134
package/docs/i18n/README.ko.md +20 -133
package/docs/i18n/README.ru.md +20 -134
package/docs/i18n/README.zh.md +20 -133
package/docs/i18n/RELEASE.es.md +26 -1
package/docs/i18n/RELEASE.fr.md +26 -1
package/docs/i18n/RELEASE.ja.md +26 -1
package/docs/i18n/RELEASE.ko.md +26 -1
package/docs/i18n/RELEASE.ru.md +26 -1
package/docs/i18n/RELEASE.zh.md +26 -1
package/docs/i18n/TROUBLESHOOTING.es.md +39 -0
package/docs/i18n/TROUBLESHOOTING.fr.md +39 -0
package/docs/i18n/TROUBLESHOOTING.ja.md +39 -0
package/docs/i18n/TROUBLESHOOTING.ko.md +39 -0
package/docs/i18n/TROUBLESHOOTING.ru.md +39 -0
package/docs/i18n/TROUBLESHOOTING.zh.md +39 -0
package/docs/i18n/USAGE.es.md +25 -0
package/docs/i18n/USAGE.fr.md +25 -0
package/docs/i18n/USAGE.ja.md +25 -0
package/docs/i18n/USAGE.ko.md +25 -0
package/docs/i18n/USAGE.ru.md +25 -0
package/docs/i18n/USAGE.zh.md +25 -0
package/docs/superpowers/plans/2026-05-13-phase1-streaming-pipeline.md +122 -0
package/docs/superpowers/plans/2026-05-13-phase10-push-notifications.md +152 -0
package/docs/superpowers/plans/2026-05-13-phase2-agent-adapters.md +242 -0
package/docs/superpowers/plans/2026-05-13-phase6-smart-progress.md +172 -0
package/docs/superpowers/plans/2026-05-13-phase7-voice-plan-mode.md +108 -0
package/package.json +2 -1
package/scripts/cli.mjs +4 -3
package/scripts/doctor.mjs +11 -0
package/scripts/install.mjs +15 -1

package/README.md CHANGED Viewed

@@ -1,148 +1,190 @@
 # VerbalCoding
 <p align="center">
-  <strong>Talk to your CLI coding agents through Discord voice — like a phone call for software work.</strong>
+  <strong>The voice layer for any coding agent — real barge-in, streaming latency, and the agents you already use.</strong>
 </p>
 <p align="center">
-  <a href="docs/i18n/README.ko.md">한국어</a> ·
-  <a href="docs/i18n/README.ja.md">日本語</a> ·
-  <a href="docs/i18n/README.zh.md">中文</a> ·
-  <a href="docs/i18n/README.es.md">Español</a> ·
-  <a href="docs/i18n/README.fr.md">Français</a> ·
-  <a href="docs/i18n/README.ru.md">Русский</a>
+  <a href="./README.ko.md">한국어</a> ·
+  <a href="./README.ja.md">日本語</a> ·
+  <a href="./README.zh.md">中文</a> ·
+  <a href="./README.es.md">Español</a> ·
+  <a href="./README.fr.md">Français</a> ·
+  <a href="./README.ru.md">Русский</a>
 </p>
 <p align="center">
+  <img alt="npm" src="https://img.shields.io/npm/v/verbalcoding?color=CB3837&logo=npm&logoColor=white">
   <img alt="Node.js" src="https://img.shields.io/badge/Node.js-20%2B-339933?logo=node.js&logoColor=white">
   <img alt="Discord" src="https://img.shields.io/badge/Discord-voice%20bridge-5865F2?logo=discord&logoColor=white">
   <img alt="STT" src="https://img.shields.io/badge/STT-whisper.cpp-7C3AED">
-  <img alt="TTS" src="https://img.shields.io/badge/TTS-Edge%20%7C%20OpenVoice%20%7C%20Supertonic%20%7C%20SpeechSwift-0EA5E9">
-  <img alt="Agents" src="https://img.shields.io/badge/Agents-Hermes%20%7C%20Claude%20%7C%20Codex%20%7C%20Gemini%20%7C%20OpenCode-111827">
+  <img alt="TTS" src="https://img.shields.io/badge/TTS-Edge%20%7C%20OpenVoice%20%7C%20SpeechSwift-0EA5E9">
+  <img alt="License" src="https://img.shields.io/github/license/ca1773130n/VerbalCoding">
 </p>
 <p align="center">
   <img src="docs/assets/figures/verbalcoding-flow.svg" alt="VerbalCoding voice-to-agent flow" width="860">
 </p>
-## Why
+## Why it exists
-VerbalCoding turns a Discord voice channel into a hands-free control surface for coding agents. Speak a request, let your CLI agent work, and hear a concise answer back — with text transcripts, progress events, and guardrails for noisy code/log output.
+VerbalCoding turns a Discord voice channel into a hands-free cockpit for **any** CLI coding agent. Hermes ships its own `/voice join` for Hermes; VerbalCoding is a thin, agent-agnostic layer that puts the same loop on top of Hermes, Claude Code, Codex, Gemini, OpenCode, OpenClaw, Aider, Cursor CLI, or any non-interactive shell command — and adds the features other voice frontends still have on their roadmap:
-## Highlights
+- **True audio barge-in** — interrupt the agent mid-sentence; Hermes' built-in voice pauses its listener during TTS.
+- **Streaming pipeline** — first sentence plays while the agent is still writing (Hermes lists this as a future Phase-4 item).
+- **Smart progress narration** — describes intent ("wiring the new login route"), not file lists.
+- **Voice plan mode** — say "plan it first", edit by voice ("skip step 3"), say "approve" to execute.
+- **Phone-down mode** — push notification with a voice summary when a long task completes and the room is empty.
-| What you get | Why it feels good |
+## What feels different
+| Capability | Why it matters |
 |---|---|
-| Voice-first agent control | Talk to Hermes Agent, Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, or any custom CLI harness. |
-| On-device speech loop | Discord voice capture → local `whisper-cli` transcription → agent → chunked TTS playback. |
-| Shared voice + text context | Voice turns and `!ask` text commands can reuse the same supported agent session. |
-| Barge-in and sensitivity modes | Interrupt playback naturally and switch between normal and conservative/noisy environments. |
-| Multilingual voice presets | Switch STT, progress language, and TTS voice together with `vc language ko/en/auto`. |
-| Multi-room project isolation | Run one bot per project room with isolated Hermes profiles, sessions, memory, and logs. |
+| Agent choice, first-class | Hermes Agent, Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, Aider, Cursor CLI, or any custom command. `vc setup` auto-detects what's installed. |
+| Real barge-in | VAD thresholds tuned for indoor and noisy rooms; cut in mid-utterance and resume the conversation. |
+| Streaming end-to-end | `STREAMING_TTS=1` plays sentences as the agent produces them; first audio in well under a second on a warm cache. |
+| Smart progress | Optional LLM summarizer collapses raw events into one human sentence; falls back to the existing regex labels when no key is set. |
+| Plan-mode by voice | Narrated, editable, voice-driven plans without touching the keyboard. |
+| Phone-down handoff | Long task + empty voice channel = push notification (`ntfy`/`pushover`) with a redacted one-line summary and tap-to-rejoin link. |
+| Local speech loop | Discord audio is transcribed by local `whisper-cli`; TTS via Edge, OpenVoice, SpeechSwift/CosyVoice, or Supertonic. |
+| Real operations support | Doctor auto-fixes, Docker UDP guidance, latency metrics, multi-instance project rooms, redacted config checks. |
-## Quick Start
+> **Already using Hermes Agent?** Hermes itself has a working Discord voice loop via `/voice join` / `/voice channel`. Use VerbalCoding when you want it agent-agnostic, want barge-in and streaming today, or want plan-mode, push handoff, and smart narration on top of the same loop. The two coexist — VerbalCoding can drive Hermes as its backend.
-Fastest path with npm:
+## Quick Start
 ```bash
-npm install -g verbalcoding
-vc setup --yes
+npm install -g verbalcoding@latest
+vc setup       # detects installed agents and lets you pick
 vc doctor
 vc start
 ```
-Or run directly without a permanent global install:
+`vc setup` is the normal human path. Keep Discord Developer Portal open while it asks for your bot token, application/client ID, transcript target, and voice channel names.
+Automation can skip prompts, then fill Discord details later:
 ```bash
-npx verbalcoding setup --yes
+vc setup --yes
+vc setup token <bot-token> --client-id <discord-client-id>
+vc setup channels "General,Team Voice"
 vc doctor
-vc start
 ```
-GitHub clone path for contributors:
+Contributor clone path:
 ```bash
 git clone https://github.com/ca1773130n/VerbalCoding.git
 cd VerbalCoding
-./scripts/install.sh --yes
+./scripts/install.sh
 vc doctor
 ./run.sh
 ```
-`vc setup --yes` bootstraps local prerequisites from the npm package. `./scripts/install.sh --yes` does the same for GitHub clone installs. Both cover Node/npm dependencies, `ffmpeg`, `whisper-cli`, the default whisper.cpp model, a local `.venv-tts` Edge TTS helper, and setup wizard configuration where possible. They support macOS/Homebrew plus common Linux package managers (`apt`, `dnf`, `pacman`); rerun with `--no-wizard` for dependency-only setup or `--skip-system` if you want to install OS packages yourself.
-Need a clean install walkthrough? Start with [Fresh Install](docs/FRESH_INSTALL.md).
-## Supported Agent Backends
+## Discord setup in one minute
-| Backend | Default command | Session support |
-|---|---:|---|
-| Hermes Agent | `hermes chat -Q -q` | Resume, verbose progress, cancellation, final-answer recovery |
-| Claude Code | `claude -p` | CLI session file support through adapter defaults |
-| Codex CLI | `codex exec` | CLI session file support through adapter defaults |
-| Gemini CLI | `gemini -p` | CLI session file support through adapter defaults |
-| OpenCode | `opencode run` | CLI session file support through adapter defaults |
-| OpenClaw | `openclaw run` | CLI session file support through adapter defaults |
-| Custom | `AGENT_COMMAND` | Bring your own non-interactive command |
+1. Create a Discord application and bot in <https://discord.com/developers/applications>.
+2. Enable the Message Content privileged intent.
+3. Run `vc setup` and paste the bot token plus application/client ID when prompted.
+4. Enter exact voice channel names for auto-join.
+5. Invite the bot with:
-## Learn More
+```bash
+vc bot invite <discord-client-id>
+vc bot invite <discord-client-id> --guild <guild-id>
+```
-| Guide | What you get |
-|---|---|
-| [Fresh Install](docs/FRESH_INSTALL.md) | Clean clone setup, model download, first run |
-| [Usage Guide](docs/USAGE.md) | CLI commands, Discord commands, progress mode, latency metrics |
-| [Configuration](docs/CONFIGURATION.md) | `.env`, agent backends, MCP, TTS backends, operational notes |
-| [Multi-Instance](docs/MULTI_INSTANCE.md) | One permanent Discord voice room per project |
-| [Release Notes](docs/RELEASE.md) | Current capabilities and pre-release checklist |
+Secrets are stored in ignored local env files with mode `0600` and are not printed back by `vc doctor`.
-## Tiny Command Map
+## Tiny command map
 ```bash
-vc status                 # current language, TTS, and bridge settings
-vc language ko|en|auto    # switch STT/progress/TTS language preset
-vc bot invite CLIENT_ID   # generate the Discord bot invite URL
-vc instance setup NAME    # create an isolated project voice bot
-vc instance start NAME    # run that bot in the background
-vc doctor                 # redacted health check
-vc start                  # start the default bridge
+vc setup                               # guided setup with agent auto-detection
+vc setup --yes                         # non-interactive bootstrap/starter config
+vc setup token                         # rotate or add Discord bot token/client ID later
+vc setup channels "General,Team Voice" # update auto-join voice channel names
+vc bot invite CLIENT_ID                # generate a Discord bot invite URL
+vc status                              # show active language, TTS, bridge settings, and resolved backend
+vc language ko|en|auto                 # switch STT/progress/TTS language preset
+vc doctor                              # redacted health check with auto-fix suggestions
+vc start                               # start the default bridge
+vc instance setup NAME                 # create an isolated project voice bot
+vc instance start NAME                 # run that bot in the background
 ```
 In Discord:
 | Command | What it does |
 |---|---|
-| `!join` | Join your current voice channel. |
-| `!ask <prompt>` | Send text to the same agent backend. |
-| `!verbose on\|off` | Show/speak short progress updates. |
-| `!latency` | Summarize recent voice/STT/agent/TTS latency. |
-| `!sensitivity normal` | Use normal indoor barge-in sensitivity. |
-| `!sensitivity conservative` | Use stricter noisy/outdoor sensitivity. |
+| `!join` / `!leave` | Join or leave your current voice channel. |
+| `!ask <prompt>` | Send text to the same selected agent backend. |
+| `!verbose on\|off` | Toggle short progress updates. |
+| `!latency` / `!metrics` | Summarize recent STT/agent/TTS latency. |
+| `!sensitivity normal\|conservative` | Tune barge-in for indoor or noisy environments. |
 | `!session new <name> <workdir> [context] --voice <voice-channel>` | Bind a project session to a voice room. |
+## Roadmap
+The differentiation push is tracked in [docs/ROADMAP.md](./docs/ROADMAP.md). Five phases land the claims above:
+| # | Phase | What it adds |
+|---|---|---|
+| 1 | Streaming pipeline | Sentence-by-sentence TTS while the agent is still writing. |
+| 2 | Agent-agnostic adapters | First-class Aider + Cursor CLI; `vc setup` auto-detects. |
+| 6 | Smart progress | LLM-summarized narration. Falls back to today's regex labels. |
+| 7 | Voice plan mode | Narrate plan, voice-edit, approve to execute. |
+| 10 | Push notification handoff | ntfy/Pushover when a long task ends and the room is empty. |
+## Learn more
+| Guide | What you get |
+|---|---|
+| [Docs hub](docs/README.md) | One page linking every guide and localized doc set. |
+| [Roadmap](docs/ROADMAP.md) | Differentiation plan and per-phase implementation plans. |
+| [Fresh Install](docs/FRESH_INSTALL.md) | npm/global setup, Discord app setup, token/channel commands, first run. |
+| [Usage Guide](docs/USAGE.md) | CLI commands, Discord commands, run modes, voice changes, latency metrics. |
+| [Hermes Built-in Voice vs VerbalCoding](docs/HERMES_VOICE.md) | What Hermes already supports and when VerbalCoding is worth adding. |
+| [Configuration](docs/CONFIGURATION.md) | `.env`, agent backends, MCP server, TTS backends, operational notes. |
+| [Troubleshooting](docs/TROUBLESHOOTING.md) | Docker host networking, UDP voice failures, missing token/channel diagnostics. |
+| [Multi-Instance](docs/MULTI_INSTANCE.md) | One permanent Discord voice room per project. |
+| [Release Notes](docs/RELEASE.md) | Current capabilities, checks, and public-release gaps. |
 ## Requirements
 | Layer | Default |
 |---|---|
-| Runtime | Node.js 20+, npm; install script can install via Homebrew/apt/dnf/pacman |
-| Audio | `ffmpeg`; install script can install it |
-| Speech recognition | Local `whisper-cli` from whisper.cpp; install script uses Homebrew on macOS or local Linux build fallback |
-| TTS | Edge TTS CLI; install script creates `.venv-tts` if needed |
-| Discord | Bot token, Message Content intent, voice permissions |
-| Agent | At least one authenticated CLI harness, Hermes Agent by default |
-| Platform focus | macOS / Apple Silicon most tested; Linux bootstrap is best-effort and documented |
+| Runtime | Node.js 20+ and npm; setup can install via Homebrew/apt/dnf/pacman where supported. |
+| Audio | `ffmpeg`; setup/doctor can install it on supported OSes. |
+| Speech recognition | Local `whisper-cli` from whisper.cpp plus `models/ggml-small-q5_1.bin`. |
+| TTS | Edge TTS by default; optional OpenVoice, SpeechSwift/CosyVoice, and Supertonic paths. |
+| Discord | Bot token, Message Content intent, voice permissions, matching auto-join channel names. |
+| Agent | At least one CLI harness installed; `vc setup` auto-detects Hermes, Claude Code, Codex, Gemini, OpenCode, OpenClaw, Aider, Cursor CLI. |
+| Platform focus | macOS / Apple Silicon most tested; Linux bootstrap is best-effort; Windows unsupported for now. |
+## Docker / container note
+Discord text login can work while voice join fails if outbound UDP is blocked. If logs show `Cannot perform IP discovery - socket closed`, use Linux host networking for the service that runs `vc start`:
+```yaml
+services:
+  verbalcoding:
+    network_mode: "host"
+```
+Do not combine `network_mode: "host"` with `ports:`. Docker Desktop for macOS/Windows behaves differently; if UDP still fails there, run VerbalCoding directly on the host or a Linux VM.
 ## Contributing
-Run the lightweight checks before sending changes:
+Run lightweight checks before sending changes:
 ```bash
 node --check app-node/main.mjs
 npm test
-bash -n run.sh scripts/install.sh
+bash -n run.sh scripts/install.sh scripts/bootstrap_prereqs.sh
 npm pack --dry-run
 vc doctor
 ```
 ## Status
-VerbalCoding is public-release oriented but still early. Demo video/GIF, broader Linux validation, CI, and deeper security review are still TODOs.
+VerbalCoding is public-release oriented but still early. The roadmap above tracks live differentiation work. Demo video/GIF, broader Linux validation, CI, and deeper security review are still TODOs.

package/README.ru.md ADDED Viewed

@@ -0,0 +1,132 @@
+# VerbalCoding
+<p align="center"><strong>Общайтесь с CLI-агентами для разработки голосом в Discord, как по телефону.</strong></p>
+<p align="center"><a href="./README.md">English</a> · <a href="./README.ko.md">한국어</a> · <a href="./README.ja.md">日本語</a> · <a href="./README.zh.md">中文</a> · <a href="./README.es.md">Español</a> · <a href="./README.fr.md">Français</a></p>
+<p align="center">
+  <img alt="npm" src="https://img.shields.io/npm/v/verbalcoding?color=CB3837&logo=npm&logoColor=white">
+  <img alt="Node.js" src="https://img.shields.io/badge/Node.js-20%2B-339933?logo=node.js&logoColor=white">
+  <img alt="Discord" src="https://img.shields.io/badge/Discord-voice%20bridge-5865F2?logo=discord&logoColor=white">
+  <img alt="STT" src="https://img.shields.io/badge/STT-whisper.cpp-7C3AED">
+  <img alt="TTS" src="https://img.shields.io/badge/TTS-Edge%20%7C%20OpenVoice%20%7C%20SpeechSwift-0EA5E9">
+  <img alt="License" src="https://img.shields.io/github/license/ca1773130n/VerbalCoding">
+</p>
+<p align="center">
+  <img src="docs/assets/figures/verbalcoding-flow.svg" alt="VerbalCoding voice-to-agent flow" width="860">
+</p>
+## Зачем это нужно
+VerbalCoding превращает голосовую комнату Discord в hands-free кабину для coding agents. Вы произносите задачу, CLI-агент работает, а в ответ получаете короткую озвучку, текстовую расшифровку и события прогресса. Diffs и logs не зачитываются длинным TTS.
+> **Уже используете Hermes Agent?** В Hermes уже есть встроенная поддержка голосовых каналов Discord через `/voice join` / `/voice channel`: бот может зайти в текущий VC, распознать речь через Whisper и ответить TTS. Для этого базового цикла VerbalCoding не обязателен. VerbalCoding добавляет workflow-слой: маршрутизацию проектов/сессий, общий контекст голоса+текста, правила прерывания, голосовой прогресс, языковые пресеты, метрики задержки и переключение CLI-бэкендов помимо Hermes.
+## Что ощущается иначе
+| Возможность | Зачем это важно |
+|---|---|
+| Работа как звонок | Говорите, слушайте, перебивайте и продолжайте в одном голосовом канале Discord. |
+| Пошаговая настройка | `vc setup` проводит через prerequisites, Discord token/client ID, voice channel, transcript target, backend и TTS settings за один проход. |
+| Локальный голосовой цикл | Discord audio → local `whisper-cli` → selected CLI agent → TTS reply. |
+| Выбор агента | Hermes Agent, Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw или custom command. |
+| Больше, чем встроенный голос Hermes | Сохраняет тот же VC-голосовой цикл и добавляет проектные комнаты, общий контекст `!ask`, тонкую обработку прерываний, голос прогресса/статуса и управление multi-agent бэкендами. |
+| Готовность к эксплуатации | doctor auto-fix, Docker UDP guide, latency metrics, multi-instance rooms и redacted config checks встроены. |
+## Быстрый старт
+```bash
+npm install -g verbalcoding@latest
+vc setup
+vc doctor
+vc start
+```
+`vc setup` — обычный путь для человека. Держите Discord Developer Portal открытым и введите bot token, application/client ID, transcript target и voice channel names.
+Для автоматизации можно пропустить prompts и добавить Discord-данные позже.
+```bash
+vc setup --yes
+vc setup token <bot-token> --client-id <discord-client-id>
+vc setup channels "General,Team Voice"
+vc doctor
+```
+## Discord за одну минуту
+1. Создайте application и bot в Discord Developer Portal.
+2. Включите Message Content privileged intent.
+3. Запустите `vc setup` и вставьте bot token и application/client ID.
+4. Введите точные имена voice channels для auto-join.
+5. Пригласите bot этими командами.
+```bash
+vc bot invite <discord-client-id>
+vc bot invite <discord-client-id> --guild <guild-id>
+```
+## Краткая карта команд
+```bash
+vc setup                                 # пошаговая настройка: prerequisites, Discord, backend, voice
+vc setup --yes                           # неинтерактивный bootstrap/starter config
+vc setup token                           # позже обновить или добавить Discord bot token/client ID
+vc setup channels "General,Team Voice"   # обновить auto-join voice channel names
+vc bot invite CLIENT_ID                  # сгенерировать Discord bot invite URL
+vc status                                # показать текущие настройки
+vc language ko|en|auto                   # переключить language preset
+vc doctor                                # redacted health check и auto-fixes
+vc start                                 # запустить bridge по умолчанию
+vc instance setup NAME                   # создать изолированный project voice bot
+vc instance start NAME                   # запустить этот bot в background
+```
+## Подробнее
+| Гайд | Что внутри |
+|---|---|
+| [Центр документации](docs/i18n/README.ru.md) | Индекс локализованных гайдов. |
+| [Fresh Install](docs/i18n/FRESH_INSTALL.ru.md) | npm/global setup, настройка Discord и первый запуск. |
+| [Usage](docs/i18n/USAGE.ru.md) | CLI-команды, Discord-команды, режимы запуска и latency. |
+| [Встроенный голос Hermes vs VerbalCoding](docs/i18n/HERMES_VOICE.ru.md) | Что Hermes уже умеет в Discord voice и чем отличается VerbalCoding. |
+| [Configuration](docs/i18n/CONFIGURATION.ru.md) | .env, agent backends, MCP, TTS и эксплуатация. |
+| [Troubleshooting](docs/i18n/TROUBLESHOOTING.ru.md) | Docker UDP и проверки token/channel. |
+| [Multi-Instance](docs/i18n/MULTI_INSTANCE.ru.md) | Одна постоянная voice room на проект. |
+## Требования
+| Слой | По умолчанию |
+|---|---|
+| Runtime | Node.js 20+ и npm. |
+| Audio | `ffmpeg` и local `whisper-cli`. |
+| TTS | По умолчанию Edge TTS; опционально OpenVoice, SpeechSwift/CosyVoice, Supertonic. |
+| Discord | Bot token, Message Content intent, voice permissions и совпадающие channel names. |
+| Agent | Минимум один аутентифицированный CLI harness; по умолчанию Hermes Agent. |
+## Docker / контейнеры
+Если в logs видно `Cannot perform IP discovery - socket closed`, Discord voice UDP заблокирован. В Linux Docker Compose используйте:
+```yaml
+services:
+  verbalcoding:
+    network_mode: "host"
+```
+Не совмещайте `network_mode: "host"` с `ports:`.
+## Участие
+```bash
+node --check app-node/main.mjs
+npm test
+bash -n run.sh scripts/install.sh scripts/bootstrap_prereqs.sh
+npm pack --dry-run
+vc doctor
+```
+## Статус
+VerbalCoding ориентирован на публичный релиз, но проект ещё ранний. Demo video/GIF, более широкая Linux validation, CI и security review остаются TODO.

package/README.zh.md ADDED Viewed

@@ -0,0 +1,131 @@
+# VerbalCoding
+<p align="center"><strong>像打电话一样，通过 Discord 语音控制 CLI 编程代理。</strong></p>
+<p align="center"><a href="./README.md">English</a> · <a href="./README.ko.md">한국어</a> · <a href="./README.ja.md">日本語</a> · <a href="./README.es.md">Español</a> · <a href="./README.fr.md">Français</a> · <a href="./README.ru.md">Русский</a></p>
+<p align="center">
+  <img alt="npm" src="https://img.shields.io/npm/v/verbalcoding?color=CB3837&logo=npm&logoColor=white">
+  <img alt="Node.js" src="https://img.shields.io/badge/Node.js-20%2B-339933?logo=node.js&logoColor=white">
+  <img alt="Discord" src="https://img.shields.io/badge/Discord-voice%20bridge-5865F2?logo=discord&logoColor=white">
+  <img alt="STT" src="https://img.shields.io/badge/STT-whisper.cpp-7C3AED">
+  <img alt="TTS" src="https://img.shields.io/badge/TTS-Edge%20%7C%20OpenVoice%20%7C%20SpeechSwift-0EA5E9">
+  <img alt="License" src="https://img.shields.io/github/license/ca1773130n/VerbalCoding">
+</p>
+<p align="center">
+  <img src="docs/assets/figures/verbalcoding-flow.svg" alt="VerbalCoding voice-to-agent flow" width="860">
+</p>
+## 为什么需要它
+VerbalCoding 把 Discord 语音房间变成编码代理的免提驾驶舱。你说出需求，让 CLI 代理工作，并收到简短语音回复和文本记录；diff 和日志不会被 TTS 长篇朗读。
+> **已经在用 Hermes Agent？** Hermes 本身已经通过 `/voice join` / `/voice channel` 支持 Discord 语音频道：它可以加入你当前所在的 VC，用 Whisper 做语音转文字，并用 TTS 回答。只需要这个基础闭环时，VerbalCoding 不是必需的。VerbalCoding 是加在上面的工作流层：项目/会话路由、语音+文本共享上下文、插话规则、进度语音、语言预设、延迟指标，以及 Hermes 之外的 CLI 后端切换。
+## 体验亮点
+| 能力 | 价值 |
+|---|---|
+| 电话式工作流 | 在同一个 Discord 语音频道里说话、收听、打断、继续。 |
+| 面向人的引导设置 | `vc setup` 一次引导 prerequisites、Discord token/client ID、voice channel、transcript target、backend 和 TTS 设置。 |
+| 本地语音闭环 | Discord audio → local `whisper-cli` → selected CLI agent → TTS reply。 |
+| 可选代理 | 支持 Hermes Agent、Claude Code、Codex、Gemini CLI、OpenCode、OpenClaw 或 custom command。 |
+| 超越 Hermes 内置语音 | 在同一个 VC 语音闭环上增加项目房间、`!ask` 共享上下文、细粒度打断处理、进度/状态语音和多代理后端控制。 |
+| 真实运维支持 | 内置 doctor auto-fix、Docker UDP 指南、latency metrics、multi-instance rooms 和 redacted config checks。 |
+## 快速开始
+```bash
+npm install -g verbalcoding@latest
+vc setup
+vc doctor
+vc start
+```
+普通用户路径是 `vc setup`。运行时请打开 Discord Developer Portal，并按提示输入 bot token、application/client ID、transcript target 和 voice channel names。
+自动化场景可以跳过提示，然后再补充 Discord 信息。
+```bash
+vc setup --yes
+vc setup token <bot-token> --client-id <discord-client-id>
+vc setup channels "General,Team Voice"
+vc doctor
+```
+## 一分钟完成 Discord 设置
+1. 在 Discord Developer Portal 创建 application 和 bot。
+2. 启用 Message Content privileged intent。
+3. 运行 `vc setup`，粘贴 bot token 和 application/client ID。
+4. 输入要自动加入的精确 voice channel 名称。
+5. 用下面的命令邀请 bot。
+```bash
+vc bot invite <discord-client-id>
+vc bot invite <discord-client-id> --guild <guild-id>
+```
+## 迷你命令地图
+```bash
+vc setup                                 # 引导式设置: prerequisites, Discord, backend, voice
+vc setup --yes                           # 非交互 bootstrap/starter config
+vc setup token                           # 稍后轮换或添加 Discord bot token/client ID
+vc setup channels "General,Team Voice"   # 更新 auto-join voice channel names
+vc bot invite CLIENT_ID                  # 生成 Discord bot invite URL
+vc status                                # 显示当前设置
+vc language ko|en|auto                   # 切换 language preset
+vc doctor                                # redacted health check 和 auto-fix
+vc start                                 # 启动默认 bridge
+vc instance setup NAME                   # 创建隔离的 project voice bot
+vc instance start NAME                   # 后台运行该 bot
+```
+## 了解更多
+| 指南 | 内容 |
+|---|---|
+| [文档中心](docs/i18n/README.zh.md) | 本地化指南索引。 |
+| [Fresh Install](docs/i18n/FRESH_INSTALL.zh.md) | npm/global setup、Discord 设置、首次运行。 |
+| [Usage](docs/i18n/USAGE.zh.md) | CLI 命令、Discord 命令、运行模式、latency。 |
+| [Configuration](docs/i18n/CONFIGURATION.zh.md) | .env、agent backends、MCP、TTS、运维。 |
+| [Troubleshooting](docs/i18n/TROUBLESHOOTING.zh.md) | Docker UDP、token/channel 缺失检查。 |
+| [Multi-Instance](docs/i18n/MULTI_INSTANCE.zh.md) | 每个项目一个固定语音房间。 |
+## 要求
+| 层级 | 默认 |
+|---|---|
+| Runtime | Node.js 20+ 和 npm。 |
+| Audio | `ffmpeg` 和 local `whisper-cli`。 |
+| TTS | 默认 Edge TTS；可选 OpenVoice、SpeechSwift/CosyVoice、Supertonic。 |
+| Discord | Bot token、Message Content intent、voice permissions、匹配的 channel names。 |
+| Agent | 至少一个已认证 CLI harness；默认 Hermes Agent。 |
+## Docker / 容器说明
+如果日志出现 `Cannot perform IP discovery - socket closed`，说明 Discord voice UDP 被阻断。在 Linux Docker Compose 中使用：
+```yaml
+services:
+  verbalcoding:
+    network_mode: "host"
+```
+不要同时使用 `network_mode: "host"` 和 `ports:`。
+## 贡献
+```bash
+node --check app-node/main.mjs
+npm test
+bash -n run.sh scripts/install.sh scripts/bootstrap_prereqs.sh
+npm pack --dry-run
+vc doctor
+```
+## 状态
+VerbalCoding 面向公开发布，但仍处于早期阶段。演示视频/GIF、更广泛的 Linux 验证、CI 和安全审查仍是 TODO。

package/app-node/agent_adapters.mjs CHANGED Viewed

@@ -23,12 +23,14 @@ export function voiceBridgePrompt(text, options = {}) {
   const english = /^en/i.test(String(options.language || ''));
   const lines = english ? [
     'This is a user utterance from a Discord voice call.',
+    'Consider Discord voice-channel speech and text-channel messages as one shared conversation context when inferring intent.',
     'Answer in English. For simple conversation/status questions, do not use tools; answer directly in 1-3 sentences.',
     'Use tools only for real work requests such as file edits, command execution, log checks, or web/search tasks.',
     'If code changes are made, do not read diffs or full code aloud; summarize outcome and next checks briefly.',
     'Do not include CLI metadata or session_id in the answer.',
   ] : [
     'Discord 음성 대화로 들어온 사용자 발화다.',
+    '의도를 판단할 때 음성 채널 발화와 텍스트 채널 메시지를 같은 대화 맥락으로 함께 고려해라.',
     '단순 대화/상태 질문이면 도구를 쓰지 말고 1~3문장으로 바로 한국어 답변해라.',
     '파일 수정, 실행, 로그 확인, 검색 같은 실제 작업 지시일 때만 필요한 도구를 사용해라.',
     '코드 변경을 수행했다면 음성 답변에는 diff나 코드 전문을 읽지 말고, 작업 결과와 다음 확인 사항만 짧게 말해라.',
@@ -57,6 +59,10 @@ export function voiceBridgePrompt(text, options = {}) {
     lines.push(english ? 'Route this turn through the following project/session context:' : '이 턴은 아래 프로젝트/세션 컨텍스트로 처리해라.');
     lines.push(String(options.projectContext).trim());
   }
+  if (options.recentDiscordContext) {
+    lines.push(english ? 'Recent Discord text-channel context to consider with this voice utterance:' : '이 음성 발화와 함께 고려할 최근 Discord 텍스트 채널 맥락:');
+    lines.push(String(options.recentDiscordContext).trim());
+  }
   return lines.concat(['', text]).join('\n');
 }
@@ -251,6 +257,24 @@ export function buildAgentSettings({ ROOT, env = process.env } = {}) {
       sessionFile: env.AGENT_SESSION_FILE || path.join(root, '.agent-sessions', 'openclaw'),
       supportsHermesSession: false,
     },
+    aider: {
+      label: 'Aider',
+      command: env.AIDER_COMMAND || 'aider --no-pretty --yes-always --message',
+      sessionFile: env.AGENT_SESSION_FILE || path.join(root, '.agent-sessions', 'aider'),
+      supportsHermesSession: false,
+    },
+    cursor: {
+      label: 'Cursor CLI',
+      command: env.CURSOR_COMMAND || 'cursor-agent --print --prompt',
+      sessionFile: env.AGENT_SESSION_FILE || path.join(root, '.agent-sessions', 'cursor'),
+      supportsHermesSession: false,
+    },
+    'cursor-cli': {
+      label: 'Cursor CLI',
+      command: env.CURSOR_COMMAND || 'cursor-agent --print --prompt',
+      sessionFile: env.AGENT_SESSION_FILE || path.join(root, '.agent-sessions', 'cursor'),
+      supportsHermesSession: false,
+    },
     custom: {
       label: env.AGENT_LABEL || 'Custom Agent',
       command: env.AGENT_COMMAND || '',
@@ -294,6 +318,7 @@ export function createAgentAdapter(settings, deps = {}) {
   const hermesSessionsDir = deps.hermesSessionsDir || path.join(os.homedir(), '.hermes', 'sessions');
   const spawnProcess = deps.spawn;
   const onProgress = deps.onProgress || (() => {});
+  const onStdoutChunk = deps.onStdoutChunk || null;
   const emittedProgress = new Set();
   let activeProgressLanguage = settings.language;
   const capabilities = agentAdapterCapabilities(settings);
@@ -308,7 +333,7 @@ export function createAgentAdapter(settings, deps = {}) {
   }
   function execWithOptionalProgress(cmd, args, options, verbose) {
-    if (!verbose || !spawnProcess) return execFileAsync(cmd, args, options);
+    if ((!verbose && !onStdoutChunk) || !spawnProcess) return execFileAsync(cmd, args, options);
     return new Promise((resolve, reject) => {
       const child = spawnProcess(cmd, args, {
         env: options.env,
@@ -353,7 +378,8 @@ export function createAgentAdapter(settings, deps = {}) {
       child.stdout?.on('data', chunk => {
         const s = chunk.toString();
         stdout += s;
-        emitVerboseProgress(s);
+        if (onStdoutChunk) { try { onStdoutChunk(s); } catch (e) { warn('onStdoutChunk failed', e?.stack || e); } }
+        if (verbose) emitVerboseProgress(s);
         if (stdout.length + stderr.length > options.maxBuffer) {
           const err = new Error('maxBuffer exceeded');
           err.code = 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER';
@@ -364,7 +390,7 @@ export function createAgentAdapter(settings, deps = {}) {
       child.stderr?.on('data', chunk => {
         const s = chunk.toString();
         stderr += s;
-        emitVerboseProgress(s);
+        if (verbose) emitVerboseProgress(s);
         if (stdout.length + stderr.length > options.maxBuffer) {
           const err = new Error('maxBuffer exceeded');
           err.code = 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER';
@@ -467,7 +493,12 @@ export function createAgentAdapter(settings, deps = {}) {
   function buildArgs(text, options = {}) {
     const argv = shellSplit(settings.command);
     const cmd = argv[0];
-    const query = voiceBridgePrompt(text, { verboseProgress: options.verboseProgress, language: options.language, projectContext: options.projectContext });
+    const query = voiceBridgePrompt(text, {
+      verboseProgress: options.verboseProgress,
+      language: options.language,
+      projectContext: options.projectContext,
+      recentDiscordContext: options.recentDiscordContext,
+    });
     let args = argv.slice(1);
     if (settings.backend === 'hermes' && options.verboseProgress) {
       // Hermes quiet mode intentionally suppresses tool previews.  In verbose
@@ -491,8 +522,9 @@ export function createAgentAdapter(settings, deps = {}) {
     const language = plan.language || settings.language;
     activeProgressLanguage = language;
     const projectContext = plan.projectContext || settings.projectContext || '';
+    const recentDiscordContext = plan.recentDiscordContext || '';
     emittedProgress.clear();
-    const { cmd, args, sessionId } = buildArgs(text, { verboseProgress, language, projectContext });
+    const { cmd, args, sessionId } = buildArgs(text, { verboseProgress, language, projectContext, recentDiscordContext });
     const start = Date.now();
     const label = plan.label || settings.label;
     const { args: finalArgs, outputPath } = addCodexOutputCapture(args);