verbalcoding 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +83 -0
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/app-node/agent_adapters.mjs +576 -0
- package/app-node/agent_adapters.test.mjs +455 -0
- package/app-node/agent_contract.mjs +45 -0
- package/app-node/barge_in.mjs +148 -0
- package/app-node/barge_in.test.mjs +179 -0
- package/app-node/bridge_logger.mjs +66 -0
- package/app-node/bridge_logger.test.mjs +73 -0
- package/app-node/bridge_state.mjs +104 -0
- package/app-node/bridge_state.test.mjs +64 -0
- package/app-node/cli_install.test.mjs +97 -0
- package/app-node/deferred_queue.mjs +12 -0
- package/app-node/deferred_queue.test.mjs +20 -0
- package/app-node/discord_invite_cli.test.mjs +31 -0
- package/app-node/discord_text.mjs +29 -0
- package/app-node/discord_text.test.mjs +32 -0
- package/app-node/hermes_profiles.mjs +164 -0
- package/app-node/hermes_profiles.test.mjs +276 -0
- package/app-node/install_config.mjs +263 -0
- package/app-node/install_config.test.mjs +205 -0
- package/app-node/instance_doctor.mjs +137 -0
- package/app-node/instance_doctor.test.mjs +128 -0
- package/app-node/instance_profile_lifecycle.mjs +16 -0
- package/app-node/instances.mjs +153 -0
- package/app-node/instances.test.mjs +102 -0
- package/app-node/language_config.mjs +73 -0
- package/app-node/language_config.test.mjs +51 -0
- package/app-node/latency_metrics.mjs +133 -0
- package/app-node/latency_metrics.test.mjs +71 -0
- package/app-node/main.mjs +1771 -0
- package/app-node/mcp_tools.mjs +198 -0
- package/app-node/mcp_tools.test.mjs +39 -0
- package/app-node/progress_cache.mjs +7 -0
- package/app-node/progress_cache.test.mjs +23 -0
- package/app-node/progress_speech.mjs +102 -0
- package/app-node/progress_speech.test.mjs +48 -0
- package/app-node/project_sessions.mjs +148 -0
- package/app-node/project_sessions.test.mjs +77 -0
- package/app-node/restart_notice.mjs +57 -0
- package/app-node/restart_notice.test.mjs +37 -0
- package/app-node/restart_policy.mjs +27 -0
- package/app-node/restart_policy.test.mjs +33 -0
- package/app-node/text_routing.mjs +8 -0
- package/app-node/text_routing.test.mjs +18 -0
- package/app-node/tts_backends.mjs +251 -0
- package/app-node/tts_backends.test.mjs +400 -0
- package/app-node/tts_chunks.mjs +57 -0
- package/app-node/tts_chunks.test.mjs +35 -0
- package/app-node/tts_prefetch.mjs +38 -0
- package/app-node/tts_prefetch.test.mjs +49 -0
- package/app-node/tts_settings.mjs +72 -0
- package/app-node/tts_settings.test.mjs +127 -0
- package/app-node/tts_voice_config.mjs +127 -0
- package/app-node/tts_voice_config.test.mjs +64 -0
- package/app-node/voice_clone_capture.mjs +76 -0
- package/app-node/voice_clone_capture.test.mjs +51 -0
- package/app-node/voice_messages.mjs +62 -0
- package/app-node/voice_messages.test.mjs +33 -0
- package/docs/CONFIGURATION.md +183 -0
- package/docs/FRESH_INSTALL.md +193 -0
- package/docs/MULTI_INSTANCE.md +183 -0
- package/docs/RELEASE.md +72 -0
- package/docs/USAGE.md +108 -0
- package/docs/assets/figures/verbalcoding-flow.svg +63 -0
- package/docs/i18n/README.es.md +121 -0
- package/docs/i18n/README.fr.md +121 -0
- package/docs/i18n/README.ja.md +121 -0
- package/docs/i18n/README.ko.md +121 -0
- package/docs/i18n/README.ru.md +121 -0
- package/docs/i18n/README.zh.md +121 -0
- package/package.json +58 -0
- package/run.sh +82 -0
- package/scripts/bootstrap_prereqs.sh +193 -0
- package/scripts/cli.mjs +369 -0
- package/scripts/docker_ubuntu_smoke.sh +76 -0
- package/scripts/doctor.mjs +134 -0
- package/scripts/install.mjs +108 -0
- package/scripts/install.sh +44 -0
- package/scripts/mcp-server.mjs +84 -0
- package/scripts/openvoice_smoke.py +34 -0
- package/scripts/openvoice_synth.py +103 -0
- package/scripts/setup_openvoice.sh +34 -0
- package/scripts/setup_supertonic.sh +18 -0
package/.env.example
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Copy to .env and fill in local values. Do not commit .env.
|
|
2
|
+
|
|
3
|
+
DISCORD_BOT_TOKEN=""
|
|
4
|
+
DISCORD_ALLOWED_USERS=""
|
|
5
|
+
AUTO_JOIN_VOICE_CHANNELS="일반,General,general"
|
|
6
|
+
TRANSCRIPT_CHANNEL_ID=""
|
|
7
|
+
|
|
8
|
+
# Agent harness: hermes, claude-code, claude, codex, gemini, opencode, openclaw, custom
|
|
9
|
+
AGENT_BACKEND="hermes"
|
|
10
|
+
# AGENT_LABEL="My Harness"
|
|
11
|
+
# AGENT_COMMAND="my-harness run --non-interactive"
|
|
12
|
+
AGENT_TASK_TIMEOUT_MS="0"
|
|
13
|
+
AGENT_CHAT_TIMEOUT_MS="45000"
|
|
14
|
+
AGENT_VERBOSE_PROGRESS="0" # default off; toggle in Discord with !verbose on/off
|
|
15
|
+
LATENCY_LOG_PATH="./.logs/latency.jsonl"
|
|
16
|
+
PROJECT_SESSIONS_FILE="./config/project-sessions.json"
|
|
17
|
+
# Agent workflow helper: off by default. Toggle with `vc restart auto on|off`.
|
|
18
|
+
VERBALCODING_AUTO_RESTART_VOICE_BOT="0"
|
|
19
|
+
|
|
20
|
+
STT_ENGINE="whisper_cpp"
|
|
21
|
+
WHISPER_CPP_BIN="whisper-cli"
|
|
22
|
+
WHISPER_CPP_MODEL="./models/ggml-small-q5_1.bin"
|
|
23
|
+
VOICE_LANGUAGE="ko" # ko | en | auto; controls progress/status language
|
|
24
|
+
WHISPER_CPP_LANGUAGE="ko" # ko | en | auto; auto omits forced whisper language
|
|
25
|
+
STT_LANGUAGE="ko"
|
|
26
|
+
|
|
27
|
+
TTS_BACKEND="edge" # edge | openvoice | speechswift | supertonic
|
|
28
|
+
EDGE_TTS_COMMAND="edge-tts"
|
|
29
|
+
TTS_VOICE="ko-KR-SunHiNeural"
|
|
30
|
+
TTS_RATE="+10%"
|
|
31
|
+
TTS_MAX_CHARS="495"
|
|
32
|
+
TTS_VOLUME="1.0" # Discord playback gain; 1.0 = unchanged, 1.6 = louder
|
|
33
|
+
|
|
34
|
+
# Optional local Supertonic TTS backend. Install with: python3 -m pip install supertonic
|
|
35
|
+
# First run downloads the model to SUPERTONIC_CACHE_DIR or ~/.cache/supertonic.
|
|
36
|
+
SUPERTONIC_COMMAND="supertonic"
|
|
37
|
+
SUPERTONIC_VOICE="M1" # M1-M5, F1-F5
|
|
38
|
+
SUPERTONIC_LANGUAGE="ko" # en | ko | es | pt | fr
|
|
39
|
+
SUPERTONIC_STEPS="2" # 2 is fastest; higher values improve quality but are slower
|
|
40
|
+
SUPERTONIC_SPEED="1.0"
|
|
41
|
+
SUPERTONIC_MAX_CHUNK_LENGTH="300"
|
|
42
|
+
SUPERTONIC_SILENCE_DURATION="0.15"
|
|
43
|
+
SUPERTONIC_PROGRESS="0" # keep progress prompts fast via Edge unless set to 1
|
|
44
|
+
# SUPERTONIC_CACHE_DIR="./.cache/supertonic"
|
|
45
|
+
# SUPERTONIC_INTRA_OP_THREADS="4"
|
|
46
|
+
# SUPERTONIC_INTER_OP_THREADS="1"
|
|
47
|
+
|
|
48
|
+
# Optional local speech-swift / CosyVoice backend.
|
|
49
|
+
# Server mode keeps audio-server warm and avoids launching the audio CLI per TTS request.
|
|
50
|
+
SPEECHSWIFT_MODE="server" # cli | server
|
|
51
|
+
SPEECHSWIFT_ENGINE="cosyvoice"
|
|
52
|
+
SPEECHSWIFT_LANGUAGE="korean"
|
|
53
|
+
SPEECHSWIFT_REF_AUDIO="./voice-samples/user-reference.wav"
|
|
54
|
+
SPEECHSWIFT_SERVER_HOST="127.0.0.1"
|
|
55
|
+
SPEECHSWIFT_SERVER_PORT="18080"
|
|
56
|
+
SPEECHSWIFT_SERVER_URL="http://127.0.0.1:18080"
|
|
57
|
+
SPEECHSWIFT_PROGRESS="0" # keep progress prompts fast via Edge unless set to 1
|
|
58
|
+
|
|
59
|
+
# Optional local voice cloning with myshell-ai/OpenVoice V2.
|
|
60
|
+
# Use only a reference sample you own or have permission to clone.
|
|
61
|
+
OPENVOICE_DIR="./vendor/OpenVoice"
|
|
62
|
+
OPENVOICE_VENV="./.venv-openvoice"
|
|
63
|
+
OPENVOICE_REF_AUDIO="./voice-samples/user-reference.wav"
|
|
64
|
+
OPENVOICE_LANGUAGE="KR"
|
|
65
|
+
OPENVOICE_STYLE="default"
|
|
66
|
+
OPENVOICE_TIMEOUT_MS="90000"
|
|
67
|
+
OPENVOICE_PROGRESS="0" # keep progress prompts fast via Edge unless set to 1
|
|
68
|
+
REQUIRE_WAKE_WORD="0"
|
|
69
|
+
MIN_UTTERANCE_SECONDS="1.4"
|
|
70
|
+
UTTERANCE_IDLE_MS="2000"
|
|
71
|
+
MIN_MEAN_VOLUME_DB="-35"
|
|
72
|
+
MIN_MAX_VOLUME_DB="-12"
|
|
73
|
+
BARGE_IN_MIN_SECONDS="1.4"
|
|
74
|
+
BARGE_IN_MIN_MEAN_VOLUME_DB="-30"
|
|
75
|
+
BARGE_IN_MIN_MAX_VOLUME_DB="-14"
|
|
76
|
+
PLAYBACK_BARGE_IN_MIN_SECONDS="0.9"
|
|
77
|
+
PLAYBACK_BARGE_IN_MIN_MEAN_VOLUME_DB="-36"
|
|
78
|
+
PLAYBACK_BARGE_IN_MIN_MAX_VOLUME_DB="-18"
|
|
79
|
+
PLAYBACK_BARGE_IN_REQUIRE_BOTH="1"
|
|
80
|
+
BARGE_IN_CONSERVATIVE_MIN_SECONDS="1.8"
|
|
81
|
+
BARGE_IN_CONSERVATIVE_MIN_MEAN_VOLUME_DB="-27"
|
|
82
|
+
BARGE_IN_CONSERVATIVE_MIN_MAX_VOLUME_DB="-12"
|
|
83
|
+
MAX_DEFERRED_PROCESSING_UTTERANCES="3"
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 VerbalCoding contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# VerbalCoding
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<strong>Talk to your CLI coding agents through Discord voice — like a phone call for software work.</strong>
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="docs/i18n/README.ko.md">한국어</a> ·
|
|
9
|
+
<a href="docs/i18n/README.ja.md">日本語</a> ·
|
|
10
|
+
<a href="docs/i18n/README.zh.md">中文</a> ·
|
|
11
|
+
<a href="docs/i18n/README.es.md">Español</a> ·
|
|
12
|
+
<a href="docs/i18n/README.fr.md">Français</a> ·
|
|
13
|
+
<a href="docs/i18n/README.ru.md">Русский</a>
|
|
14
|
+
</p>
|
|
15
|
+
|
|
16
|
+
<p align="center">
|
|
17
|
+
<img alt="Node.js" src="https://img.shields.io/badge/Node.js-20%2B-339933?logo=node.js&logoColor=white">
|
|
18
|
+
<img alt="Discord" src="https://img.shields.io/badge/Discord-voice%20bridge-5865F2?logo=discord&logoColor=white">
|
|
19
|
+
<img alt="STT" src="https://img.shields.io/badge/STT-whisper.cpp-7C3AED">
|
|
20
|
+
<img alt="TTS" src="https://img.shields.io/badge/TTS-Edge%20%7C%20OpenVoice%20%7C%20Supertonic%20%7C%20SpeechSwift-0EA5E9">
|
|
21
|
+
<img alt="Agents" src="https://img.shields.io/badge/Agents-Hermes%20%7C%20Claude%20%7C%20Codex%20%7C%20Gemini%20%7C%20OpenCode-111827">
|
|
22
|
+
</p>
|
|
23
|
+
|
|
24
|
+
<p align="center">
|
|
25
|
+
<img src="docs/assets/figures/verbalcoding-flow.svg" alt="VerbalCoding voice-to-agent flow" width="860">
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
## Why
|
|
29
|
+
|
|
30
|
+
VerbalCoding turns a Discord voice channel into a hands-free control surface for coding agents. Speak a request, let your CLI agent work, and hear a concise answer back — with text transcripts, progress events, and guardrails for noisy code/log output.
|
|
31
|
+
|
|
32
|
+
## Highlights
|
|
33
|
+
|
|
34
|
+
| What you get | Why it feels good |
|
|
35
|
+
|---|---|
|
|
36
|
+
| Voice-first agent control | Talk to Hermes Agent, Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw, or any custom CLI harness. |
|
|
37
|
+
| Local-first speech loop | Discord voice capture → `whisper.cpp` STT → agent → chunked TTS playback. |
|
|
38
|
+
| Shared voice + text context | Voice turns and `!ask` text commands can reuse the same supported agent session. |
|
|
39
|
+
| Barge-in and sensitivity modes | Interrupt playback naturally and switch between normal and conservative/noisy environments. |
|
|
40
|
+
| Multilingual voice presets | Switch STT, progress language, and TTS voice together with `vc language ko/en/auto`. |
|
|
41
|
+
| Multi-room project isolation | Run one bot per project room with isolated Hermes profiles, sessions, memory, and logs. |
|
|
42
|
+
|
|
43
|
+
## Quick Start
|
|
44
|
+
|
|
45
|
+
Fastest path with npm:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npm install -g verbalcoding
|
|
49
|
+
vc setup --yes
|
|
50
|
+
vc doctor
|
|
51
|
+
vc start
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Or run directly without a permanent global install:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
npx verbalcoding setup --yes
|
|
58
|
+
vc doctor
|
|
59
|
+
vc start
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
GitHub clone path for contributors:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
git clone https://github.com/ca1773130n/VerbalCoding.git
|
|
66
|
+
cd VerbalCoding
|
|
67
|
+
./scripts/install.sh --yes
|
|
68
|
+
vc doctor
|
|
69
|
+
./run.sh
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
`vc setup --yes` and `./scripts/install.sh --yes` bootstrap local prerequisites where possible: Node/npm dependencies, `ffmpeg`, `whisper-cli`, the default whisper.cpp model, a local `.venv-tts` Edge TTS helper, and the short `vc` shell command for clone installs. They support macOS/Homebrew plus common Linux package managers (`apt`, `dnf`, `pacman`); rerun with `--no-wizard` for dependency-only setup or `--skip-system` if you want to install OS packages yourself.
|
|
73
|
+
|
|
74
|
+
Need a clean install walkthrough? Start with [Fresh Install](docs/FRESH_INSTALL.md).
|
|
75
|
+
|
|
76
|
+
## How It Works
|
|
77
|
+
|
|
78
|
+
```mermaid
|
|
79
|
+
flowchart LR
|
|
80
|
+
A[Discord voice] --> B["@discordjs/voice"]
|
|
81
|
+
B --> C[PCM cleanup + gates]
|
|
82
|
+
C --> D["whisper.cpp STT"]
|
|
83
|
+
D --> E["CLI agent adapter"]
|
|
84
|
+
E --> F["Concise answer"]
|
|
85
|
+
F --> G["Chunked TTS"]
|
|
86
|
+
G --> H["Discord playback"]
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Supported Agent Backends
|
|
90
|
+
|
|
91
|
+
| Backend | Default command | Session support |
|
|
92
|
+
|---|---:|---|
|
|
93
|
+
| Hermes Agent | `hermes chat -Q -q` | Resume, verbose progress, cancellation, final-answer recovery |
|
|
94
|
+
| Claude Code | `claude -p` | CLI session file support through adapter defaults |
|
|
95
|
+
| Codex CLI | `codex exec` | CLI session file support through adapter defaults |
|
|
96
|
+
| Gemini CLI | `gemini -p` | CLI session file support through adapter defaults |
|
|
97
|
+
| OpenCode | `opencode run` | CLI session file support through adapter defaults |
|
|
98
|
+
| OpenClaw | `openclaw run` | CLI session file support through adapter defaults |
|
|
99
|
+
| Custom | `AGENT_COMMAND` | Bring your own non-interactive command |
|
|
100
|
+
|
|
101
|
+
## Learn More
|
|
102
|
+
|
|
103
|
+
| Guide | What you get |
|
|
104
|
+
|---|---|
|
|
105
|
+
| [Fresh Install](docs/FRESH_INSTALL.md) | Clean clone setup, model download, first run |
|
|
106
|
+
| [Usage Guide](docs/USAGE.md) | CLI commands, Discord commands, progress mode, latency metrics |
|
|
107
|
+
| [Configuration](docs/CONFIGURATION.md) | `.env`, agent backends, MCP, TTS backends, operational notes |
|
|
108
|
+
| [Multi-Instance](docs/MULTI_INSTANCE.md) | One permanent Discord voice room per project |
|
|
109
|
+
| [Release Notes](docs/RELEASE.md) | Current capabilities and pre-release checklist |
|
|
110
|
+
|
|
111
|
+
## Tiny Command Map
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
vc status # current language, TTS, and bridge settings
|
|
115
|
+
vc language ko|en|auto # switch STT/progress/TTS language preset
|
|
116
|
+
vc bot invite CLIENT_ID # generate the Discord bot invite URL
|
|
117
|
+
vc instance setup NAME # create an isolated project voice bot
|
|
118
|
+
vc instance start NAME # run that bot in the background
|
|
119
|
+
vc doctor # redacted health check
|
|
120
|
+
vc start # start the default bridge
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
In Discord:
|
|
124
|
+
|
|
125
|
+
```text
|
|
126
|
+
!join !ask <prompt> !verbose on/off
|
|
127
|
+
!latency !sensitivity normal !sensitivity conservative
|
|
128
|
+
!session new <name> <workdir> [context] --voice <voice-channel>
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Requirements
|
|
132
|
+
|
|
133
|
+
| Layer | Default |
|
|
134
|
+
|---|---|
|
|
135
|
+
| Runtime | Node.js 20+, npm; install script can install them via Homebrew/apt/dnf/pacman |
|
|
136
|
+
| Audio | `ffmpeg`; install script can install it |
|
|
137
|
+
| STT | `whisper.cpp` / `whisper-cli`; install script uses Homebrew on macOS or falls back to a local build on Linux |
|
|
138
|
+
| TTS | Edge TTS CLI; install script creates `.venv-tts` if needed |
|
|
139
|
+
| Discord | Bot token, Message Content intent, voice permissions |
|
|
140
|
+
| Agent | At least one authenticated CLI harness, Hermes Agent by default |
|
|
141
|
+
| Platform focus | macOS / Apple Silicon most tested; Linux bootstrap is best-effort and documented |
|
|
142
|
+
|
|
143
|
+
## Contributing
|
|
144
|
+
|
|
145
|
+
Run the lightweight checks before sending changes:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
node --check app-node/main.mjs
|
|
149
|
+
npm test
|
|
150
|
+
bash -n run.sh scripts/install.sh
|
|
151
|
+
npm pack --dry-run
|
|
152
|
+
vc doctor
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Status
|
|
156
|
+
|
|
157
|
+
VerbalCoding is public-release oriented but still early. A demo video/GIF and broader Linux notes are still TODOs. The package is released under the MIT License (see `LICENSE`).
|