verbalcoding 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +83 -0
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/app-node/agent_adapters.mjs +576 -0
- package/app-node/agent_adapters.test.mjs +455 -0
- package/app-node/agent_contract.mjs +45 -0
- package/app-node/barge_in.mjs +148 -0
- package/app-node/barge_in.test.mjs +179 -0
- package/app-node/bridge_logger.mjs +66 -0
- package/app-node/bridge_logger.test.mjs +73 -0
- package/app-node/bridge_state.mjs +104 -0
- package/app-node/bridge_state.test.mjs +64 -0
- package/app-node/cli_install.test.mjs +97 -0
- package/app-node/deferred_queue.mjs +12 -0
- package/app-node/deferred_queue.test.mjs +20 -0
- package/app-node/discord_invite_cli.test.mjs +31 -0
- package/app-node/discord_text.mjs +29 -0
- package/app-node/discord_text.test.mjs +32 -0
- package/app-node/hermes_profiles.mjs +164 -0
- package/app-node/hermes_profiles.test.mjs +276 -0
- package/app-node/install_config.mjs +263 -0
- package/app-node/install_config.test.mjs +205 -0
- package/app-node/instance_doctor.mjs +137 -0
- package/app-node/instance_doctor.test.mjs +128 -0
- package/app-node/instance_profile_lifecycle.mjs +16 -0
- package/app-node/instances.mjs +153 -0
- package/app-node/instances.test.mjs +102 -0
- package/app-node/language_config.mjs +73 -0
- package/app-node/language_config.test.mjs +51 -0
- package/app-node/latency_metrics.mjs +133 -0
- package/app-node/latency_metrics.test.mjs +71 -0
- package/app-node/main.mjs +1771 -0
- package/app-node/mcp_tools.mjs +198 -0
- package/app-node/mcp_tools.test.mjs +39 -0
- package/app-node/progress_cache.mjs +7 -0
- package/app-node/progress_cache.test.mjs +23 -0
- package/app-node/progress_speech.mjs +102 -0
- package/app-node/progress_speech.test.mjs +48 -0
- package/app-node/project_sessions.mjs +148 -0
- package/app-node/project_sessions.test.mjs +77 -0
- package/app-node/restart_notice.mjs +57 -0
- package/app-node/restart_notice.test.mjs +37 -0
- package/app-node/restart_policy.mjs +27 -0
- package/app-node/restart_policy.test.mjs +33 -0
- package/app-node/text_routing.mjs +8 -0
- package/app-node/text_routing.test.mjs +18 -0
- package/app-node/tts_backends.mjs +251 -0
- package/app-node/tts_backends.test.mjs +400 -0
- package/app-node/tts_chunks.mjs +57 -0
- package/app-node/tts_chunks.test.mjs +35 -0
- package/app-node/tts_prefetch.mjs +38 -0
- package/app-node/tts_prefetch.test.mjs +49 -0
- package/app-node/tts_settings.mjs +72 -0
- package/app-node/tts_settings.test.mjs +127 -0
- package/app-node/tts_voice_config.mjs +127 -0
- package/app-node/tts_voice_config.test.mjs +64 -0
- package/app-node/voice_clone_capture.mjs +76 -0
- package/app-node/voice_clone_capture.test.mjs +51 -0
- package/app-node/voice_messages.mjs +62 -0
- package/app-node/voice_messages.test.mjs +33 -0
- package/docs/CONFIGURATION.md +183 -0
- package/docs/FRESH_INSTALL.md +193 -0
- package/docs/MULTI_INSTANCE.md +183 -0
- package/docs/RELEASE.md +72 -0
- package/docs/USAGE.md +108 -0
- package/docs/assets/figures/verbalcoding-flow.svg +63 -0
- package/docs/i18n/README.es.md +121 -0
- package/docs/i18n/README.fr.md +121 -0
- package/docs/i18n/README.ja.md +121 -0
- package/docs/i18n/README.ko.md +121 -0
- package/docs/i18n/README.ru.md +121 -0
- package/docs/i18n/README.zh.md +121 -0
- package/package.json +58 -0
- package/run.sh +82 -0
- package/scripts/bootstrap_prereqs.sh +193 -0
- package/scripts/cli.mjs +369 -0
- package/scripts/docker_ubuntu_smoke.sh +76 -0
- package/scripts/doctor.mjs +134 -0
- package/scripts/install.mjs +108 -0
- package/scripts/install.sh +44 -0
- package/scripts/mcp-server.mjs +84 -0
- package/scripts/openvoice_smoke.py +34 -0
- package/scripts/openvoice_synth.py +103 -0
- package/scripts/setup_openvoice.sh +34 -0
- package/scripts/setup_supertonic.sh +18 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# VerbalCoding
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<strong>Общайтесь с CLI-агентами для программирования голосом в Discord — почти как по телефону.</strong>
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="../../README.md">English</a> ·
|
|
9
|
+
<a href="README.ko.md">한국어</a> ·
|
|
10
|
+
<a href="README.ja.md">日本語</a> ·
|
|
11
|
+
<a href="README.zh.md">中文</a> ·
|
|
12
|
+
<a href="README.es.md">Español</a> ·
|
|
13
|
+
<a href="README.fr.md">Français</a> ·
|
|
14
|
+
<a href="README.ru.md">Русский</a>
|
|
15
|
+
</p>
|
|
16
|
+
|
|
17
|
+
<p align="center">
|
|
18
|
+
<img alt="Node.js" src="https://img.shields.io/badge/Node.js-20%2B-339933?logo=node.js&logoColor=white">
|
|
19
|
+
<img alt="Discord" src="https://img.shields.io/badge/Discord-voice%20bridge-5865F2?logo=discord&logoColor=white">
|
|
20
|
+
<img alt="STT" src="https://img.shields.io/badge/STT-whisper.cpp-7C3AED">
|
|
21
|
+
<img alt="TTS" src="https://img.shields.io/badge/TTS-Edge%20%7C%20OpenVoice%20%7C%20Supertonic%20%7C%20SpeechSwift-0EA5E9">
|
|
22
|
+
</p>
|
|
23
|
+
|
|
24
|
+
<p align="center">
|
|
25
|
+
<img src="../assets/figures/verbalcoding-flow.svg" alt="VerbalCoding voice-to-agent flow" width="860">
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
## Зачем
|
|
29
|
+
|
|
30
|
+
VerbalCoding превращает голосовой канал Discord в hands-free панель управления агентами для разработки. Скажите задачу, дайте CLI-агенту выполнить работу и получите краткий голосовой ответ — с текстовыми транскриптами, событиями прогресса и защитой от зачитывания длинного кода или логов.
|
|
31
|
+
|
|
32
|
+
## Возможности
|
|
33
|
+
|
|
34
|
+
| Что есть | Почему это удобно |
|
|
35
|
+
|---|---|
|
|
36
|
+
| Голосовое управление прежде всего | Управляйте Hermes Agent, Claude Code, Codex, Gemini CLI, OpenCode, OpenClaw или своим CLI голосом. |
|
|
37
|
+
| Локальный voice loop | Голос Discord → STT `whisper.cpp` → агент → фрагментированное TTS-воспроизведение. |
|
|
38
|
+
| Общий контекст голоса и текста | Голосовые реплики и `!ask` могут использовать одну и ту же поддерживаемую сессию агента. |
|
|
39
|
+
| Barge-in и режимы чувствительности | Естественно перебивайте воспроизведение и переключайте normal/conservative режимы. |
|
|
40
|
+
| Многоязычные voice presets | `vc language ko/en/auto` одновременно меняет STT, язык прогресса и TTS-голос. |
|
|
41
|
+
| Изоляция комнат по проектам | Отдельный bot, Hermes profile, сессия, память и логи для каждого проекта. |
|
|
42
|
+
|
|
43
|
+
## Быстрый старт
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
git clone git@github.com:ca1773130n/VerbalCoding.git
|
|
47
|
+
cd VerbalCoding
|
|
48
|
+
./scripts/install.sh
|
|
49
|
+
vc doctor
|
|
50
|
+
./run.sh
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Как это работает
|
|
54
|
+
|
|
55
|
+
```mermaid
|
|
56
|
+
flowchart LR
|
|
57
|
+
A[Discord voice] --> B["@discordjs/voice"]
|
|
58
|
+
B --> C[PCM cleanup + gates]
|
|
59
|
+
C --> D["whisper.cpp STT"]
|
|
60
|
+
D --> E["CLI agent adapter"]
|
|
61
|
+
E --> F["Concise answer"]
|
|
62
|
+
F --> G["Chunked TTS"]
|
|
63
|
+
G --> H["Discord playback"]
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Поддерживаемые agent-бэкенды
|
|
67
|
+
|
|
68
|
+
| Backend | Default command | Session support |
|
|
69
|
+
|---|---:|---|
|
|
70
|
+
| Hermes Agent | `hermes chat -Q -q` | Resume, verbose progress, cancellation, final-answer recovery |
|
|
71
|
+
| Claude Code | `claude -p` | CLI session file support through adapter defaults |
|
|
72
|
+
| Codex CLI | `codex exec` | CLI session file support through adapter defaults |
|
|
73
|
+
| Gemini CLI | `gemini -p` | CLI session file support through adapter defaults |
|
|
74
|
+
| OpenCode | `opencode run` | CLI session file support through adapter defaults |
|
|
75
|
+
| OpenClaw | `openclaw run` | CLI session file support through adapter defaults |
|
|
76
|
+
| Custom | `AGENT_COMMAND` | Bring your own non-interactive command |
|
|
77
|
+
|
|
78
|
+
## Подробнее
|
|
79
|
+
|
|
80
|
+
| Guide | What you get |
|
|
81
|
+
|---|---|
|
|
82
|
+
| [Fresh Install](../FRESH_INSTALL.md) | Чистая установка, загрузка модели, первый запуск |
|
|
83
|
+
| [Usage Guide](../USAGE.md) | CLI-команды, команды Discord, режим прогресса, метрики задержек |
|
|
84
|
+
| [Configuration](../CONFIGURATION.md) | .env, agent-бэкенды, MCP, TTS и эксплуатационные заметки |
|
|
85
|
+
| [Multi-Instance](../MULTI_INSTANCE.md) | Постоянная голосовая комната Discord для каждого проекта |
|
|
86
|
+
| [Release Notes](../RELEASE.md) | Текущие возможности и pre-release checklist |
|
|
87
|
+
|
|
88
|
+
## Карта команд
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
vc status
|
|
92
|
+
vc language ko|en|auto
|
|
93
|
+
vc bot invite CLIENT_ID
|
|
94
|
+
vc instance setup NAME
|
|
95
|
+
vc instance start NAME
|
|
96
|
+
vc doctor
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Требования
|
|
100
|
+
|
|
101
|
+
| Layer | Default |
|
|
102
|
+
|---|---|
|
|
103
|
+
| Runtime | Node.js 20+, npm |
|
|
104
|
+
| Audio | `ffmpeg` |
|
|
105
|
+
| STT | `whisper.cpp` / `whisper-cli` |
|
|
106
|
+
| Discord | Bot token, Message Content intent, voice permissions |
|
|
107
|
+
| Agent | At least one authenticated CLI harness, Hermes Agent by default |
|
|
108
|
+
| Platform focus | macOS / Apple Silicon currently gets the most testing |
|
|
109
|
+
|
|
110
|
+
## Участие
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
node --check app-node/main.mjs
|
|
114
|
+
npm test
|
|
115
|
+
bash -n run.sh scripts/install.sh
|
|
116
|
+
vc doctor
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Статус
|
|
120
|
+
|
|
121
|
+
VerbalCoding is public-release oriented but still early. Demo video/GIF and broader Linux notes are still TODOs.
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# VerbalCoding
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<strong>通过 Discord 语音像打电话一样控制 CLI 编程 Agent。</strong>
|
|
5
|
+
</p>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="../../README.md">English</a> ·
|
|
9
|
+
<a href="README.ko.md">한국어</a> ·
|
|
10
|
+
<a href="README.ja.md">日本語</a> ·
|
|
11
|
+
<a href="README.zh.md">中文</a> ·
|
|
12
|
+
<a href="README.es.md">Español</a> ·
|
|
13
|
+
<a href="README.fr.md">Français</a> ·
|
|
14
|
+
<a href="README.ru.md">Русский</a>
|
|
15
|
+
</p>
|
|
16
|
+
|
|
17
|
+
<p align="center">
|
|
18
|
+
<img alt="Node.js" src="https://img.shields.io/badge/Node.js-20%2B-339933?logo=node.js&logoColor=white">
|
|
19
|
+
<img alt="Discord" src="https://img.shields.io/badge/Discord-voice%20bridge-5865F2?logo=discord&logoColor=white">
|
|
20
|
+
<img alt="STT" src="https://img.shields.io/badge/STT-whisper.cpp-7C3AED">
|
|
21
|
+
<img alt="TTS" src="https://img.shields.io/badge/TTS-Edge%20%7C%20OpenVoice%20%7C%20Supertonic%20%7C%20SpeechSwift-0EA5E9">
|
|
22
|
+
</p>
|
|
23
|
+
|
|
24
|
+
<p align="center">
|
|
25
|
+
<img src="../assets/figures/verbalcoding-flow.svg" alt="VerbalCoding voice-to-agent flow" width="860">
|
|
26
|
+
</p>
|
|
27
|
+
|
|
28
|
+
## 为什么
|
|
29
|
+
|
|
30
|
+
VerbalCoding 把 Discord 语音频道变成面向编程 Agent 的免手动控制台。你可以直接说出需求,让 CLI Agent 工作,再听到简洁的语音回答;同时保留文字记录、进度事件,并避免把大段代码或日志读出来。
|
|
31
|
+
|
|
32
|
+
## 亮点
|
|
33
|
+
|
|
34
|
+
| 能力 | 价值 |
|
|
35
|
+
|---|---|
|
|
36
|
+
| 语音优先的 Agent 控制 | 用语音控制 Hermes Agent、Claude Code、Codex、Gemini CLI、OpenCode、OpenClaw 或自定义 CLI。 |
|
|
37
|
+
| 本地优先语音闭环 | Discord 语音捕获 → `whisper.cpp` STT → Agent → 分段 TTS 播放。 |
|
|
38
|
+
| 语音 + 文本共享上下文 | 在支持的 Agent 中,语音轮次和 `!ask` 文本命令可复用同一会话。 |
|
|
39
|
+
| 打断与灵敏度模式 | 可自然打断播放,并在普通/保守灵敏度之间切换。 |
|
|
40
|
+
| 多语言语音预设 | 用 `vc language ko/en/auto` 同步切换 STT、进度语言和 TTS 声音。 |
|
|
41
|
+
| 按项目隔离的多房间 | 每个项目房间使用独立 Bot、Hermes profile、会话、记忆和日志。 |
|
|
42
|
+
|
|
43
|
+
## 快速开始
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
git clone git@github.com:ca1773130n/VerbalCoding.git
|
|
47
|
+
cd VerbalCoding
|
|
48
|
+
./scripts/install.sh
|
|
49
|
+
vc doctor
|
|
50
|
+
./run.sh
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## 工作原理
|
|
54
|
+
|
|
55
|
+
```mermaid
|
|
56
|
+
flowchart LR
|
|
57
|
+
A[Discord voice] --> B["@discordjs/voice"]
|
|
58
|
+
B --> C[PCM cleanup + gates]
|
|
59
|
+
C --> D["whisper.cpp STT"]
|
|
60
|
+
D --> E["CLI agent adapter"]
|
|
61
|
+
E --> F["Concise answer"]
|
|
62
|
+
F --> G["Chunked TTS"]
|
|
63
|
+
G --> H["Discord playback"]
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## 支持的 Agent 后端
|
|
67
|
+
|
|
68
|
+
| Backend | Default command | Session support |
|
|
69
|
+
|---|---:|---|
|
|
70
|
+
| Hermes Agent | `hermes chat -Q -q` | Resume, verbose progress, cancellation, final-answer recovery |
|
|
71
|
+
| Claude Code | `claude -p` | CLI session file support through adapter defaults |
|
|
72
|
+
| Codex CLI | `codex exec` | CLI session file support through adapter defaults |
|
|
73
|
+
| Gemini CLI | `gemini -p` | CLI session file support through adapter defaults |
|
|
74
|
+
| OpenCode | `opencode run` | CLI session file support through adapter defaults |
|
|
75
|
+
| OpenClaw | `openclaw run` | CLI session file support through adapter defaults |
|
|
76
|
+
| Custom | `AGENT_COMMAND` | Bring your own non-interactive command |
|
|
77
|
+
|
|
78
|
+
## 了解更多
|
|
79
|
+
|
|
80
|
+
| Guide | What you get |
|
|
81
|
+
|---|---|
|
|
82
|
+
| [Fresh Install](../FRESH_INSTALL.md) | 干净克隆安装、模型下载、首次运行 |
|
|
83
|
+
| [Usage Guide](../USAGE.md) | CLI 命令、Discord 命令、进度模式、延迟指标 |
|
|
84
|
+
| [Configuration](../CONFIGURATION.md) | .env、Agent 后端、MCP、TTS 后端、运维说明 |
|
|
85
|
+
| [Multi-Instance](../MULTI_INSTANCE.md) | 每个项目一个常驻 Discord 语音房间 |
|
|
86
|
+
| [Release Notes](../RELEASE.md) | 当前能力与发布前检查清单 |
|
|
87
|
+
|
|
88
|
+
## 常用命令
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
vc status
|
|
92
|
+
vc language ko|en|auto
|
|
93
|
+
vc bot invite CLIENT_ID
|
|
94
|
+
vc instance setup NAME
|
|
95
|
+
vc instance start NAME
|
|
96
|
+
vc doctor
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## 要求
|
|
100
|
+
|
|
101
|
+
| Layer | Default |
|
|
102
|
+
|---|---|
|
|
103
|
+
| Runtime | Node.js 20+, npm |
|
|
104
|
+
| Audio | `ffmpeg` |
|
|
105
|
+
| STT | `whisper.cpp` / `whisper-cli` |
|
|
106
|
+
| Discord | Bot token, Message Content intent, voice permissions |
|
|
107
|
+
| Agent | At least one authenticated CLI harness, Hermes Agent by default |
|
|
108
|
+
| Platform focus | macOS / Apple Silicon currently gets the most testing |
|
|
109
|
+
|
|
110
|
+
## 贡献
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
node --check app-node/main.mjs
|
|
114
|
+
npm test
|
|
115
|
+
bash -n run.sh scripts/install.sh
|
|
116
|
+
vc doctor
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## 状态
|
|
120
|
+
|
|
121
|
+
VerbalCoding is public-release oriented but still early. Demo video/GIF and broader Linux notes are still TODOs.
|
package/package.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "verbalcoding",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Discord voice bridge for CLI coding agents.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "git+https://github.com/ca1773130n/VerbalCoding.git"
|
|
9
|
+
},
|
|
10
|
+
"homepage": "https://github.com/ca1773130n/VerbalCoding#readme",
|
|
11
|
+
"bugs": {
|
|
12
|
+
"url": "https://github.com/ca1773130n/VerbalCoding/issues"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"discord",
|
|
16
|
+
"voice",
|
|
17
|
+
"coding-agent",
|
|
18
|
+
"cli",
|
|
19
|
+
"speech-to-text",
|
|
20
|
+
"tts",
|
|
21
|
+
"whisper"
|
|
22
|
+
],
|
|
23
|
+
"type": "module",
|
|
24
|
+
"engines": {
|
|
25
|
+
"node": ">=20"
|
|
26
|
+
},
|
|
27
|
+
"bin": {
|
|
28
|
+
"vc": "scripts/cli.mjs",
|
|
29
|
+
"verbalcoding": "scripts/cli.mjs",
|
|
30
|
+
"verbalcoding-mcp": "scripts/mcp-server.mjs"
|
|
31
|
+
},
|
|
32
|
+
"files": [
|
|
33
|
+
"app-node/",
|
|
34
|
+
"docs/",
|
|
35
|
+
"scripts/*.mjs",
|
|
36
|
+
"scripts/*.sh",
|
|
37
|
+
"scripts/*.py",
|
|
38
|
+
"run.sh",
|
|
39
|
+
".env.example",
|
|
40
|
+
"README.md",
|
|
41
|
+
"LICENSE"
|
|
42
|
+
],
|
|
43
|
+
"scripts": {
|
|
44
|
+
"start": "node app-node/main.mjs",
|
|
45
|
+
"setup": "node scripts/install.mjs",
|
|
46
|
+
"doctor": "node scripts/doctor.mjs",
|
|
47
|
+
"vc": "node scripts/cli.mjs",
|
|
48
|
+
"mcp": "node scripts/mcp-server.mjs",
|
|
49
|
+
"test": "node --test app-node/*.test.mjs"
|
|
50
|
+
},
|
|
51
|
+
"dependencies": {
|
|
52
|
+
"@discordjs/opus": "^0.10.0",
|
|
53
|
+
"@discordjs/voice": "^0.19.2",
|
|
54
|
+
"discord.js": "^14.26.3",
|
|
55
|
+
"prism-media": "^1.3.5",
|
|
56
|
+
"wav": "^1.0.2"
|
|
57
|
+
}
|
|
58
|
+
}
|
package/run.sh
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
#!/usr/bin/env bash
#
# Launcher for the VerbalCoding bridge: exports runtime defaults, loads env
# files, and finally execs the Node entry point.
set -euo pipefail
cd "$(dirname "$0")"

# Node @discordjs/voice receiver is the active implementation. The earlier Python
# discord-ext-voice-recv path produced corrupted PCM on this Mac/Discord setup.
#
# Every default below only applies when the variable is unset or empty in the
# calling environment. The debug directory is not created here: its final value
# may still be changed by the env files sourced later, and the script creates
# "$NODE_AUDIO_DEBUG_DIR" once those have been applied. Creating the hard-coded
# /tmp default up front left a stray directory whenever the path was overridden.
export NODE_AUDIO_DEBUG_DIR="${NODE_AUDIO_DEBUG_DIR:-/tmp/verbalcoding-node-debug}"
export MIN_UTTERANCE_SECONDS="${MIN_UTTERANCE_SECONDS:-1.0}"
export SUBSCRIBE_AFTER_SILENCE_MS="${SUBSCRIBE_AFTER_SILENCE_MS:-2200}"
export UTTERANCE_IDLE_MS="${UTTERANCE_IDLE_MS:-2600}"
export MIN_MEAN_VOLUME_DB="${MIN_MEAN_VOLUME_DB:--35}"
export MIN_MAX_VOLUME_DB="${MIN_MAX_VOLUME_DB:--18}"
export TTS_RATE="${TTS_RATE:-+10%}"
export TTS_MAX_CHARS="${TTS_MAX_CHARS:-495}"
export HERMES_TASK_TIMEOUT_MS="${HERMES_TASK_TIMEOUT_MS:-0}"
export HERMES_CHAT_TIMEOUT_MS="${HERMES_CHAT_TIMEOUT_MS:-45000}"
|
|
18
|
+
|
|
19
|
+
# Instance mode is selected by VERBALCODING_INSTANCE_ENV or, failing that, by
# the first positional argument. An empty value means single-instance mode.
INSTANCE_ENV="${VERBALCODING_INSTANCE_ENV:-${1:-}}"
if [ -n "$INSTANCE_ENV" ] && [ ! -f "$INSTANCE_ENV" ]; then
  echo "instance env file not found: $INSTANCE_ENV" >&2
  exit 2
fi

# In instance mode, the launcher passes isolated runtime defaults in the
# process environment. Source the shared .env for non-secret/common defaults,
# but do not let it replace the instance launcher defaults or token.
# Snapshot the launcher-provided values before .env can overwrite them.
LAUNCH_BRIDGE_LOG_PATH="${BRIDGE_LOG_PATH:-}"
LAUNCH_NODE_AUDIO_DEBUG_DIR="${NODE_AUDIO_DEBUG_DIR:-}"
LAUNCH_PROJECT_SESSIONS_FILE="${PROJECT_SESSIONS_FILE:-}"
LAUNCH_HERMES_SESSION_FILE="${HERMES_SESSION_FILE:-}"
LAUNCH_HERMES_HOME="${HERMES_HOME:-}"
LAUNCH_VERBALCODING_INSTANCE_NAME="${VERBALCODING_INSTANCE_NAME:-}"
LAUNCH_VERBALCODING_INSTANCE_ENV="${VERBALCODING_INSTANCE_ENV:-}"
if [ -f .env ]; then
  # set -a exports every assignment made while the file is sourced.
  set -a
  # shellcheck disable=SC1091
  source ./.env
  set +a
fi
if [ -n "$INSTANCE_ENV" ]; then
  # Put back any launcher value that the shared .env may have replaced.
  [ -n "$LAUNCH_BRIDGE_LOG_PATH" ] && export BRIDGE_LOG_PATH="$LAUNCH_BRIDGE_LOG_PATH"
  [ -n "$LAUNCH_NODE_AUDIO_DEBUG_DIR" ] && export NODE_AUDIO_DEBUG_DIR="$LAUNCH_NODE_AUDIO_DEBUG_DIR"
  [ -n "$LAUNCH_PROJECT_SESSIONS_FILE" ] && export PROJECT_SESSIONS_FILE="$LAUNCH_PROJECT_SESSIONS_FILE"
  [ -n "$LAUNCH_HERMES_SESSION_FILE" ] && export HERMES_SESSION_FILE="$LAUNCH_HERMES_SESSION_FILE"
  [ -n "$LAUNCH_HERMES_HOME" ] && export HERMES_HOME="$LAUNCH_HERMES_HOME"
  [ -n "$LAUNCH_VERBALCODING_INSTANCE_NAME" ] && export VERBALCODING_INSTANCE_NAME="$LAUNCH_VERBALCODING_INSTANCE_NAME"
  [ -n "$LAUNCH_VERBALCODING_INSTANCE_ENV" ] && export VERBALCODING_INSTANCE_ENV="$LAUNCH_VERBALCODING_INSTANCE_ENV"
  # Drop any token already in the environment (for example from the shared
  # .env) before the instance file is loaded.
  unset DISCORD_BOT_TOKEN DISCORD_TOKEN
  # `source` looks a slash-less name up in PATH before the current directory,
  # so it could load a different file than the one checked with -f above.
  # Anchor bare filenames to the current directory.
  instance_env_path="$INSTANCE_ENV"
  case "$instance_env_path" in
    */*) ;;
    *) instance_env_path="./$instance_env_path" ;;
  esac
  set -a
  # shellcheck disable=SC1090
  source "$instance_env_path"
  set +a
fi
|
|
55
|
+
|
|
56
|
+
# Create the audio debug directory using whatever value is in effect after the
# .env and instance env files above have been sourced.
mkdir -p "$NODE_AUDIO_DEBUG_DIR"
|
|
57
|
+
|
|
58
|
+
#######################################
# Start the speech-swift `audio-server` when server-mode TTS is requested.
# Does nothing unless TTS_BACKEND=speechswift and SPEECHSWIFT_MODE=server.
# Globals:
#   TTS_BACKEND, SPEECHSWIFT_MODE (read)
#   SPEECHSWIFT_SERVER_HOST, SPEECHSWIFT_SERVER_PORT, SPEECHSWIFT_SERVER_URL
#     (defaulted and exported)
#   SPEECHSWIFT_SERVER_PID (exported when this function launches the server)
# Outputs:
#   Diagnostics on stderr; server output is appended to
#   .logs/speechswift-audio-server.log.
# Returns:
#   Always 0 - a missing or unhealthy server is reported, never fatal.
#######################################
start_speechswift_server() {
  if [ "${TTS_BACKEND:-}" != "speechswift" ] || [ "${SPEECHSWIFT_MODE:-cli}" != "server" ]; then
    return 0
  fi

  export SPEECHSWIFT_SERVER_HOST="${SPEECHSWIFT_SERVER_HOST:-127.0.0.1}"
  export SPEECHSWIFT_SERVER_PORT="${SPEECHSWIFT_SERVER_PORT:-18080}"
  export SPEECHSWIFT_SERVER_URL="${SPEECHSWIFT_SERVER_URL:-http://${SPEECHSWIFT_SERVER_HOST}:${SPEECHSWIFT_SERVER_PORT}}"
  # Strip one trailing slash so the probe URL never contains "//health".
  local health_url="${SPEECHSWIFT_SERVER_URL%/}/health"

  if ! command -v audio-server >/dev/null 2>&1; then
    echo "speech-swift server mode requested but audio-server was not found; TTS will fall back if server calls fail" >&2
    return 0
  fi

  # A server that already answers the health probe is reused as-is.
  if curl -fsS --max-time 1 "$health_url" >/dev/null 2>&1; then
    return 0
  fi

  mkdir -p .logs
  audio-server --host "$SPEECHSWIFT_SERVER_HOST" --port "$SPEECHSWIFT_SERVER_PORT" >> .logs/speechswift-audio-server.log 2>&1 &
  export SPEECHSWIFT_SERVER_PID="$!"

  # Give the server up to five probes, one second apart, to come up.
  local attempt
  for attempt in 1 2 3 4 5; do
    if curl -fsS --max-time 1 "$health_url" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done

  # Previously this case was silent, which hid startup failures.
  echo "speech-swift audio-server did not become healthy at $health_url; see .logs/speechswift-audio-server.log" >&2
  return 0
}

start_speechswift_server
|
|
76
|
+
# Ask Python child processes for unbuffered output.
# NOTE(review): presumably for the Python TTS helper scripts - confirm.
export PYTHONUNBUFFERED=1

# Install npm dependencies on first launch, when node_modules is missing.
[ -d node_modules ] || npm install

# Hand the process over to the Node bridge; nothing after this line runs.
exec node app-node/main.mjs
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
#!/usr/bin/env bash
#
# Install VerbalCoding prerequisites where possible. Run with --help for the
# supported flags.
set -euo pipefail

# Work from the repository root (the parent of this script's directory).
ROOT="$(cd "$(dirname "$0")/.." && pwd)"
cd "$ROOT"

# Flag state: 1 means the flag was given on the command line.
ASSUME_YES=0
SKIP_SYSTEM=0
SKIP_MODEL=0
SKIP_EDGE_TTS=0

for arg in "$@"; do
  case "$arg" in
    -y|--yes) ASSUME_YES=1 ;;
    --skip-system) SKIP_SYSTEM=1 ;;
    --skip-model) SKIP_MODEL=1 ;;
    --skip-edge-tts) SKIP_EDGE_TTS=1 ;;
    -h|--help)
      cat <<'USAGE'
Usage: scripts/bootstrap_prereqs.sh [--yes] [--skip-system] [--skip-model] [--skip-edge-tts]

Installs public-release prerequisites where possible:
- Node/npm package dependencies
- ffmpeg
- whisper.cpp / whisper-cli
- default whisper.cpp model
- local Edge TTS CLI in .venv-tts when edge-tts is not already available

System package installation supports macOS Homebrew and common Linux package managers
(apt, dnf, pacman). Linux whisper.cpp is built locally under vendor/whisper.cpp if no
package-manager whisper-cli is available.
USAGE
      exit 0
      ;;
    *)
      # A mistyped flag (e.g. --skip-sytem) used to be dropped silently, so the
      # step the user meant to skip still ran. Stay non-fatal for callers that
      # forward extra arguments, but make the problem visible.
      printf 'Warning: ignoring unrecognized argument: %s\n' "$arg" >&2
      ;;
  esac
done
|
|
37
|
+
|
|
38
|
+
# Print a progress line on stdout: "==> " followed by all arguments joined
# into one string.
log() {
  local message="$*"
  printf '==> %s\n' "$message"
}
|
|
39
|
+
# Print a warning on stderr: "Warning: " followed by all arguments joined
# into one string. Nothing is written to stdout.
warn() {
  local message="$*"
  printf 'Warning: %s\n' "$message" >&2
}
|
|
40
|
+
# Succeed when "$1" resolves to something the shell can run (as reported by
# `command -v`); print nothing either way.
has_cmd() {
  local tool="$1"
  command -v "$tool" >/dev/null 2>&1
}
|
|
41
|
+
#######################################
# Ask a yes/no question on stderr and read the answer from stdin.
# Globals:
#   ASSUME_YES (read) - "1" answers yes without prompting
# Arguments:
#   $1 - question text, shown as "<question> [y/N]: "
# Returns:
#   0 for y, Y, yes or YES (or when ASSUME_YES=1); 1 for anything else,
#   including an empty line or end of input.
#######################################
confirm() {
  # Keep the reply local so it does not leak into the caller's globals.
  local answer=""
  if [ "$ASSUME_YES" = "1" ]; then
    return 0
  fi
  printf '%s [y/N]: ' "$1" >&2
  # read returns non-zero at end of input; tolerate that so a closed stdin
  # means "no" instead of tripping `set -e` in an unguarded caller.
  read -r answer || true
  case "$answer" in
    y|Y|yes|YES) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
47
|
+
#######################################
# Run a command with root privileges.
# Runs the command directly when already root (uid 0), otherwise via sudo.
# Arguments:
#   $@ - command and its arguments
# Returns:
#   The command's exit status, or 127 with a warning on stderr when sudo is
#   needed but not installed.
#######################################
run_sudo() {
  if [ "$(id -u)" = "0" ]; then
    "$@"
    return
  fi
  # Without this check the only hint was the shell's "sudo: command not found".
  if ! command -v sudo >/dev/null 2>&1; then
    printf 'Warning: sudo is required to run: %s\n' "$*" >&2
    return 127
  fi
  sudo "$@"
}
|
|
50
|
+
|
|
51
|
+
#######################################
# Install npm dependencies unless a node_modules directory already exists in
# the current directory.
# Returns:
#   0 when node_modules is present, otherwise the status of `npm install`.
#######################################
install_node_modules() {
  if [ -d node_modules ]; then
    log 'npm dependencies already installed'
    return 0
  fi
  log 'Installing npm dependencies'
  npm install
}
|
|
59
|
+
|
|
60
|
+
#######################################
# Install missing system tools on macOS through Homebrew.
# Only formulae whose command is not already available are installed.
# Returns:
#   1 when Homebrew is missing, 0 when nothing needs installing, otherwise
#   the status of `brew install`.
#######################################
install_system_macos() {
  if ! has_cmd brew; then
    warn 'Homebrew is not installed. Install it from https://brew.sh, then rerun this script.'
    return 1
  fi

  local packages=()
  # Both commands map to the `node` formula, so queue it once even when node
  # and npm are both missing (it used to be listed twice).
  if ! has_cmd node || ! has_cmd npm; then
    packages+=(node)
  fi
  has_cmd ffmpeg || packages+=(ffmpeg)
  has_cmd whisper-cli || packages+=(whisper-cpp)

  if [ "${#packages[@]}" -eq 0 ]; then
    log 'System packages already available'
    return 0
  fi
  log "Installing Homebrew packages: ${packages[*]}"
  brew install "${packages[@]}"
}
|
|
77
|
+
|
|
78
|
+
#######################################
# Install build and runtime packages with the first package manager found,
# probing apt-get, then dnf, then pacman.
# Returns:
#   1 when none of the three is available, otherwise the status of the last
#   package-manager command.
#######################################
install_system_linux_packages() {
  local manager=""
  if has_cmd apt-get; then
    manager="apt-get"
  elif has_cmd dnf; then
    manager="dnf"
  elif has_cmd pacman; then
    manager="pacman"
  fi

  case "$manager" in
    apt-get)
      log 'Installing Linux packages with apt-get'
      run_sudo apt-get update
      run_sudo apt-get install -y curl ca-certificates git python3 python3-venv python3-pip build-essential cmake pkg-config ffmpeg nodejs npm
      ;;
    dnf)
      log 'Installing Linux packages with dnf'
      run_sudo dnf install -y curl ca-certificates git python3 python3-pip gcc gcc-c++ make cmake pkgconf-pkg-config ffmpeg nodejs npm
      ;;
    pacman)
      log 'Installing Linux packages with pacman'
      # NOTE(review): -Sy refreshes the package database without upgrading
      # installed packages; Arch documents that as an unsupported partial
      # upgrade - confirm whether -Syu or plain -S is intended.
      run_sudo pacman -Sy --needed --noconfirm curl ca-certificates git python python-pip base-devel cmake pkgconf ffmpeg nodejs npm
      ;;
    *)
      warn 'No supported Linux package manager found. Install node/npm, ffmpeg, python3, git, cmake, and a C++ toolchain manually.'
      return 1
      ;;
  esac
}
|
|
94
|
+
|
|
95
|
+
#######################################
# Build whisper.cpp from source and expose it as $ROOT/.local/bin/whisper-cli.
# Does nothing when a whisper-cli command is already available.
# Globals:
#   ROOT (read) - repository root
#   PATH (written) - $ROOT/.local/bin is prepended on success
# Returns:
#   0 on success or when whisper-cli already exists; 1 when cloning, building
#   or locating the binary fails.
#######################################
install_whisper_cpp_linux() {
  if has_cmd whisper-cli; then
    log 'whisper-cli already available'
    return 0
  fi

  local bin_dir="$ROOT/.local/bin"
  local src_dir="$ROOT/vendor/whisper.cpp"
  local build_dir="$src_dir/build"
  local build_jobs=""
  local candidate=""
  local probe=""

  # This function is invoked as `install_whisper_cpp_linux || true`, and bash
  # ignores `set -e` inside a function called that way. Each step is therefore
  # checked explicitly so a failed clone or build stops here with an accurate
  # message rather than falling through to the later steps.
  mkdir -p "$bin_dir" "$ROOT/vendor" || return 1

  if [ ! -d "$src_dir/.git" ]; then
    log 'Cloning whisper.cpp under vendor/whisper.cpp'
    if ! git clone --depth 1 https://github.com/ggml-org/whisper.cpp.git "$src_dir"; then
      warn 'Could not clone whisper.cpp; skipping the local whisper-cli build.'
      return 1
    fi
  fi

  log 'Building whisper.cpp locally'
  # Use one build job per online CPU; fall back to 2 when getconf cannot tell.
  build_jobs="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 2)"
  if ! cmake -S "$src_dir" -B "$build_dir" -DCMAKE_BUILD_TYPE=Release; then
    warn 'Configuring whisper.cpp with cmake failed. Check vendor/whisper.cpp/build.'
    return 1
  fi
  if ! cmake --build "$build_dir" --config Release -j "$build_jobs"; then
    warn 'Building whisper.cpp failed. Check vendor/whisper.cpp/build.'
    return 1
  fi

  # Take the first of the known output locations that holds an executable.
  for probe in "$build_dir/bin/whisper-cli" "$build_dir/examples/main/whisper-cli" "$build_dir/main"; do
    if [ -x "$probe" ]; then
      candidate="$probe"
      break
    fi
  done
  if [ -z "$candidate" ]; then
    warn 'Built whisper.cpp but could not find whisper-cli binary. Check vendor/whisper.cpp/build.'
    return 1
  fi

  ln -sf "$candidate" "$bin_dir/whisper-cli" || return 1
  log "Installed local whisper-cli shim: $bin_dir/whisper-cli"
  export PATH="$bin_dir:$PATH"
}
|
|
122
|
+
|
|
123
|
+
#######################################
# Install system-level prerequisites for the current OS.
# Globals:
#   SKIP_SYSTEM (read) - "1" skips this step entirely
# Returns:
#   On macOS, the status of install_system_macos. On Linux and on other
#   systems, 0: the Linux steps are best-effort and unsupported systems only
#   get a warning.
#######################################
install_system_deps() {
  if [ "$SKIP_SYSTEM" = "1" ]; then
    log 'Skipping system package installation'
    return 0
  fi

  local os_name=""
  os_name="$(uname -s)" || os_name=""

  if [ "$os_name" = "Darwin" ]; then
    install_system_macos
  elif [ "$os_name" = "Linux" ]; then
    # Both Linux steps are best-effort: failures do not stop the bootstrap.
    install_system_linux_packages || true
    install_whisper_cpp_linux || true
  else
    warn "Unsupported OS $os_name. Install node/npm, ffmpeg, and whisper-cli manually."
  fi
}
|
|
137
|
+
|
|
138
|
+
#######################################
# Install the edge-tts CLI into a private virtualenv at $ROOT/.venv-tts.
# Skipped when SKIP_EDGE_TTS=1, when an edge-tts command already exists, or
# when the virtualenv already contains an executable edge-tts.
# Globals:
#   SKIP_EDGE_TTS (read), ROOT (read)
# Returns:
#   0 when edge-tts is available or was skipped; 1 when python3 is missing,
#   the virtualenv cannot be created, or the pip install fails.
#######################################
install_edge_tts() {
  if [ "$SKIP_EDGE_TTS" = "1" ]; then
    log 'Skipping Edge TTS helper installation'
    return 0
  fi
  if has_cmd edge-tts; then
    log 'edge-tts already available'
    return 0
  fi

  local venv_dir="$ROOT/.venv-tts"
  local venv_python="$venv_dir/bin/python"

  if [ -x "$venv_dir/bin/edge-tts" ]; then
    log 'Local edge-tts venv already available'
    return 0
  fi
  if ! has_cmd python3; then
    warn 'python3 not found; skipping local edge-tts install'
    return 1
  fi

  log 'Installing local edge-tts helper in .venv-tts'
  # This function is invoked as `install_edge_tts || true`, and bash ignores
  # `set -e` inside a function called that way. Check each step so a failed
  # venv creation does not go on to run a python binary that is not there.
  if ! python3 -m venv "$venv_dir"; then
    warn 'Could not create .venv-tts (is the python3 venv module installed?); skipping local edge-tts install'
    return 1
  fi
  # As before, a failed pip self-upgrade does not stop the edge-tts install.
  "$venv_python" -m pip install --upgrade pip >/dev/null \
    || warn 'Could not upgrade pip inside .venv-tts; continuing with the existing pip'
  if ! "$venv_python" -m pip install edge-tts; then
    warn 'pip could not install edge-tts into .venv-tts'
    return 1
  fi
}
|
|
160
|
+
|
|
161
|
+
#######################################
# Download the default whisper.cpp model to $ROOT/models/ggml-small-q5_1.bin.
# Skipped when SKIP_MODEL=1 or when a non-empty model file already exists.
# Globals:
#   SKIP_MODEL (read), ROOT (read)
# Returns:
#   0 when the model is present or was skipped; 1 when curl is missing or the
#   download fails. A failed download never leaves a model file behind.
#######################################
download_model() {
  if [ "$SKIP_MODEL" = "1" ]; then
    log 'Skipping whisper.cpp model download'
    return 0
  fi

  local model="$ROOT/models/ggml-small-q5_1.bin"
  local partial="$model.tmp"
  local url='https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small-q5_1.bin'

  if [ -s "$model" ]; then
    log 'Default whisper.cpp model already exists'
    return 0
  fi
  if ! has_cmd curl; then
    warn 'curl not found; cannot download whisper.cpp model automatically'
    return 1
  fi

  mkdir -p "$ROOT/models" || return 1
  log 'Downloading default whisper.cpp Korean-capable model'
  # This function is invoked as `download_model || true`, and bash ignores
  # `set -e` inside a function called that way. Without an explicit check, a
  # failed transfer would still be renamed into place, and the truncated file
  # would then pass the "already exists" test on every later run.
  if ! curl -L --fail --retry 3 -o "$partial" "$url" || [ ! -s "$partial" ]; then
    rm -f -- "$partial"
    warn 'Downloading the default whisper.cpp model failed; no model was installed'
    return 1
  fi
  # Download to a temporary name and rename, so the final path only ever
  # holds a completed download.
  mv -- "$partial" "$model"
}
|
|
181
|
+
|
|
182
|
+
# Ask before touching system packages unless the step is already skipped or
# --yes was given; declining turns the step into a skip.
if [ "$SKIP_SYSTEM" != "1" ] && [ "$ASSUME_YES" != "1" ] \
  && ! confirm 'Install missing system prerequisites when supported?'; then
  SKIP_SYSTEM=1
fi

# A failure in the system packages or npm step aborts the script (set -e).
# The Edge TTS helper and the model download are best-effort.
install_system_deps
install_edge_tts || true
install_node_modules
download_model || true

log 'Prerequisite bootstrap complete. Run `vc doctor` after setup to verify.'
|