verbalcoding 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,8 @@ test('package exposes a short vc shell command', () => {
19
19
  assert.ok(pkg.files.includes('app-node/'));
20
20
  assert.ok(pkg.files.includes('scripts/*.mjs'));
21
21
  assert.ok(pkg.files.includes('scripts/*.sh'));
22
- assert.ok(pkg.files.includes('scripts/*.py'));
22
+ assert.ok(pkg.files.includes('integrations/openvoice/'));
23
+ assert.ok(!pkg.files.includes('scripts/*.py'));
23
24
  assert.ok(pkg.files.includes('run.sh'));
24
25
  assert.ok(pkg.files.includes('LICENSE'));
25
26
  });
@@ -39,12 +40,23 @@ test('installer shell script links the vc command during setup', () => {
39
40
 
40
41
  assert.match(script, /bootstrap_prereqs\.sh/);
41
42
  assert.match(script, /--no-wizard/);
43
+ assert.match(script, /--yes\) BOOTSTRAP_ARGS\+=\("\$arg"\); INSTALL_ARGS\+=\("\$arg"\)/);
42
44
  assert.match(script, /VERBALCODING_SKIP_BOOTSTRAP/);
43
45
  assert.match(script, /npm link/);
44
46
  assert.match(script, /Installed shell CLI: vc/);
45
47
  assert.match(script, /VERBALCODING_SKIP_CLI_LINK/);
46
48
  });
47
49
 
50
+ test('npm setup supports non-interactive --yes mode', () => {
51
+ const installer = fs.readFileSync(path.join(ROOT, 'scripts', 'install.mjs'), 'utf8');
52
+ const config = fs.readFileSync(path.join(ROOT, 'app-node', 'install_config.mjs'), 'utf8');
53
+
54
+ assert.match(installer, /args\.includes\('--yes'\)/);
55
+ assert.match(installer, /normalizeInstallAnswers\(process\.env\)/);
56
+ assert.match(config, /vc start/);
57
+ assert.doesNotMatch(config, /npm install -g \.\s+#/);
58
+ });
59
+
48
60
  test('bootstrap script installs cross-platform prerequisites and local model helpers', () => {
49
61
  const script = fs.readFileSync(path.join(ROOT, 'scripts', 'bootstrap_prereqs.sh'), 'utf8');
50
62
 
@@ -8,6 +8,20 @@ test('splitDiscordMessage chunks long text for Discord', () => {
8
8
  assert.deepEqual(chunks.map(c => c.length), [1900, 1900, 201]);
9
9
  });
10
10
 
11
+ test('sendDiscordText returns false without fetching when transcript channel id is missing', async () => {
12
+ const warnings = [];
13
+ let fetched = false;
14
+ const delivered = await sendDiscordText({
15
+ channelId: '',
16
+ text: 'restart complete',
17
+ client: { channels: { fetch: async () => { fetched = true; } } },
18
+ warn: (...args) => warnings.push(args.join(' ')),
19
+ });
20
+ assert.equal(delivered, false);
21
+ assert.equal(fetched, false);
22
+ assert.match(warnings.join('\n'), /missing transcript channel id/);
23
+ });
24
+
11
25
  test('sendDiscordText returns false when target is not text based', async () => {
12
26
  const warnings = [];
13
27
  const delivered = await sendDiscordText({
@@ -247,13 +247,14 @@ export function renderInstallSummary(values = {}) {
247
247
  `Configured Discord voice bridge for harness: ${backend}`,
248
248
  '',
249
249
  'Next commands:',
250
- ' npm install -g . # or ./scripts/install.sh to install the vc command',
251
250
  ' vc doctor',
252
- ' ./run.sh',
251
+ ' vc start',
253
252
  '',
254
253
  'Legacy project-local equivalents still work:',
255
254
  ' npm install',
255
+ ' ./scripts/install.sh',
256
256
  ' npm run doctor',
257
+ ' ./run.sh',
257
258
  '',
258
259
  `Auto-join voice channels: ${values.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general'}`,
259
260
  `TTS backend: ${values.TTS_BACKEND || 'edge'}`,
package/app-node/main.mjs CHANGED
@@ -166,7 +166,7 @@ const settings = {
166
166
  token: process.env.DISCORD_BOT_TOKEN || process.env.DISCORD_TOKEN,
167
167
  allowedUsers: new Set((process.env.DISCORD_ALLOWED_USERS || '').split(/[;,]/).map(s => s.trim()).filter(Boolean)),
168
168
  autoJoinVoiceChannels: (process.env.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general').split(',').map(s => s.trim().toLowerCase()).filter(Boolean),
169
- transcriptChannelId: (process.env.TRANSCRIPT_CHANNEL_ID || '123456789012345678').trim(),
169
+ transcriptChannelId: (process.env.TRANSCRIPT_CHANNEL_ID || '').trim(),
170
170
  whisperBin: process.env.WHISPER_CPP_BIN || 'whisper-cli',
171
171
  whisperModel: process.env.WHISPER_CPP_MODEL || path.join(ROOT, 'models', 'ggml-small-q5_1.bin'),
172
172
  whisperLanguage: process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko',
@@ -1402,26 +1402,32 @@ async function connectTo(channel) {
1402
1402
  selfDeaf: false,
1403
1403
  selfMute: false,
1404
1404
  });
1405
- connection.subscribe(player);
1406
- connection.on('error', e => warn('voice connection error', e?.stack || e));
1407
- connection.on('stateChange', async (oldState, newState) => {
1405
+ const voiceConnection = connection;
1406
+ voiceConnection.subscribe(player);
1407
+ voiceConnection.on('error', e => warn('voice connection error', e?.stack || e));
1408
+ voiceConnection.on('stateChange', async (oldState, newState) => {
1408
1409
  log('voice connection state', oldState.status, '->', newState.status);
1410
+ if (connection !== voiceConnection) {
1411
+ log('ignore stale voice connection state', oldState.status, '->', newState.status);
1412
+ return;
1413
+ }
1409
1414
  if (newState.status === VoiceConnectionStatus.Disconnected) {
1410
1415
  try {
1411
1416
  await Promise.race([
1412
- entersState(connection, VoiceConnectionStatus.Signalling, 5000),
1413
- entersState(connection, VoiceConnectionStatus.Connecting, 5000),
1417
+ entersState(voiceConnection, VoiceConnectionStatus.Signalling, 5000),
1418
+ entersState(voiceConnection, VoiceConnectionStatus.Connecting, 5000),
1414
1419
  ]);
1415
1420
  } catch (e) {
1421
+ if (connection !== voiceConnection) return;
1416
1422
  warn('voice connection disconnected; reconnecting to channel', channel.guild.name, channel.name, e?.message || e);
1417
- try { connection?.destroy(); } catch {}
1423
+ try { voiceConnection.destroy(); } catch {}
1418
1424
  connection = null;
1419
1425
  setTimeout(() => connectTo(channel).catch(err => warn('voice reconnect failed', err?.stack || err)), 1500);
1420
1426
  }
1421
1427
  }
1422
1428
  });
1423
- await entersState(connection, VoiceConnectionStatus.Ready, 30000);
1424
- connection.receiver.speaking.on('start', userId => subscribeUser(connection.receiver, userId));
1429
+ await entersState(voiceConnection, VoiceConnectionStatus.Ready, 30000);
1430
+ voiceConnection.receiver.speaking.on('start', userId => subscribeUser(voiceConnection.receiver, userId));
1425
1431
  log(`Listening in voice channel ${channel.guild.name} / ${channel.name}`);
1426
1432
  }
1427
1433
 
@@ -135,7 +135,7 @@ export function createOpenVoiceBackend(settings, deps = {}) {
135
135
  return edge.synthesize(text, { signal, kind });
136
136
  }
137
137
  const out = uniquePath(tmpdir, 'verbalcoding-openvoice', 'wav');
138
- const script = path.resolve(path.dirname(new URL(import.meta.url).pathname), '..', 'scripts', 'openvoice_synth.py');
138
+ const script = path.resolve(path.dirname(new URL(import.meta.url).pathname), '..', 'integrations', 'openvoice', 'synth.py');
139
139
  const args = [
140
140
  script,
141
141
  '--openvoice-dir', openvoice.dir,
@@ -121,7 +121,7 @@ test('OpenVoice final synthesis calls Python wrapper with reference audio and ou
121
121
  const out = await backend.synthesize('복제 음성 테스트', { kind: 'final' });
122
122
 
123
123
  assert.equal(calls[0].cmd, path.join('/project/.venv-openvoice', 'bin', 'python'));
124
- assert.ok(calls[0].args.some(arg => String(arg).endsWith('scripts/openvoice_synth.py')));
124
+ assert.ok(calls[0].args.some(arg => String(arg).endsWith('integrations/openvoice/synth.py')));
125
125
  assert.ok(calls[0].args.includes('--ref-audio'));
126
126
  assert.ok(calls[0].args.includes('/project/voice-samples/me.wav'));
127
127
  assert.ok(calls[0].args.includes('--text'));
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## Setup Wizard
4
4
 
5
+ Discord bot/application setup is intentionally not re-explained from scratch here. Use these upstream guides for the Discord-side steps, then return to VerbalCoding setup:
6
+
7
+ - Hermes Agent Discord messaging guide: <https://hermes-agent.nousresearch.com/docs/user-guide/messaging/discord>
8
+ - Discord official bot overview: <https://docs.discord.com/developers/bots/overview>
9
+ - Discord official quick start: <https://docs.discord.com/developers/quick-start/getting-started>
10
+
5
11
  ```bash
6
12
  ./scripts/install.sh
7
13
  ```
@@ -165,7 +171,7 @@ Edge TTS remains the default and fallback. To try local voice cloning with OpenV
165
171
  mkdir -p voice-samples
166
172
  # Put a permitted reference sample at voice-samples/user-reference.wav,
167
173
  # or capture one from Discord with !voice-clone capture.
168
- python3 scripts/openvoice_smoke.py
174
+ python3 integrations/openvoice/synth.py --openvoice-dir vendor/OpenVoice --ref-audio voice-samples/user-reference.wav --text '안녕하세요. 버벌코딩 목소리 복제 테스트입니다.' --output /tmp/verbalcoding-openvoice-smoke.wav
169
175
  ```
170
176
 
171
177
  Then set:
@@ -77,6 +77,14 @@ If your OS is unsupported, install these manually before rerunning:
77
77
 
78
78
  ## 3. Discord application setup
79
79
 
80
+ Read the upstream Discord bot setup guides first if this is your first bot:
81
+
82
+ - Hermes Agent Discord messaging guide: <https://hermes-agent.nousresearch.com/docs/user-guide/messaging/discord>
83
+ - Discord official bot overview: <https://docs.discord.com/developers/bots/overview>
84
+ - Discord official getting started guide: <https://docs.discord.com/developers/quick-start/getting-started>
85
+
86
+ Those pages show how to create a Discord application, add a bot user, enable privileged intents, and invite it to a server. VerbalCoding uses the same Discord bot setup, then adds voice receive, STT, CLI-agent execution, and TTS playback on top.
87
+
80
88
  1. Create a Discord application and bot in the Discord Developer Portal.
81
89
  2. Enable the Message Content privileged intent.
82
90
  3. Copy the bot token into the installer prompt or `.env` as `DISCORD_BOT_TOKEN`.
@@ -162,7 +170,7 @@ OpenVoice voice cloning is optional. Keep `TTS_BACKEND=edge` for a fresh public
162
170
  # Download OpenVoice V2 checkpoints into vendor/OpenVoice/checkpoints_v2/
163
171
  # Add a permitted local sample at voice-samples/user-reference.wav,
164
172
  # or run the bot, say "목소리 샘플 녹음 시작해", then speak 10-30 seconds.
165
- python3 scripts/openvoice_smoke.py
173
+ python3 integrations/openvoice/synth.py --openvoice-dir vendor/OpenVoice --ref-audio voice-samples/user-reference.wav --text '안녕하세요. 버벌코딩 목소리 복제 테스트입니다.' --output /tmp/verbalcoding-openvoice-smoke.wav
166
174
  ```
167
175
 
168
176
  Then set `TTS_BACKEND=openvoice`, run `vc doctor`, and test `!voice-test <text>` in Discord.
package/docs/USAGE.md CHANGED
@@ -43,6 +43,13 @@ The bot auto-joins the first configured channel name, defaulting to `일반,Gene
43
43
 
44
44
  ## Discord Commands
45
45
 
46
+ Before wiring commands, set up the Discord application/bot using the upstream guides:
47
+
48
+ - Hermes Agent Discord guide: <https://hermes-agent.nousresearch.com/docs/user-guide/messaging/discord>
49
+ - Discord official bot docs: <https://docs.discord.com/developers/bots/overview>
50
+
51
+ Then use `vc bot invite CLIENT_ID` to generate the VerbalCoding-specific invite URL with text and voice permissions.
52
+
46
53
  | Command | Purpose |
47
54
  |---|---|
48
55
  | `!ping` | Basic bot check |
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## 설정 마법사
4
4
 
5
+ Discord 봇/애플리케이션 생성 절차는 여기에서 처음부터 반복 설명하지 않습니다. Discord 쪽 설정은 아래 상위 문서를 보고 진행한 뒤 VerbalCoding 설정으로 돌아오세요.
6
+
7
+ - Hermes Agent Discord 메시징 가이드: <https://hermes-agent.nousresearch.com/docs/user-guide/messaging/discord>
8
+ - Discord 공식 봇 개요: <https://docs.discord.com/developers/bots/overview>
9
+ - Discord 공식 시작 가이드: <https://docs.discord.com/developers/quick-start/getting-started>
10
+
5
11
  npm으로 설치한 경우:
6
12
 
7
13
  ```bash
@@ -173,7 +179,7 @@ Edge TTS가 기본값이자 fallback입니다. OpenVoice V2로 로컬 음성 복
173
179
  mkdir -p voice-samples
174
180
  # 허가된 기준 샘플을 voice-samples/user-reference.wav에 넣거나,
175
181
  # Discord에서 !voice-clone capture로 샘플을 캡처합니다.
176
- python3 scripts/openvoice_smoke.py
182
+ python3 integrations/openvoice/synth.py --openvoice-dir vendor/OpenVoice --ref-audio voice-samples/user-reference.wav --text '안녕하세요. 버벌코딩 목소리 복제 테스트입니다.' --output /tmp/verbalcoding-openvoice-smoke.wav
177
183
  ```
178
184
 
179
185
  그 뒤 설정:
@@ -77,6 +77,14 @@ OS가 지원되지 않으면 아래를 직접 설치한 뒤 다시 실행하세
77
77
 
78
78
  ## 3. Discord 애플리케이션 설정
79
79
 
80
+ Discord 봇을 처음 만든다면 먼저 공식/상위 문서를 확인하세요.
81
+
82
+ - Hermes Agent Discord 메시징 가이드: <https://hermes-agent.nousresearch.com/docs/user-guide/messaging/discord>
83
+ - Discord 공식 봇 개요: <https://docs.discord.com/developers/bots/overview>
84
+ - Discord 공식 시작 가이드: <https://docs.discord.com/developers/quick-start/getting-started>
85
+
86
+ 위 문서에는 Discord 애플리케이션 생성, bot user 추가, privileged intent 활성화, 서버 초대 방법이 설명되어 있습니다. VerbalCoding도 같은 Discord bot 설정을 사용하고, 그 위에 음성 수신, STT, CLI 에이전트 실행, TTS 재생을 얹습니다.
87
+
80
88
  1. Discord Developer Portal에서 애플리케이션과 봇을 만듭니다.
81
89
  2. Message Content privileged intent를 켭니다.
82
90
  3. 봇 토큰을 설치 프롬프트 또는 `.env`의 `DISCORD_BOT_TOKEN`에 넣습니다.
@@ -162,7 +170,7 @@ OpenVoice 음성 복제는 선택 기능입니다. 공개 설치 직후에는 `T
162
170
  # OpenVoice V2 체크포인트를 vendor/OpenVoice/checkpoints_v2/ 아래에 넣습니다.
163
171
  # 허가된 로컬 샘플을 voice-samples/user-reference.wav에 두거나,
164
172
  # 봇 실행 후 “목소리 샘플 녹음 시작해”라고 말하고 10~30초 발화합니다.
165
- python3 scripts/openvoice_smoke.py
173
+ python3 integrations/openvoice/synth.py --openvoice-dir vendor/OpenVoice --ref-audio voice-samples/user-reference.wav --text '안녕하세요. 버벌코딩 목소리 복제 테스트입니다.' --output /tmp/verbalcoding-openvoice-smoke.wav
166
174
  ```
167
175
 
168
176
  그 뒤 `TTS_BACKEND=openvoice`로 설정하고 `vc doctor`, Discord의 `!voice-test <text>`로 테스트합니다.
@@ -51,6 +51,13 @@ VERBALCODING_INSTANCE_ENV=instances/my-project.env ./run.sh
51
51
 
52
52
  ## Discord 명령
53
53
 
54
+ 명령을 연결하기 전에 먼저 상위 문서대로 Discord 애플리케이션/봇을 설정하세요.
55
+
56
+ - Hermes Agent Discord 가이드: <https://hermes-agent.nousresearch.com/docs/user-guide/messaging/discord>
57
+ - Discord 공식 봇 문서: <https://docs.discord.com/developers/bots/overview>
58
+
59
+ 그 다음 `vc bot invite CLIENT_ID`를 사용하면 VerbalCoding에 필요한 텍스트/음성 권한이 포함된 초대 URL을 만들 수 있습니다.
60
+
54
61
  | 명령 | 용도 |
55
62
  |---|---|
56
63
  | `!ping` | 봇 연결 기본 확인 |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "verbalcoding",
3
- "version": "0.2.3",
3
+ "version": "0.2.5",
4
4
  "description": "Discord voice bridge for CLI coding agents.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -34,7 +34,7 @@
34
34
  "docs/",
35
35
  "scripts/*.mjs",
36
36
  "scripts/*.sh",
37
- "scripts/*.py",
37
+ "integrations/openvoice/",
38
38
  "run.sh",
39
39
  ".env.example",
40
40
  "README.md",
@@ -86,7 +86,7 @@ if (ttsBackend === 'edge') {
86
86
  ok = check('OpenVoice repo', fs.existsSync(openvoiceDir), path.relative(ROOT, openvoiceDir)) && ok;
87
87
  ok = check('OpenVoice venv', fs.existsSync(openvoiceVenv), path.relative(ROOT, openvoiceVenv)) && ok;
88
88
  ok = check('OpenVoice reference audio', fs.existsSync(refAudio), path.relative(ROOT, refAudio)) && ok;
89
- ok = check('OpenVoice synth wrapper help', spawnSync('python3', ['scripts/openvoice_synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'scripts/openvoice_synth.py') && ok;
89
+ ok = check('OpenVoice synth wrapper help', spawnSync('python3', ['integrations/openvoice/synth.py', '--help'], { cwd: ROOT, encoding: 'utf8' }).status === 0, 'integrations/openvoice/synth.py') && ok;
90
90
  note('OpenVoice progress prompts', ['1', 'true', 'yes', 'on'].includes(String(env.OPENVOICE_PROGRESS || '0').toLowerCase()) ? 'openvoice' : 'edge fallback');
91
91
  } else if (ttsBackend === 'speechswift') {
92
92
  const mode = String(env.SPEECHSWIFT_MODE || 'cli').toLowerCase() === 'server' ? 'server' : 'cli';
@@ -17,6 +17,7 @@ async function ask(question, fallback = '', options = {}) {
17
17
 
18
18
  async function main() {
19
19
  const args = process.argv.slice(2);
20
+ const yes = args.includes('--yes') || args.includes('-y');
20
21
  if (args[0] === 'instance' || args.includes('--instance')) {
21
22
  const { spawnSync } = await import('node:child_process');
22
23
  const pass = args[0] === 'instance'
@@ -26,6 +27,19 @@ async function main() {
26
27
  process.exitCode = result.status ?? 1;
27
28
  return;
28
29
  }
30
+ if (yes) {
31
+ const values = normalizeInstallAnswers(process.env);
32
+ const envPath = path.join(ROOT, '.env');
33
+ if (fs.existsSync(envPath)) {
34
+ const backup = `${envPath}.bak-${Date.now()}`;
35
+ fs.copyFileSync(envPath, backup);
36
+ console.log(`Backed up existing .env to ${backup}`);
37
+ }
38
+ fs.writeFileSync(envPath, buildEnvFile(values), { mode: 0o600 });
39
+ console.log(`Wrote ${envPath}`);
40
+ console.log(renderInstallSummary(values));
41
+ return;
42
+ }
29
43
  globalThis.__rl = readline.createInterface({ input, output });
30
44
  try {
31
45
  console.log('VerbalCoding installer');
@@ -9,7 +9,8 @@ for arg in "$@"; do
9
9
  case "$arg" in
10
10
  --no-wizard) RUN_WIZARD=0 ;;
11
11
  --skip-bootstrap) export VERBALCODING_SKIP_BOOTSTRAP=1 ;;
12
- --yes|--skip-system|--skip-model|--skip-edge-tts) BOOTSTRAP_ARGS+=("$arg") ;;
12
+ --yes) BOOTSTRAP_ARGS+=("$arg"); INSTALL_ARGS+=("$arg") ;;
13
+ --skip-system|--skip-model|--skip-edge-tts) BOOTSTRAP_ARGS+=("$arg") ;;
13
14
  *) INSTALL_ARGS+=("$arg") ;;
14
15
  esac
15
16
  done
@@ -29,6 +29,7 @@ Next manual steps:
29
29
  https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip
30
30
  2. Extract them under vendor/OpenVoice/checkpoints_v2/
31
31
  3. Put a permitted reference sample at voice-samples/user-reference.wav
32
- 4. Run: python3 scripts/openvoice_smoke.py
32
+ 4. Smoke test manually if needed:
33
+ python3 integrations/openvoice/synth.py --openvoice-dir vendor/OpenVoice --ref-audio voice-samples/user-reference.wav --text '안녕하세요. 버벌코딩 목소리 복제 테스트입니다.' --output /tmp/verbalcoding-openvoice-smoke.wav
33
34
  5. Set TTS_BACKEND=openvoice in .env and restart VerbalCoding.
34
35
  MSG
@@ -1,34 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Small OpenVoice smoke-test helper for VerbalCoding."""
3
-
4
- from __future__ import annotations
5
-
6
- import argparse
7
- from pathlib import Path
8
- import subprocess
9
- import sys
10
-
11
-
12
- def main() -> int:
13
- parser = argparse.ArgumentParser(description="Run a short Korean OpenVoice smoke test")
14
- parser.add_argument("--openvoice-dir", default="./vendor/OpenVoice")
15
- parser.add_argument("--ref-audio", default="./voice-samples/user-reference.wav")
16
- parser.add_argument("--output", default="/tmp/verbalcoding-openvoice-smoke.wav")
17
- parser.add_argument("--text", default="안녕하세요. 버벌코딩 목소리 복제 테스트입니다.")
18
- args = parser.parse_args()
19
- script = Path(__file__).with_name("openvoice_synth.py")
20
- cmd = [
21
- sys.executable,
22
- str(script),
23
- "--openvoice-dir", args.openvoice_dir,
24
- "--ref-audio", args.ref_audio,
25
- "--text", args.text,
26
- "--language", "KR",
27
- "--style", "default",
28
- "--output", args.output,
29
- ]
30
- return subprocess.call(cmd)
31
-
32
-
33
- if __name__ == "__main__":
34
- raise SystemExit(main())