verbalcoding 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +83 -0
- package/LICENSE +21 -0
- package/README.md +157 -0
- package/app-node/agent_adapters.mjs +576 -0
- package/app-node/agent_adapters.test.mjs +455 -0
- package/app-node/agent_contract.mjs +45 -0
- package/app-node/barge_in.mjs +148 -0
- package/app-node/barge_in.test.mjs +179 -0
- package/app-node/bridge_logger.mjs +66 -0
- package/app-node/bridge_logger.test.mjs +73 -0
- package/app-node/bridge_state.mjs +104 -0
- package/app-node/bridge_state.test.mjs +64 -0
- package/app-node/cli_install.test.mjs +97 -0
- package/app-node/deferred_queue.mjs +12 -0
- package/app-node/deferred_queue.test.mjs +20 -0
- package/app-node/discord_invite_cli.test.mjs +31 -0
- package/app-node/discord_text.mjs +29 -0
- package/app-node/discord_text.test.mjs +32 -0
- package/app-node/hermes_profiles.mjs +164 -0
- package/app-node/hermes_profiles.test.mjs +276 -0
- package/app-node/install_config.mjs +263 -0
- package/app-node/install_config.test.mjs +205 -0
- package/app-node/instance_doctor.mjs +137 -0
- package/app-node/instance_doctor.test.mjs +128 -0
- package/app-node/instance_profile_lifecycle.mjs +16 -0
- package/app-node/instances.mjs +153 -0
- package/app-node/instances.test.mjs +102 -0
- package/app-node/language_config.mjs +73 -0
- package/app-node/language_config.test.mjs +51 -0
- package/app-node/latency_metrics.mjs +133 -0
- package/app-node/latency_metrics.test.mjs +71 -0
- package/app-node/main.mjs +1771 -0
- package/app-node/mcp_tools.mjs +198 -0
- package/app-node/mcp_tools.test.mjs +39 -0
- package/app-node/progress_cache.mjs +7 -0
- package/app-node/progress_cache.test.mjs +23 -0
- package/app-node/progress_speech.mjs +102 -0
- package/app-node/progress_speech.test.mjs +48 -0
- package/app-node/project_sessions.mjs +148 -0
- package/app-node/project_sessions.test.mjs +77 -0
- package/app-node/restart_notice.mjs +57 -0
- package/app-node/restart_notice.test.mjs +37 -0
- package/app-node/restart_policy.mjs +27 -0
- package/app-node/restart_policy.test.mjs +33 -0
- package/app-node/text_routing.mjs +8 -0
- package/app-node/text_routing.test.mjs +18 -0
- package/app-node/tts_backends.mjs +251 -0
- package/app-node/tts_backends.test.mjs +400 -0
- package/app-node/tts_chunks.mjs +57 -0
- package/app-node/tts_chunks.test.mjs +35 -0
- package/app-node/tts_prefetch.mjs +38 -0
- package/app-node/tts_prefetch.test.mjs +49 -0
- package/app-node/tts_settings.mjs +72 -0
- package/app-node/tts_settings.test.mjs +127 -0
- package/app-node/tts_voice_config.mjs +127 -0
- package/app-node/tts_voice_config.test.mjs +64 -0
- package/app-node/voice_clone_capture.mjs +76 -0
- package/app-node/voice_clone_capture.test.mjs +51 -0
- package/app-node/voice_messages.mjs +62 -0
- package/app-node/voice_messages.test.mjs +33 -0
- package/docs/CONFIGURATION.md +183 -0
- package/docs/FRESH_INSTALL.md +193 -0
- package/docs/MULTI_INSTANCE.md +183 -0
- package/docs/RELEASE.md +72 -0
- package/docs/USAGE.md +108 -0
- package/docs/assets/figures/verbalcoding-flow.svg +63 -0
- package/docs/i18n/README.es.md +121 -0
- package/docs/i18n/README.fr.md +121 -0
- package/docs/i18n/README.ja.md +121 -0
- package/docs/i18n/README.ko.md +121 -0
- package/docs/i18n/README.ru.md +121 -0
- package/docs/i18n/README.zh.md +121 -0
- package/package.json +58 -0
- package/run.sh +82 -0
- package/scripts/bootstrap_prereqs.sh +193 -0
- package/scripts/cli.mjs +369 -0
- package/scripts/docker_ubuntu_smoke.sh +76 -0
- package/scripts/doctor.mjs +134 -0
- package/scripts/install.mjs +108 -0
- package/scripts/install.sh +44 -0
- package/scripts/mcp-server.mjs +84 -0
- package/scripts/openvoice_smoke.py +34 -0
- package/scripts/openvoice_synth.py +103 -0
- package/scripts/setup_openvoice.sh +34 -0
- package/scripts/setup_supertonic.sh +18 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import readline from 'node:readline/promises';
|
|
5
|
+
import { stdin as input, stdout as output } from 'node:process';
|
|
6
|
+
import { buildEnvFile, normalizeInstallAnswers, renderInstallSummary, SUPPORTED_HARNESSES } from '../app-node/install_config.mjs';
|
|
7
|
+
|
|
8
|
+
import { fileURLToPath } from 'node:url';

// Package root (the parent of scripts/). fileURLToPath is used instead of
// URL#pathname because pathname stays percent-encoded (a space becomes %20) and
// keeps a leading slash before Windows drive letters, so it is not a usable
// filesystem path for every install location.
const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
|
|
9
|
+
|
|
10
|
+
/**
 * Ask one question on the shared readline interface stored at `globalThis.__rl`.
 *
 * @param {string} question - Prompt text shown before the colon.
 * @param {string} [fallback=''] - Value returned when the trimmed reply is empty.
 * @param {{ fallbackLabel?: string }} [options] - `fallbackLabel`, when not
 *   null/undefined, is shown in the brackets instead of the fallback itself.
 * @returns {Promise<string>} The trimmed reply, or `fallback` when it is empty.
 */
async function ask(question, fallback = '', options = {}) {
  const hint = options.fallbackLabel ?? fallback;
  // An empty hint means there is nothing useful to show in brackets.
  const prompt = hint ? `${question} [${hint}]: ` : `${question}: `;
  const reply = await globalThis.__rl.question(prompt);
  const trimmed = reply.trim();
  if (trimmed) {
    return trimmed;
  }
  return fallback;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Installer entry point.
 *
 * When the first argument is `instance`, or `--instance` appears anywhere, the
 * remaining arguments are forwarded to `scripts/cli.mjs instance setup` in a
 * child process and its exit status becomes this process's exit code.
 *
 * Otherwise the interactive wizard runs: it prompts for each setting (using the
 * current environment variables as defaults), normalizes the answers, backs up
 * any existing `.env` to `.env.bak-<timestamp>`, writes the new `.env`, and
 * prints a summary.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  if (args[0] === 'instance' || args.includes('--instance')) {
    const { spawnSync } = await import('node:child_process');
    const pass = args[0] === 'instance'
      ? args.slice(1)
      : args.filter(arg => arg !== '--instance');
    const result = spawnSync(process.execPath, [path.join(ROOT, 'scripts', 'cli.mjs'), 'instance', 'setup', ...pass], { stdio: 'inherit', cwd: ROOT });
    // status is null when the child was killed by a signal or failed to spawn.
    process.exitCode = result.status ?? 1;
    return;
  }
  // ask() reads the interface from globalThis.__rl, so publish it before the first prompt.
  globalThis.__rl = readline.createInterface({ input, output });
  try {
    console.log('VerbalCoding installer');
    console.log(`Supported harnesses: ${SUPPORTED_HARNESSES.join(', ')}`);
    const harness = await ask('Harness/backend', 'hermes');
    let agentCommand = '';
    let agentLabel = '';
    if (harness.toLowerCase() === 'custom') {
      agentLabel = await ask('Custom harness label', 'Custom Agent');
      agentCommand = await ask('Custom harness command, prompt appended as final argv', 'my-agent run');
    }
    const existingDiscordBotToken = process.env.DISCORD_BOT_TOKEN || '';
    // Show "keep existing" instead of echoing the current token in the prompt.
    const discordBotToken = await ask('Discord bot token (DISCORD_BOT_TOKEN)', existingDiscordBotToken, { fallbackLabel: existingDiscordBotToken ? 'keep existing' : '' });
    const allowedUsers = await ask('Allowed Discord user IDs, comma-separated', process.env.DISCORD_ALLOWED_USERS || '');
    const autoJoinVoiceChannels = await ask('Auto-join voice channel names', process.env.AUTO_JOIN_VOICE_CHANNELS || '일반,General,general');
    const transcriptChannelId = await ask('Transcript text channel/thread ID', process.env.TRANSCRIPT_CHANNEL_ID || '');
    const language = await ask('Default voice language: ko/en/auto', process.env.VOICE_LANGUAGE || process.env.WHISPER_CPP_LANGUAGE || process.env.STT_LANGUAGE || 'ko');
    const ttsBackend = await ask('TTS backend: edge/openvoice/speechswift/supertonic', process.env.TTS_BACKEND || 'edge');
    const edgeTtsCommand = await ask('Edge TTS command', process.env.EDGE_TTS_COMMAND || process.env.TTS_EDGE_COMMAND || 'edge-tts');
    const ttsVoice = await ask('TTS voice', process.env.TTS_VOICE || 'ko-KR-SunHiNeural');
    const ttsRate = await ask('TTS rate', process.env.TTS_RATE || '+10%');
    const ttsVolume = await ask('TTS playback volume', process.env.TTS_VOLUME || '1.0');
    const supertonicCommand = await ask('Supertonic command', process.env.SUPERTONIC_COMMAND || 'supertonic');
    const supertonicVoice = await ask('Supertonic voice', process.env.SUPERTONIC_VOICE || 'M1');
    const supertonicLanguage = await ask('Supertonic language', process.env.SUPERTONIC_LANGUAGE || 'ko');
    const supertonicSteps = await ask('Supertonic steps', process.env.SUPERTONIC_STEPS || '2');
    const supertonicSpeed = await ask('Supertonic speed', process.env.SUPERTONIC_SPEED || '1.0');
    const openvoiceDir = await ask('OpenVoice repo dir', process.env.OPENVOICE_DIR || './vendor/OpenVoice');
    const openvoiceVenv = await ask('OpenVoice venv dir', process.env.OPENVOICE_VENV || './.venv-openvoice');
    const openvoiceRefAudio = await ask('OpenVoice reference audio path', process.env.OPENVOICE_REF_AUDIO || './voice-samples/user-reference.wav');
    // Only the exact answer "1" enables these flags; anything else is treated as off.
    const requireWake = (await ask('Require wake word? 1/0', process.env.REQUIRE_WAKE_WORD || '0')) === '1';
    const verboseProgress = (await ask('Verbose progress by default? 1/0', process.env.AGENT_VERBOSE_PROGRESS || process.env.VERBALCODING_VERBOSE_PROGRESS || '0')) === '1';
    const utteranceIdleMs = await ask('Utterance idle wait before STT, ms', process.env.UTTERANCE_IDLE_MS || '2000');
    const latencyLogPath = await ask('Latency JSONL log path', process.env.LATENCY_LOG_PATH || './.logs/latency.jsonl');

    const values = normalizeInstallAnswers({
      harness,
      agentLabel,
      agentCommand,
      discordBotToken,
      allowedUsers,
      autoJoinVoiceChannels,
      transcriptChannelId,
      language,
      ttsBackend,
      edgeTtsCommand,
      ttsVoice,
      ttsRate,
      ttsVolume,
      supertonicCommand,
      supertonicVoice,
      supertonicLanguage,
      supertonicSteps,
      supertonicSpeed,
      openvoiceDir,
      openvoiceVenv,
      openvoiceRefAudio,
      requireWakeWord: requireWake,
      verboseProgress,
      utteranceIdleMs,
      latencyLogPath,
    });
    const envPath = path.join(ROOT, '.env');
    if (fs.existsSync(envPath)) {
      const backup = `${envPath}.bak-${Date.now()}`;
      fs.copyFileSync(envPath, backup);
      console.log(`Backed up existing .env to ${backup}`);
    }
    fs.writeFileSync(envPath, buildEnvFile(values), { mode: 0o600 });
    // `mode` only takes effect when writeFileSync creates the file. An existing
    // .env keeps its previous permission bits, so tighten them explicitly: the
    // file holds the Discord bot token. Best effort, since some filesystems
    // reject chmod.
    try {
      fs.chmodSync(envPath, 0o600);
    } catch (err) {
      console.error(`Warning: could not restrict permissions on ${envPath}: ${err?.message || err}`);
    }
    console.log(`Wrote ${envPath}`);
    console.log(renderInstallSummary(values));
  } finally {
    // Always release stdin so the process can exit, even if a prompt or write failed.
    globalThis.__rl.close();
  }
}
|
|
104
|
+
|
|
105
|
+
// Top-level failure handler: print the stack (or the raw value when there is
// none) and exit with a non-zero status.
main().catch((failure) => {
  const detail = failure?.stack || failure;
  console.error(detail);
  process.exit(1);
});
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# VerbalCoding installer: bootstrap prerequisites, link the shell CLI, then run
# the interactive .env wizard.
#
# Flags:
#   --no-wizard        skip the interactive wizard (scripts/install.mjs)
#   --skip-bootstrap   skip scripts/bootstrap_prereqs.sh (same as
#                      VERBALCODING_SKIP_BOOTSTRAP=1)
#   --yes | --skip-system | --skip-model | --skip-edge-tts
#                      forwarded to scripts/bootstrap_prereqs.sh
#   anything else      forwarded to scripts/install.mjs
#
# Environment:
#   VERBALCODING_SKIP_BOOTSTRAP=1  skip the bootstrap script
#   VERBALCODING_SKIP_CLI_LINK=1   do not run `npm link`
set -euo pipefail
cd "$(dirname "$0")/.."

RUN_WIZARD=1
BOOTSTRAP_ARGS=()
INSTALL_ARGS=()
for arg in "$@"; do
  case "$arg" in
    --no-wizard) RUN_WIZARD=0 ;;
    --skip-bootstrap) export VERBALCODING_SKIP_BOOTSTRAP=1 ;;
    --yes|--skip-system|--skip-model|--skip-edge-tts) BOOTSTRAP_ARGS+=("$arg") ;;
    *) INSTALL_ARGS+=("$arg") ;;
  esac
done

if [ "${VERBALCODING_SKIP_BOOTSTRAP:-0}" != "1" ]; then
  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A bare
  # "${arr[@]}" aborts with "unbound variable" under `set -u` on bash < 4.4,
  # which includes the bash 3.2 that ships with macOS.
  ./scripts/bootstrap_prereqs.sh ${BOOTSTRAP_ARGS[@]+"${BOOTSTRAP_ARGS[@]}"}
elif [ ! -d node_modules ]; then
  # Bootstrap was skipped, so make sure dependencies are installed at least once.
  if ! command -v node >/dev/null 2>&1; then
    echo "node is required. Install Node.js first, or rerun without VERBALCODING_SKIP_BOOTSTRAP=1." >&2
    exit 1
  fi
  npm install
fi

# Point the wizard at the project-local edge-tts when there is none on PATH.
if [ -x "./.venv-tts/bin/edge-tts" ] && ! command -v edge-tts >/dev/null 2>&1; then
  export EDGE_TTS_COMMAND="$(pwd)/.venv-tts/bin/edge-tts"
fi

if [ "${VERBALCODING_SKIP_CLI_LINK:-0}" != "1" ]; then
  # Linking is best effort: a failure is reported but does not abort the install.
  if npm link >/dev/null 2>&1; then
    echo "Installed shell CLI: vc"
  else
    echo "Warning: could not install shell CLI with npm link." >&2
    echo "Run this later from the project root: npm link" >&2
  fi
fi

if [ "$RUN_WIZARD" = "1" ]; then
  # Same empty-array guard as above for bash < 4.4 under `set -u`.
  node scripts/install.mjs ${INSTALL_ARGS[@]+"${INSTALL_ARGS[@]}"}
else
  echo "Skipped interactive .env wizard. Run ./scripts/install.sh later or copy .env.example to .env."
fi
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
|
|
5
|
+
import { createVerbalCodingMcpTools, toolResultContent } from '../app-node/mcp_tools.mjs';
|
|
6
|
+
|
|
7
|
+
const ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
|
|
8
|
+
// Tool registry built for this package root: `toolDefs` is what `tools/list`
// reports, and `tools` is looked up by name (via `.get`) when handling `tools/call`.
const { toolDefs, tools } = createVerbalCodingMcpTools({ root: ROOT });
|
|
9
|
+
|
|
10
|
+
// Newline-delimited message reader: stdin chunks accumulate in `input`, and
// every complete line is handed to handleLine(). Text after the last newline
// stays buffered until more data arrives.
let input = '';
process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  input += chunk;
  const segments = input.split('\n');
  // The final segment is the (possibly empty) unterminated tail; keep it for the next chunk.
  input = segments.pop();
  for (const segment of segments) {
    const line = segment.trim();
    // Blank lines are skipped; handleLine is fire-and-forget by design.
    if (line) void handleLine(line);
  }
});
|
|
21
|
+
|
|
22
|
+
/**
 * Write one message to stdout as a single line of JSON.
 *
 * @param {unknown} message - Value to serialize with JSON.stringify.
 */
function send(message) {
  const payload = JSON.stringify(message);
  process.stdout.write(payload + '\n');
}
|
|
25
|
+
|
|
26
|
+
/**
 * Send a JSON-RPC 2.0 success response, unless the request carried no id.
 *
 * @param {string|number|null|undefined} id - Request id; null or undefined means nothing is sent.
 * @param {unknown} result - Value placed in the response's `result` member.
 */
function sendResult(id, result) {
  // `!= null` is false only for null and undefined, so ids such as 0 or '' still get a reply.
  if (id != null) {
    send({ jsonrpc: '2.0', id, result });
  }
}
|
|
30
|
+
|
|
31
|
+
/**
 * Send a JSON-RPC 2.0 error response, unless the request carried no id.
 *
 * @param {string|number|null|undefined} id - Request id; null or undefined means nothing is sent.
 * @param {number} code - Error code placed in `error.code`.
 * @param {string} message - Text placed in `error.message`.
 */
function sendError(id, code, message) {
  // `!= null` is false only for null and undefined, so ids such as 0 or '' still get a reply.
  if (id != null) {
    const error = { code, message };
    send({ jsonrpc: '2.0', id, error });
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Handle one line of input as a JSON-RPC 2.0 message and write any response
 * through send()/sendResult()/sendError().
 *
 * Supported methods: `initialize`, `notifications/initialized` (no reply),
 * `ping`, `tools/list`, and `tools/call`. Anything else gets -32601.
 * Malformed JSON gets -32700 and a non-object payload gets -32600, both with
 * `id: null`. Tool handler failures are reported as a normal result with
 * `isError: true` rather than as a protocol error.
 *
 * @param {string} line - One non-empty, trimmed line of input.
 * @returns {Promise<void>}
 */
async function handleLine(line) {
  let request;
  try {
    request = JSON.parse(line);
  } catch {
    // A parse error has no request id, so JSON-RPC 2.0 answers it with id null.
    // sendError() deliberately drops null ids, so reply through send() directly.
    send({ jsonrpc: '2.0', id: null, error: { code: -32700, message: 'Parse error' } });
    return;
  }
  // Valid JSON that is not a single request object (null, a number, a string,
  // an array) would otherwise throw while destructuring below, outside any try
  // block, and surface as an unhandled rejection.
  if (request === null || typeof request !== 'object' || Array.isArray(request)) {
    send({ jsonrpc: '2.0', id: null, error: { code: -32600, message: 'Invalid Request' } });
    return;
  }
  const { id, method } = request;
  // Treat an explicit `params: null` the same as omitted params.
  const params = request.params ?? {};
  try {
    if (method === 'initialize') {
      sendResult(id, {
        // Echo the client's protocol version when it supplies one.
        protocolVersion: params.protocolVersion || '2024-11-05',
        capabilities: { tools: {} },
        serverInfo: { name: 'verbalcoding', version: '0.1.0' },
      });
      return;
    }
    if (method === 'notifications/initialized') return;
    if (method === 'ping') {
      sendResult(id, {});
      return;
    }
    if (method === 'tools/list') {
      sendResult(id, {
        // Expose only the public fields of each tool definition.
        tools: toolDefs.map(({ name, description, inputSchema }) => ({ name, description, inputSchema })),
      });
      return;
    }
    if (method === 'tools/call') {
      const name = params.name;
      const tool = tools.get(name);
      if (!tool) {
        sendError(id, -32602, `Unknown tool: ${name}`);
        return;
      }
      try {
        const result = await tool.handler(params.arguments || {});
        sendResult(id, { content: toolResultContent(result), isError: false });
      } catch (e) {
        // Tool failures are returned to the caller as tool output, not as a protocol error.
        sendResult(id, { content: toolResultContent({ error: String(e?.message || e) }), isError: true });
      }
      return;
    }
    sendError(id, -32601, `Method not found: ${method}`);
  } catch (e) {
    sendError(id, -32603, String(e?.message || e));
  }
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Small OpenVoice smoke-test helper for VerbalCoding."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main() -> int:
    """Run openvoice_synth.py once with smoke-test defaults.

    Parses --openvoice-dir, --ref-audio, --output and --text (all optional),
    then invokes the sibling openvoice_synth.py with the current interpreter,
    always passing language "KR" and style "default".

    Returns:
        The exit status of the synthesis subprocess.
    """
    parser = argparse.ArgumentParser(description="Run a short Korean OpenVoice smoke test")
    option_defaults = (
        ("--openvoice-dir", "./vendor/OpenVoice"),
        ("--ref-audio", "./voice-samples/user-reference.wav"),
        ("--output", "/tmp/verbalcoding-openvoice-smoke.wav"),
        ("--text", "안녕하세요. 버벌코딩 목소리 복제 테스트입니다."),
    )
    for flag, default in option_defaults:
        parser.add_argument(flag, default=default)
    options = parser.parse_args()

    # The synthesis wrapper sits next to this file.
    synth_script = Path(__file__).with_name("openvoice_synth.py")
    forwarded = (
        ("--openvoice-dir", options.openvoice_dir),
        ("--ref-audio", options.ref_audio),
        ("--text", options.text),
        ("--language", "KR"),
        ("--style", "default"),
        ("--output", options.output),
    )
    command = [sys.executable, str(synth_script)]
    for flag, value in forwarded:
        command.extend((flag, value))
    return subprocess.call(command)


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Synthesize speech with OpenVoice V2 for VerbalCoding.
|
|
3
|
+
|
|
4
|
+
This wrapper intentionally imports OpenVoice lazily inside main(), so --help and
|
|
5
|
+
basic argument validation work before the optional OpenVoice environment is set up.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def parse_args() -> argparse.Namespace:
    """Parse the command line for the OpenVoice synthesis wrapper.

    --openvoice-dir, --ref-audio, --text and --output are required;
    --language, --style and --speed are optional.

    Returns:
        The parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(description="OpenVoice V2 synthesis wrapper for VerbalCoding")
    # (flag, keyword arguments) in the order they should appear in --help.
    option_specs = [
        ("--openvoice-dir", {"required": True, "help": "Path to cloned myshell-ai/OpenVoice repo"}),
        ("--ref-audio", {"required": True, "help": "User-owned reference voice sample"}),
        ("--text", {"required": True, "help": "Text to synthesize"}),
        ("--language", {"default": "KR", "help": "MeloTTS language code, e.g. KR"}),
        ("--style", {"default": "default", "help": "Reserved style label for future OpenVoice control"}),
        ("--output", {"required": True, "help": "Output WAV path"}),
        ("--speed", {"type": float, "default": 1.0, "help": "Base TTS speed"}),
    ]
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def fail(message: str, code: int = 2) -> None:
    """Report an error on stderr and terminate the process.

    Args:
        message: Text printed after the ``openvoice_synth: `` prefix.
        code: Process exit status (default 2).

    Raises:
        SystemExit: Always, carrying ``code``.
    """
    sys.stderr.write(f"openvoice_synth: {message}\n")
    sys.exit(code)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def main() -> int:
    """Synthesize the requested text and convert it to the reference voice.

    Steps: validate the arguments, import torch/MeloTTS/OpenVoice from the given
    repo directory, generate base speech with MeloTTS into a temporary WAV, then
    run the OpenVoice tone-color converter to write the final output file.

    Exit codes raised through fail():
        2: missing OpenVoice directory, missing reference audio, or empty text
        3: torch / MeloTTS / OpenVoice could not be imported
        4: converter checkpoints or the source speaker embedding are missing
        5: the converter produced no (or an empty) output file

    Returns:
        0 on success.
    """
    args = parse_args()
    openvoice_dir = Path(args.openvoice_dir).expanduser().resolve()
    ref_audio = Path(args.ref_audio).expanduser().resolve()
    output = Path(args.output).expanduser().resolve()

    if not openvoice_dir.exists():
        fail(f"OpenVoice directory not found: {openvoice_dir}")
    if not ref_audio.exists():
        fail(f"reference audio not found: {ref_audio}")
    if not args.text.strip():
        fail("text is empty")

    # Imports are deferred so --help and the checks above work without the
    # optional OpenVoice environment; the repo directory goes first on sys.path.
    sys.path.insert(0, str(openvoice_dir))
    try:
        import torch  # type: ignore
        from melo.api import TTS  # type: ignore
        from openvoice.api import ToneColorConverter  # type: ignore
    except Exception as exc:  # pragma: no cover - depends on optional env
        fail(
            "OpenVoice/MeloTTS import failed. Run scripts/setup_openvoice.sh "
            f"and install checkpoints first. Detail: {exc}",
            code=3,
        )

    ckpt_converter = openvoice_dir / "checkpoints_v2" / "converter"
    config = ckpt_converter / "config.json"
    checkpoint = ckpt_converter / "checkpoint.pth"
    if not config.exists() or not checkpoint.exists():
        # Report the directory that was actually checked; --openvoice-dir is
        # not necessarily vendor/OpenVoice.
        fail(f"OpenVoice V2 checkpoints missing under {ckpt_converter}", code=4)

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
        # The OpenVoice demo disables MPS for parts of MeloTTS; CPU is more predictable on macOS.
        device = "cpu"

    output.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory(prefix="verbalcoding-openvoice-") as tmp:
        # Intermediate MeloTTS output; removed together with the temp directory.
        tmp_wav = Path(tmp) / "base.wav"
        tone_color_converter = ToneColorConverter(str(config), device=device)
        # NOTE(review): presumably this turns off OpenVoice's audio watermarking
        # step - confirm against openvoice.api.
        tone_color_converter.watermark_model = None
        tone_color_converter.load_ckpt(str(checkpoint))
        target_se = tone_color_converter.extract_se([str(ref_audio)])

        model = TTS(language=args.language, device=device)
        speaker_ids = model.hps.data.spk2id
        # Use the first speaker the model lists for this language.
        speaker_key = next(iter(speaker_ids.keys()))
        speaker_id = speaker_ids[speaker_key]
        # Embedding files are looked up by the lower-cased, hyphenated speaker key.
        speaker_file_key = speaker_key.lower().replace("_", "-")
        source_se_path = openvoice_dir / "checkpoints_v2" / "base_speakers" / "ses" / f"{speaker_file_key}.pth"
        if not source_se_path.exists():
            fail(f"source speaker embedding missing: {source_se_path}", code=4)
        source_se = torch.load(str(source_se_path), map_location=device)

        model.tts_to_file(args.text, speaker_id, str(tmp_wav), speed=args.speed)
        tone_color_converter.convert(
            audio_src_path=str(tmp_wav),
            src_se=source_se,
            tgt_se=target_se,
            output_path=str(output),
            message="@VerbalCoding",
        )

    if not output.exists() or output.stat().st_size <= 0:
        fail("OpenVoice produced empty output", code=5)
    print(f"openvoice_synth: wrote {output}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Set up the optional OpenVoice TTS backend: clone the OpenVoice repo, create a
# dedicated virtualenv, and install OpenVoice plus MeloTTS into it.
#
# Environment:
#   OPENVOICE_DIR   where to clone OpenVoice (default: vendor/OpenVoice)
#   OPENVOICE_VENV  virtualenv directory     (default: .venv-openvoice)
# Relative paths are resolved against the project root.
set -euo pipefail
cd "$(dirname "$0")/.."

OPENVOICE_DIR="${OPENVOICE_DIR:-vendor/OpenVoice}"
OPENVOICE_VENV="${OPENVOICE_VENV:-.venv-openvoice}"

mkdir -p "$(dirname "$OPENVOICE_DIR")" voice-samples
if [ ! -d "$OPENVOICE_DIR/.git" ]; then
  git clone https://github.com/myshell-ai/OpenVoice "$OPENVOICE_DIR"
else
  echo "OpenVoice repo already exists: $OPENVOICE_DIR"
fi

if [ ! -x "$OPENVOICE_VENV/bin/python" ]; then
  python3 -m venv "$OPENVOICE_VENV"
fi
# shellcheck disable=SC1091
. "$OPENVOICE_VENV/bin/activate"
python -m pip install --upgrade pip setuptools wheel
python -m pip install -e "$OPENVOICE_DIR"
python -m pip install git+https://github.com/myshell-ai/MeloTTS.git
# Best effort: a failed dictionary download does not abort the setup.
python -m unidic download || true

# The heredoc is unquoted on purpose so the real repo and venv paths appear in
# the instructions. The smoke test has to run with the venv interpreter: the
# system python3 does not have the packages installed above.
cat <<MSG
OpenVoice Python environment is installed.
Next manual steps:
1. Download OpenVoice V2 checkpoints from:
   https://myshell-public-repo-host.s3.amazonaws.com/openvoice/checkpoints_v2_0417.zip
2. Extract them under $OPENVOICE_DIR/checkpoints_v2/
3. Put a permitted reference sample at voice-samples/user-reference.wav
4. Run: $OPENVOICE_VENV/bin/python scripts/openvoice_smoke.py --openvoice-dir $OPENVOICE_DIR
5. Set TTS_BACKEND=openvoice in .env and restart VerbalCoding.
MSG
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Install the Supertonic CLI into its own virtualenv and create its cache directory.
#
# Environment:
#   SUPERTONIC_VENV       virtualenv directory (default: <root>/.venv-supertonic)
#   SUPERTONIC_CACHE_DIR  cache directory      (default: <root>/.cache/supertonic)
set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VENV="${SUPERTONIC_VENV:-$ROOT/.venv-supertonic}"
CACHE_DIR="${SUPERTONIC_CACHE_DIR:-$ROOT/.cache/supertonic}"
venv_python="$VENV/bin/python"
supertonic_bin="$VENV/bin/supertonic"

python3 -m venv "$VENV"
"$venv_python" -m pip install --upgrade pip wheel setuptools
"$venv_python" -m pip install supertonic
mkdir -p "$CACHE_DIR"

# Pre-download the model when the installed CLI supports it. If the command is
# unavailable in an older package build, leave download to the first synth call.
if ! SUPERTONIC_CACHE_DIR="$CACHE_DIR" "$supertonic_bin" download; then
  :
fi

printf '%s\n' "Supertonic installed: $supertonic_bin"
printf '%s\n' "Set SUPERTONIC_COMMAND=\"$supertonic_bin\" and TTS_BACKEND=\"supertonic\" to use it."
|