verbalcoding 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/.env.example +83 -0
  2. package/LICENSE +21 -0
  3. package/README.md +157 -0
  4. package/app-node/agent_adapters.mjs +576 -0
  5. package/app-node/agent_adapters.test.mjs +455 -0
  6. package/app-node/agent_contract.mjs +45 -0
  7. package/app-node/barge_in.mjs +148 -0
  8. package/app-node/barge_in.test.mjs +179 -0
  9. package/app-node/bridge_logger.mjs +66 -0
  10. package/app-node/bridge_logger.test.mjs +73 -0
  11. package/app-node/bridge_state.mjs +104 -0
  12. package/app-node/bridge_state.test.mjs +64 -0
  13. package/app-node/cli_install.test.mjs +97 -0
  14. package/app-node/deferred_queue.mjs +12 -0
  15. package/app-node/deferred_queue.test.mjs +20 -0
  16. package/app-node/discord_invite_cli.test.mjs +31 -0
  17. package/app-node/discord_text.mjs +29 -0
  18. package/app-node/discord_text.test.mjs +32 -0
  19. package/app-node/hermes_profiles.mjs +164 -0
  20. package/app-node/hermes_profiles.test.mjs +276 -0
  21. package/app-node/install_config.mjs +263 -0
  22. package/app-node/install_config.test.mjs +205 -0
  23. package/app-node/instance_doctor.mjs +137 -0
  24. package/app-node/instance_doctor.test.mjs +128 -0
  25. package/app-node/instance_profile_lifecycle.mjs +16 -0
  26. package/app-node/instances.mjs +153 -0
  27. package/app-node/instances.test.mjs +102 -0
  28. package/app-node/language_config.mjs +73 -0
  29. package/app-node/language_config.test.mjs +51 -0
  30. package/app-node/latency_metrics.mjs +133 -0
  31. package/app-node/latency_metrics.test.mjs +71 -0
  32. package/app-node/main.mjs +1771 -0
  33. package/app-node/mcp_tools.mjs +198 -0
  34. package/app-node/mcp_tools.test.mjs +39 -0
  35. package/app-node/progress_cache.mjs +7 -0
  36. package/app-node/progress_cache.test.mjs +23 -0
  37. package/app-node/progress_speech.mjs +102 -0
  38. package/app-node/progress_speech.test.mjs +48 -0
  39. package/app-node/project_sessions.mjs +148 -0
  40. package/app-node/project_sessions.test.mjs +77 -0
  41. package/app-node/restart_notice.mjs +57 -0
  42. package/app-node/restart_notice.test.mjs +37 -0
  43. package/app-node/restart_policy.mjs +27 -0
  44. package/app-node/restart_policy.test.mjs +33 -0
  45. package/app-node/text_routing.mjs +8 -0
  46. package/app-node/text_routing.test.mjs +18 -0
  47. package/app-node/tts_backends.mjs +251 -0
  48. package/app-node/tts_backends.test.mjs +400 -0
  49. package/app-node/tts_chunks.mjs +57 -0
  50. package/app-node/tts_chunks.test.mjs +35 -0
  51. package/app-node/tts_prefetch.mjs +38 -0
  52. package/app-node/tts_prefetch.test.mjs +49 -0
  53. package/app-node/tts_settings.mjs +72 -0
  54. package/app-node/tts_settings.test.mjs +127 -0
  55. package/app-node/tts_voice_config.mjs +127 -0
  56. package/app-node/tts_voice_config.test.mjs +64 -0
  57. package/app-node/voice_clone_capture.mjs +76 -0
  58. package/app-node/voice_clone_capture.test.mjs +51 -0
  59. package/app-node/voice_messages.mjs +62 -0
  60. package/app-node/voice_messages.test.mjs +33 -0
  61. package/docs/CONFIGURATION.md +183 -0
  62. package/docs/FRESH_INSTALL.md +193 -0
  63. package/docs/MULTI_INSTANCE.md +183 -0
  64. package/docs/RELEASE.md +72 -0
  65. package/docs/USAGE.md +108 -0
  66. package/docs/assets/figures/verbalcoding-flow.svg +63 -0
  67. package/docs/i18n/README.es.md +121 -0
  68. package/docs/i18n/README.fr.md +121 -0
  69. package/docs/i18n/README.ja.md +121 -0
  70. package/docs/i18n/README.ko.md +121 -0
  71. package/docs/i18n/README.ru.md +121 -0
  72. package/docs/i18n/README.zh.md +121 -0
  73. package/package.json +58 -0
  74. package/run.sh +82 -0
  75. package/scripts/bootstrap_prereqs.sh +193 -0
  76. package/scripts/cli.mjs +369 -0
  77. package/scripts/docker_ubuntu_smoke.sh +76 -0
  78. package/scripts/doctor.mjs +134 -0
  79. package/scripts/install.mjs +108 -0
  80. package/scripts/install.sh +44 -0
  81. package/scripts/mcp-server.mjs +84 -0
  82. package/scripts/openvoice_smoke.py +34 -0
  83. package/scripts/openvoice_synth.py +103 -0
  84. package/scripts/setup_openvoice.sh +34 -0
  85. package/scripts/setup_supertonic.sh +18 -0
@@ -0,0 +1,127 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import path from 'node:path';
4
+
5
+ import { buildTtsSettings } from './tts_settings.mjs';
6
+
7
/** Project root handed to every buildTtsSettings call in this file. */
const PROJECT_ROOT = '/project';

/**
 * Asserts that each key listed in `expected` is strictly equal on `actual`.
 * Keys present on `actual` but not listed in `expected` are ignored.
 */
function expectFields(actual, expected) {
  for (const [key, want] of Object.entries(expected)) {
    assert.equal(actual[key], want);
  }
}

test('buildTtsSettings defaults to Edge TTS with Korean voice', () => {
  const settings = buildTtsSettings({}, PROJECT_ROOT);

  assert.equal(settings.backend, 'edge');
  expectFields(settings.edge, {
    command: 'edge-tts',
    voice: 'ko-KR-SunHiNeural',
    rate: '+10%',
  });
  assert.equal(settings.maxChars, 495);
  assert.equal(settings.volume, 1.0);
  assert.equal(settings.progressCacheDir, path.join(PROJECT_ROOT, '.cache', 'progress-tts'));
});

test('buildTtsSettings allows explicit Edge TTS command path', () => {
  const env = { EDGE_TTS_COMMAND: '/project/.venv/bin/edge-tts' };
  const { edge } = buildTtsSettings(env, PROJECT_ROOT);

  assert.equal(edge.command, '/project/.venv/bin/edge-tts');
});

test('buildTtsSettings normalizes OpenVoice settings and keeps Edge fallback', () => {
  const env = {
    TTS_BACKEND: 'openvoice',
    TTS_VOICE: 'ko-KR-InJoonNeural',
    TTS_RATE: '+5%',
    OPENVOICE_DIR: './vendor/OpenVoice',
    OPENVOICE_VENV: './.venv-openvoice',
    OPENVOICE_REF_AUDIO: './voice-samples/me.wav',
    OPENVOICE_LANGUAGE: 'KR',
    OPENVOICE_STYLE: 'cheerful',
    OPENVOICE_TIMEOUT_MS: '12345',
    OPENVOICE_PROGRESS: '1',
    TTS_MAX_CHARS: '333',
    PROGRESS_TTS_CACHE_DIR: './.cache/progress',
  };
  const settings = buildTtsSettings(env, PROJECT_ROOT);

  assert.equal(settings.backend, 'openvoice');
  expectFields(settings.edge, { voice: 'ko-KR-InJoonNeural', rate: '+5%' });
  assert.equal(settings.maxChars, 333);
  assert.equal(settings.progressCacheDir, path.join(PROJECT_ROOT, '.cache', 'progress'));
  // Relative paths from the environment are expected to come back joined onto the root.
  expectFields(settings.openvoice, {
    dir: path.join(PROJECT_ROOT, 'vendor', 'OpenVoice'),
    venv: path.join(PROJECT_ROOT, '.venv-openvoice'),
    refAudio: path.join(PROJECT_ROOT, 'voice-samples', 'me.wav'),
    language: 'KR',
    style: 'cheerful',
    timeoutMs: 12345,
    useForProgress: true,
  });
});

test('buildTtsSettings normalizes speech-swift CosyVoice settings', () => {
  const env = {
    TTS_BACKEND: 'speechswift',
    SPEECHSWIFT_COMMAND: 'audio',
    SPEECHSWIFT_ENGINE: 'cosyvoice',
    SPEECHSWIFT_LANGUAGE: 'korean',
    SPEECHSWIFT_REF_AUDIO: './voice-samples/me.wav',
    SPEECHSWIFT_MODEL_ID: 'aufklarer/CosyVoice3-0.5B-MLX-4bit',
    SPEECHSWIFT_TIMEOUT_MS: '120000',
    SPEECHSWIFT_STREAM: '1',
    SPEECHSWIFT_PROGRESS: '0',
    SPEECHSWIFT_MODE: 'server',
    SPEECHSWIFT_SERVER_URL: 'http://127.0.0.1:18080/',
  };
  const settings = buildTtsSettings(env, PROJECT_ROOT);

  assert.equal(settings.backend, 'speechswift');
  expectFields(settings.speechswift, {
    command: 'audio',
    engine: 'cosyvoice',
    language: 'korean',
    refAudio: path.join(PROJECT_ROOT, 'voice-samples', 'me.wav'),
    modelId: 'aufklarer/CosyVoice3-0.5B-MLX-4bit',
    timeoutMs: 120000,
    stream: true,
    useForProgress: false,
    mode: 'server',
    // The trailing slash supplied in the environment is expected to be gone.
    serverUrl: 'http://127.0.0.1:18080',
  });
});

test('buildTtsSettings normalizes Supertonic local backend settings', () => {
  const env = {
    TTS_BACKEND: 'supertonic',
    SUPERTONIC_COMMAND: './.venv-supertonic/bin/supertonic',
    SUPERTONIC_VOICE: 'M4',
    SUPERTONIC_LANGUAGE: 'ko',
    SUPERTONIC_STEPS: '3',
    SUPERTONIC_SPEED: '1.15',
    SUPERTONIC_MAX_CHUNK_LENGTH: '240',
    SUPERTONIC_SILENCE_DURATION: '0.1',
    SUPERTONIC_CUSTOM_STYLE_PATH: './voice-styles/custom.json',
    SUPERTONIC_TIMEOUT_MS: '45000',
    SUPERTONIC_PROGRESS: '1',
    SUPERTONIC_CACHE_DIR: './.cache/supertonic',
    SUPERTONIC_INTRA_OP_THREADS: '4',
    SUPERTONIC_INTER_OP_THREADS: '1',
    TTS_VOLUME: '1.6',
  };
  const settings = buildTtsSettings(env, PROJECT_ROOT);

  assert.equal(settings.backend, 'supertonic');
  assert.equal(settings.volume, 1.6);
  expectFields(settings.supertonic, {
    command: './.venv-supertonic/bin/supertonic',
    voice: 'M4',
    language: 'ko',
    steps: 3,
    speed: 1.15,
    maxChunkLength: 240,
    silenceDuration: 0.1,
    customStylePath: path.join(PROJECT_ROOT, 'voice-styles', 'custom.json'),
    timeoutMs: 45000,
    useForProgress: true,
    cacheDir: path.join(PROJECT_ROOT, '.cache', 'supertonic'),
    // Thread counts are asserted as strings, unlike the numeric fields above.
    intraOpThreads: '4',
    interOpThreads: '1',
  });
});

test('buildTtsSettings falls back to edge for unsupported backend', () => {
  const { backend } = buildTtsSettings({ TTS_BACKEND: 'unknown' }, PROJECT_ROOT);
  assert.equal(backend, 'edge');
});
@@ -0,0 +1,127 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+
4
/**
 * Built-in voice catalogue and default selection.
 *
 * Shape: `currentBackend` / `currentVoiceType` hold the active selection, and
 * `backends[name]` holds that backend's own `currentVoiceType` plus a `voices`
 * map of `{ label, language, voice }` entries keyed by voice type.
 */
export const DEFAULT_TTS_VOICE_CONFIG = (() => {
  // Builds one catalogue entry; property order is label, language, voice.
  const entry = (label, language, voice) => ({ label, language, voice });
  // Both cloning backends point at the same reference recording by default.
  const referenceSample = 'voice-samples/user-reference.wav';

  return {
    currentBackend: 'edge',
    currentVoiceType: 'english_male',
    backends: {
      edge: {
        currentVoiceType: 'english_male',
        voices: {
          english_male: entry('English male', 'en', 'en-US-GuyNeural'),
          english_female: entry('English female', 'en', 'en-US-AriaNeural'),
          korean_male: entry('Korean male', 'ko', 'ko-KR-InJoonNeural'),
          korean_female: entry('Korean female', 'ko', 'ko-KR-SunHiNeural'),
          korean_multilingual_male: entry('Korean multilingual male', 'ko', 'ko-KR-HyunsuMultilingualNeural'),
        },
      },
      openvoice: {
        currentVoiceType: 'cloned_reference',
        voices: {
          cloned_reference: entry('OpenVoice reference sample', 'ko', referenceSample),
        },
      },
      speechswift: {
        currentVoiceType: 'cosyvoice_reference',
        voices: {
          cosyvoice_reference: entry('CosyVoice reference sample', 'ko', referenceSample),
          qwen3_default: entry('Qwen3 default speaker', 'ko', 'qwen3_default'),
        },
      },
      supertonic: {
        currentVoiceType: 'm1',
        voices: {
          m1: entry('Supertonic M1', 'ko', 'M1'),
        },
      },
    },
  };
})();
39
+
40
/**
 * Deep-copies a value by serializing it to JSON text and parsing it back.
 * Only data that survives a JSON round trip is preserved.
 */
function clone(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
43
+
44
/**
 * Returns a fresh, independently mutable copy of DEFAULT_TTS_VOICE_CONFIG.
 *
 * @returns {object} A deep copy of the built-in voice configuration.
 */
export function defaultTtsVoiceConfig() {
  const freshCopy = clone(DEFAULT_TTS_VOICE_CONFIG);
  return freshCopy;
}
47
+
48
/**
 * Resolves a requested backend name to a key that exists in `config.backends`.
 *
 * The request is stringified, trimmed and lower-cased before the lookup.
 * Only the config's own backend entries count: names that merely resolve
 * through the prototype chain (`constructor`, `__proto__`, ...) are rejected.
 *
 * @param {*} value - Requested backend name; falsy values are treated as empty.
 * @param {object} config - Voice configuration containing a `backends` map.
 * @returns {string} The normalized backend key, or `'edge'` when the request
 *   does not name a configured backend.
 */
function normalizeBackend(value, config) {
  const key = String(value || '').trim().toLowerCase();
  const backends = config.backends ?? {};
  // Own-property check: a plain `backends[key]` lookup would accept inherited names.
  const isConfigured = Object.hasOwn(backends, key) && Boolean(backends[key]);
  return isConfigured ? key : 'edge';
}
52
+
53
/**
 * Picks a voice type that actually exists in a backend's `voices` map.
 *
 * Candidates are tried in order: the requested type, then the backend's own
 * `currentVoiceType`, then the first voice declared for the backend. Trying
 * the backend's remembered selection before the first voice means an
 * unknown request (for example a voice type that belongs to another backend)
 * no longer discards that remembered selection.
 *
 * @param {object|undefined} backendConfig - One entry of `config.backends`.
 * @param {*} requested - Desired voice type; falsy values are skipped.
 * @returns {string} A key of `backendConfig.voices`, or `''` when the backend
 *   declares no voices.
 */
function normalizeVoiceType(backendConfig, requested) {
  const voices = backendConfig?.voices || {};
  for (const candidate of [requested, backendConfig?.currentVoiceType]) {
    const key = String(candidate || '').trim();
    // Own-property check so inherited names such as `constructor` never match.
    if (key && Object.hasOwn(voices, key) && voices[key]) return key;
  }
  return Object.keys(voices)[0] || '';
}
58
+
59
/**
 * Loads the voice configuration stored at `configPath`, layered over `fallback`.
 *
 * Reading is best-effort: a missing path, a missing file, unreadable or
 * malformed JSON, or JSON whose top level is not a plain object all yield a
 * copy of `fallback` instead of throwing.
 *
 * Merge rules:
 *  - top-level keys from the file override the fallback's;
 *  - `backends` is merged per backend name, and each backend entry is merged
 *    shallowly, so a file that only overrides e.g. `currentVoiceType` keeps
 *    the fallback's `voices` for that backend;
 *  - backend overrides that are not plain objects are ignored.
 *
 * @param {string} configPath - JSON file to read; falsy means "no file".
 * @param {object} [fallback=DEFAULT_TTS_VOICE_CONFIG] - Base configuration.
 * @returns {object} A new configuration object; `fallback` is never mutated.
 */
export function readTtsVoiceConfig(configPath, fallback = DEFAULT_TTS_VOICE_CONFIG) {
  const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
  const base = clone(fallback);
  if (!configPath) return base;
  try {
    if (!fs.existsSync(configPath)) return base;
    const parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
    if (!isPlainObject(parsed)) return base;

    const backends = { ...base.backends };
    if (isPlainObject(parsed.backends)) {
      for (const [name, override] of Object.entries(parsed.backends)) {
        // Assigning to `__proto__` would replace the map's prototype instead of adding a key.
        if (name === '__proto__' || !isPlainObject(override)) continue;
        const inherited = Object.hasOwn(backends, name) ? backends[name] : {};
        backends[name] = { ...inherited, ...override };
      }
    }
    return { ...base, ...parsed, backends };
  } catch {
    // Deliberate best-effort: any read or parse failure falls back to the defaults.
    return base;
  }
}
68
+
69
/**
 * Persists `config` as pretty-printed JSON (two-space indent, trailing newline).
 *
 * Missing parent directories are created first. The file is written with a
 * requested mode of 0o600.
 *
 * @param {string} configPath - Destination file path.
 * @param {object} config - Configuration to serialize.
 */
export function writeTtsVoiceConfig(configPath, config) {
  const parentDir = path.dirname(configPath);
  fs.mkdirSync(parentDir, { recursive: true });

  const payload = `${JSON.stringify(config, null, 2)}\n`;
  fs.writeFileSync(configPath, payload, { mode: 0o600 });
}
73
+
74
/**
 * Resolves which backend and voice are in effect for a config plus environment.
 *
 * `env.TTS_BACKEND` and `env.TTS_VOICE_TYPE` take precedence over the config's
 * `currentBackend` / `currentVoiceType`; the backend's own `currentVoiceType`
 * is the last resort. Both values are normalized against what the config
 * declares.
 *
 * Incomplete configs are tolerated: when the selected backend entry (or its
 * `voices` map) is missing, `voice` is `undefined` and `voiceType` may be `''`
 * instead of a TypeError being thrown.
 *
 * @param {object|null|undefined} config - Voice configuration; falsy uses the defaults.
 * @param {object} [env={}] - Environment-style overrides.
 * @returns {{backend: string, voiceType: string, voice: (object|undefined), backendConfig: (object|undefined)}}
 */
export function effectiveTtsVoiceSelection(config, env = {}) {
  const merged = config || defaultTtsVoiceConfig();
  const backend = normalizeBackend(env.TTS_BACKEND || merged.currentBackend, merged);
  // `backends` or the chosen entry can be absent in a hand-edited config.
  const backendConfig = merged.backends?.[backend] || merged.backends?.edge;
  const requestedVoiceType = env.TTS_VOICE_TYPE || merged.currentVoiceType || backendConfig?.currentVoiceType;
  const voiceType = normalizeVoiceType(backendConfig, requestedVoiceType);
  const voice = backendConfig?.voices?.[voiceType];
  return { backend, voiceType, voice, backendConfig };
}
82
+
83
/**
 * Returns a copy of `env` with the selection written into it.
 *
 * `TTS_BACKEND` and `TTS_VOICE_TYPE` are always set. `TTS_VOICE` is set only
 * for the `edge` backend and only when the selection carries a voice id;
 * `VOICE_LANGUAGE` is set when the selected voice declares a language. A
 * selection without a resolved voice leaves any existing `TTS_VOICE` /
 * `VOICE_LANGUAGE` untouched instead of throwing.
 *
 * @param {object} [env={}] - Source environment; not mutated.
 * @param {{backend: string, voiceType: string, voice?: {voice?: string, language?: string}}} selection
 * @returns {object} New environment object.
 */
export function applyTtsVoiceSelectionToEnv(env = {}, selection) {
  const { backend, voiceType, voice } = selection;
  const next = { ...env, TTS_BACKEND: backend, TTS_VOICE_TYPE: voiceType };
  // `voice` is undefined when the backend declares no matching voice.
  if (backend === 'edge' && voice?.voice) next.TTS_VOICE = voice.voice;
  if (voice?.language) next.VOICE_LANGUAGE = voice.language;
  return next;
}
89
+
90
/**
 * Produces a new configuration with the requested backend and voice selected.
 *
 * The input is deep-copied first, so `config` itself is left unchanged. The
 * chosen voice type is recorded both at the top level and on the selected
 * backend entry.
 *
 * @param {object|null|undefined} config - Starting configuration; falsy uses DEFAULT_TTS_VOICE_CONFIG.
 * @param {{backend?: string, voiceType?: string}} [selection] - Requested changes;
 *   omitted fields keep the config's current values.
 * @returns {object} The updated copy.
 */
export function updateTtsVoiceConfig(config, { backend, voiceType } = {}) {
  const draft = clone(config || DEFAULT_TTS_VOICE_CONFIG);

  const backendKey = normalizeBackend(backend || draft.currentBackend, draft);
  const target = draft.backends[backendKey];

  const requestedVoice = voiceType || draft.currentVoiceType || target.currentVoiceType;
  const voiceKey = normalizeVoiceType(target, requestedVoice);

  Object.assign(draft, { currentBackend: backendKey, currentVoiceType: voiceKey });
  target.currentVoiceType = voiceKey;
  return draft;
}
100
+
101
/**
 * Suggests a voice type of the config's current backend for a language.
 *
 * Languages starting with "ko" (case-insensitive) count as Korean; every
 * other value is handled as English. Well-known voice type names are tried
 * first, then any voice declaring the language, then the backend's first voice.
 *
 * @param {object} config - Voice configuration with `currentBackend` and `backends`.
 * @param {*} language - Language tag or name.
 * @returns {string} A voice type key, or `''` when the backend has no voices.
 */
export function preferredVoiceTypeForLanguage(config, language) {
  const wantsKorean = /^ko/i.test(String(language || ''));
  const lang = wantsKorean ? 'ko' : 'en';

  const backendKey = normalizeBackend(config.currentBackend, config);
  const voices = config.backends[backendKey]?.voices || {};

  const ranked = wantsKorean
    ? ['korean_male', 'korean_female', 'korean_multilingual_male']
    : ['english_male', 'english_female'];
  const wellKnown = ranked.find((key) => voices[key]?.language === lang);
  if (wellKnown !== undefined) return wellKnown;

  const firstMatch = Object.entries(voices).find(([, voice]) => voice.language === lang);
  return firstMatch?.[0] || Object.keys(voices)[0] || '';
}
111
+
112
/**
 * Detects a spoken "change the voice" request in a transcript.
 *
 * The transcript must first look like a voice-change request (English or
 * Korean phrasing) and must then mention a language. A female keyword selects
 * the female voice; anything else selects the male voice of that language.
 *
 * @param {*} text - Transcript text.
 * @returns {{voiceType: string}|null} The requested voice type, or `null` when
 *   the transcript is empty, is not a voice request, or names no language.
 */
export function voiceCommandFromTranscript(text) {
  const raw = String(text || '').trim();
  if (raw === '') return null;

  // Korean phrasing is matched against a lower-cased copy with all whitespace removed.
  const squashed = raw.toLowerCase().replace(/\s+/g, '');
  const englishRequest = [
    /\b(change|switch|set)\b.*\b(voice|speaker)\b/i,
    /\b(voice|speaker)\b.*\b(to|as)\b/i,
  ].some((pattern) => pattern.test(raw));
  const mentionsVoiceChange = englishRequest
    || /(목소리|음성).*(바꿔|변경|설정|해줘)|목소리.*로|음성.*로/u.test(squashed);
  if (!mentionsVoiceChange) return null;

  // Korean keywords are checked before English ones.
  let languageName = null;
  if (/(korean|한국|한글|ko-kr|kor)/iu.test(raw)) {
    languageName = 'korean';
  } else if (/(english|영어|en-us|eng)/iu.test(raw)) {
    languageName = 'english';
  }
  if (languageName === null) return null;

  const wantsFemale = /(female|woman|여자|여성)/iu.test(raw);
  return { voiceType: `${languageName}_${wantsFemale ? 'female' : 'male'}` };
}
@@ -0,0 +1,64 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import fs from 'node:fs';
4
+ import os from 'node:os';
5
+ import path from 'node:path';
6
+
7
+ import {
8
+ applyTtsVoiceSelectionToEnv,
9
+ defaultTtsVoiceConfig,
10
+ effectiveTtsVoiceSelection,
11
+ preferredVoiceTypeForLanguage,
12
+ readTtsVoiceConfig,
13
+ updateTtsVoiceConfig,
14
+ voiceCommandFromTranscript,
15
+ writeTtsVoiceConfig,
16
+ } from './tts_voice_config.mjs';
17
+
18
test('effectiveTtsVoiceSelection reads backend and voice type from config', () => {
  // Start from the defaults and pin an explicit selection on the copy.
  const config = Object.assign(defaultTtsVoiceConfig(), {
    currentBackend: 'edge',
    currentVoiceType: 'korean_female',
  });

  const selected = effectiveTtsVoiceSelection(config, {});

  assert.equal(selected.backend, 'edge');
  assert.equal(selected.voiceType, 'korean_female');
  assert.equal(selected.voice.voice, 'ko-KR-SunHiNeural');
});

test('applyTtsVoiceSelectionToEnv updates backend voice and voice language together', () => {
  const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_male' });
  const selected = effectiveTtsVoiceSelection(config, {});
  const env = applyTtsVoiceSelectionToEnv({ TTS_RATE: '+0%' }, selected);

  // Unrelated keys survive; backend, voice type, voice id and language are all written.
  assert.deepEqual(env, {
    TTS_RATE: '+0%',
    TTS_BACKEND: 'edge',
    TTS_VOICE_TYPE: 'korean_male',
    TTS_VOICE: 'ko-KR-InJoonNeural',
    VOICE_LANGUAGE: 'ko',
  });
});

test('preferredVoiceTypeForLanguage maps language to available backend voice type', () => {
  const config = defaultTtsVoiceConfig();
  const expectedByLanguage = [
    ['ko', 'korean_male'],
    ['en', 'english_male'],
  ];
  for (const [language, voiceType] of expectedByLanguage) {
    assert.equal(preferredVoiceTypeForLanguage(config, language), voiceType);
  }
});

test('voiceCommandFromTranscript detects voice type changes', () => {
  const recognized = [
    ['change voice to Korean female', 'korean_female'],
    ['남자 한국어 목소리로 바꿔', 'korean_male'],
    ['switch speaker to English', 'english_male'],
  ];
  for (const [transcript, voiceType] of recognized) {
    assert.deepEqual(voiceCommandFromTranscript(transcript), { voiceType });
  }
  // Mentions a language but not a voice, so it is not a voice command.
  assert.equal(voiceCommandFromTranscript('change language to Korean'), null);
});
54
+
55
test('read and write voice config round trips current selection', () => {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'voice-config-test-'));
  try {
    const file = path.join(dir, 'tts-voices.json');
    const config = updateTtsVoiceConfig(defaultTtsVoiceConfig(), { voiceType: 'korean_female' });

    writeTtsVoiceConfig(file, config);
    const loaded = readTtsVoiceConfig(file);

    assert.equal(loaded.currentVoiceType, 'korean_female');
  } finally {
    // Remove the scratch directory even when an assertion above fails, so
    // repeated runs do not accumulate directories under the OS temp dir.
    fs.rmSync(dir, { recursive: true, force: true });
  }
});
@@ -0,0 +1,76 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+
4
/**
 * Normalizes text for keyword matching: stringifies it (falsy becomes ''),
 * strips every run of whitespace and lower-cases the result.
 */
function compact(text) {
  const source = String(text || '');
  const withoutWhitespace = source.split(/\s+/).join('');
  return withoutWhitespace.toLowerCase();
}
7
+
8
/**
 * Recognizes a voice-clone capture command in free text (Korean or English).
 *
 * The text must mention a voice together with a clone, sample or reference
 * keyword. The first matching action wins, checked in the order cancel,
 * status, start.
 *
 * @param {*} text - Message or transcript text.
 * @returns {{action: 'cancel'|'status'|'start'}|null} The detected action, or
 *   `null` when the text is not a voice-clone command or names no action.
 */
export function voiceCloneCommandFromText(text) {
  const c = compact(text);

  const aboutVoiceClone = /(목소리|음성|보이스|voice).*(클론|클로닝|clone|샘플|sample|참조|reference)|(클론|클로닝|clone).*(목소리|음성|보이스|voice)/u;
  if (!aboutVoiceClone.test(c)) return null;

  // Order matters: "save ... cancel" must resolve to cancel, not start.
  const actionRules = [
    ['cancel', /(취소|cancel|그만|중지|멈춰)/u],
    ['status', /(상태|status|확인|됐|준비)/u],
    ['start', /(녹음|저장|캡처|capture|record|따|시작|받아|만들)/u],
  ];
  for (const [action, keywords] of actionRules) {
    if (keywords.test(c)) return { action };
  }
  return null;
}
17
+
18
/**
 * Creates an in-memory holder for at most one pending voice-clone capture.
 *
 * Returned methods:
 *  - `arm({ userId, targetPath, source })` stores a new pending capture,
 *    replacing any previous one, and returns it;
 *  - `cancel(userId?)` clears and returns the pending capture; when a user id
 *    is given it must match the arming user, otherwise nothing changes;
 *  - `current()` returns the pending capture or `null`;
 *  - `consume(userId)` clears and returns the pending capture only when it
 *    was armed for that user.
 *
 * @param {{defaultTargetPath: string}} options - Target path used when `arm`
 *   is called without one.
 */
export function createVoiceCloneCaptureState({ defaultTargetPath }) {
  let pending = null;

  // True when a capture is pending and was armed for this user id.
  const isOwnedBy = (userId) => pending !== null && String(userId) === pending.userId;

  // Clears the pending capture and hands back what was stored.
  const release = () => {
    const released = pending;
    pending = null;
    return released;
  };

  return {
    arm({ userId, targetPath = defaultTargetPath, source = 'command' }) {
      pending = { userId: String(userId), targetPath, source, armedAt: Date.now() };
      return pending;
    },
    cancel(userId = null) {
      if (pending === null) return null;
      const allowed = userId == null || isOwnedBy(userId);
      return allowed ? release() : null;
    },
    current() {
      return pending;
    },
    consume(userId) {
      return isOwnedBy(userId) ? release() : null;
    },
  };
}
43
+
44
/**
 * Reports whether the capture state currently holds a capture armed for `userId`.
 *
 * @param {{current: function(): ({userId: string}|null)}} state - Capture state holder.
 * @param {*} userId - User id to compare; stringified before the comparison.
 * @returns {boolean}
 */
export function isVoiceCloneCaptureArmedFor(state, userId) {
  const pending = state.current();
  return pending?.userId === String(userId);
}
47
+
48
/**
 * Converts a captured WAV into the voice-clone reference file using ffmpeg.
 *
 * The target's parent directory is created first, then ffmpeg is invoked with
 * `-ac 1 -ar 16000 -sample_fmt s16` and `-y` so an existing target is
 * overwritten. Afterwards the target must exist with a positive size.
 *
 * @param {object} options
 * @param {string} options.sourceWav - Input audio file.
 * @param {string} options.targetPath - Output reference file.
 * @param {Function} options.execFileAsync - Promise-returning `execFile`-style function.
 * @param {Function} [options.mkdirSync=fs.mkdirSync] - Injectable for tests.
 * @param {Function} [options.existsSync=fs.existsSync] - Injectable for tests.
 * @param {Function} [options.statSync=fs.statSync] - Injectable for tests.
 * @returns {Promise<string>} Resolves with `targetPath`.
 * @throws {Error} When the target is missing or empty after ffmpeg returns;
 *   rejections from `execFileAsync` propagate unchanged.
 */
export async function saveVoiceCloneReference({
  sourceWav,
  targetPath,
  execFileAsync,
  mkdirSync = fs.mkdirSync,
  existsSync = fs.existsSync,
  statSync = fs.statSync,
}) {
  const targetDir = path.dirname(targetPath);
  mkdirSync(targetDir, { recursive: true });

  const ffmpegArgs = [
    '-y', '-hide_banner',
    '-loglevel', 'error',
    '-i', sourceWav,
    '-ac', '1',
    '-ar', '16000',
    '-sample_fmt', 's16',
    targetPath,
  ];
  const execOptions = { timeout: 30000, maxBuffer: 1024 * 1024 };
  await execFileAsync('ffmpeg', ffmpegArgs, execOptions);

  // ffmpeg returning is not trusted on its own: verify a non-empty file exists.
  const wasWritten = existsSync(targetPath) && !(statSync(targetPath).size <= 0);
  if (!wasWritten) {
    throw new Error(`voice clone reference was not written: ${targetPath}`);
  }
  return targetPath;
}
@@ -0,0 +1,51 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+ import path from 'node:path';
4
+
5
+ import {
6
+ voiceCloneCommandFromText,
7
+ createVoiceCloneCaptureState,
8
+ isVoiceCloneCaptureArmedFor,
9
+ saveVoiceCloneReference,
10
+ } from './voice_clone_capture.mjs';
11
+
12
test('voiceCloneCommandFromText detects Korean voice commands for arming capture', () => {
  const expectedActions = [
    ['내 목소리 샘플 녹음 시작해', 'start'],
    ['보이스 클로닝 샘플 저장 취소', 'cancel'],
    ['보이스 클론 상태 알려줘', 'status'],
  ];
  for (const [utterance, action] of expectedActions) {
    assert.equal(voiceCloneCommandFromText(utterance).action, action);
  }
  // Unrelated chatter is not a command at all.
  assert.equal(voiceCloneCommandFromText('그냥 테스트야'), null);
});

test('createVoiceCloneCaptureState arms and consumes only the requested user', () => {
  const defaultTargetPath = '/project/voice-samples/user-reference.wav';
  const state = createVoiceCloneCaptureState({ defaultTargetPath });
  const armed = state.arm({ userId: 'u1', source: 'voice-command' });

  assert.equal(armed.targetPath, '/project/voice-samples/user-reference.wav');
  assert.equal(isVoiceCloneCaptureArmedFor(state, 'u1'), true);
  assert.equal(isVoiceCloneCaptureArmedFor(state, 'u2'), false);
  // A different user cannot take the capture; the arming user can, exactly once.
  assert.equal(state.consume('u2'), null);
  assert.equal(state.consume('u1').source, 'voice-command');
  assert.equal(isVoiceCloneCaptureArmedFor(state, 'u1'), false);
});

test('saveVoiceCloneReference normalizes the captured Discord WAV with ffmpeg', async () => {
  const calls = [];
  const target = '/project/voice-samples/user-reference.wav';
  const record = (entry) => calls.push(entry);

  await saveVoiceCloneReference({
    sourceWav: '/tmp/utterance.wav',
    targetPath: target,
    execFileAsync: async (cmd, args, options) => record({ cmd, args, options }),
    mkdirSync: (dir) => record({ mkdir: dir }),
    existsSync: () => true,
    statSync: () => ({ size: 12345 }),
  });

  const [mkdirCall, ffmpegCall] = calls;
  assert.equal(mkdirCall.mkdir, path.dirname(target));
  assert.equal(ffmpegCall.cmd, 'ffmpeg');
  assert.deepEqual(ffmpegCall.args.slice(0, 6), ['-y', '-hide_banner', '-loglevel', 'error', '-i', '/tmp/utterance.wav']);
  for (const expectedArg of ['-ac', '1', '-ar', '16000']) {
    assert.ok(ffmpegCall.args.includes(expectedArg));
  }
  assert.equal(ffmpegCall.args.at(-1), target);
});
@@ -0,0 +1,62 @@
1
/**
 * Tells whether a language tag selects English, i.e. starts with "en"
 * (case-insensitive). Falsy values are treated as an empty tag.
 */
function isEnglish(language = 'ko') {
  const tag = String(language || '');
  return /^en/i.test(tag);
}
4
+
5
/**
 * Status line announcing that speech-to-text has started.
 *
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @returns {string}
 */
export function formatSttStartMessage(language = 'ko') {
  if (isEnglish(language)) {
    return '🎧 Transcribing your speech now.';
  }
  return '🎧 음성을 텍스트로 변환하는 중이야.';
}
10
+
11
/**
 * Status line showing a transcription result, prefixed with a `<@userId>` mention.
 *
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @param {string} userId - Id interpolated into the mention.
 * @param {string} text - Transcribed text.
 * @returns {string}
 */
export function formatSttResultMessage(language = 'ko', userId, text) {
  const heading = isEnglish(language) ? '📝 STT result' : '📝 STT 결과';
  return `${heading} <@${userId}>: ${text}`;
}
16
+
17
/**
 * Status line explaining that an utterance was ignored for lacking the wake word.
 *
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @returns {string}
 */
export function formatWakeRejectedMessage(language = 'ko') {
  if (isEnglish(language)) {
    return 'Wake word missing: I will not respond.';
  }
  return 'wake word 없음: 응답은 안 함';
}
22
+
23
/**
 * Status line reporting a voice-processing failure.
 *
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @param {*} detail - Failure detail appended after the localized prefix.
 * @returns {string}
 */
export function formatVoiceErrorMessage(language = 'ko', detail) {
  const prefix = isEnglish(language) ? '⚠️ Voice processing failed' : '⚠️ 음성 처리 실패';
  return `${prefix}: ${detail}`;
}
28
+
29
/**
 * Describes the active capture-sensitivity thresholds in one localized line.
 *
 * `thresholds.minBytes` is shown as seconds (one decimal) by dividing by
 * 48000 * 2 * 2 — presumably 48 kHz, 16-bit, stereo PCM; confirm against the
 * capture pipeline. A truthy `ttl` appends a note about returning to the
 * default mode after that many seconds.
 *
 * @param {{mode: string, minBytes: number, minMeanDb: number, minMaxDb: number}} thresholds
 * @param {number} [ttl=0] - Seconds until the default mode is restored; 0 omits the note.
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @returns {string}
 */
export function sensitivityStatusTextForLanguage(thresholds, ttl = 0, language = 'ko') {
  const bytesPerSecond = 48000 * 2 * 2;
  const minSeconds = (thresholds.minBytes / bytesPerSecond).toFixed(1);
  const { mode, minMeanDb, minMaxDb } = thresholds;

  if (isEnglish(language)) {
    const restoreNote = ttl ? `, restoring default in ${ttl}s` : '';
    return `Sensitivity mode: ${mode}, minimum ${minSeconds}s, mean>=${minMeanDb}dB or max>=${minMaxDb}dB${restoreNote}`;
  }
  const restoreNote = ttl ? `, ${ttl}초 뒤 기본으로 복귀` : '';
  return `감도 모드: ${mode}, 최소 ${minSeconds}초, mean>=${minMeanDb}dB 또는 max>=${minMaxDb}dB${restoreNote}`;
}
36
+
37
/**
 * Describes whether verbose progress mode is on or off, with a short
 * explanation of what that means, in English or Korean.
 *
 * @param {*} enabled - Truthy when verbose mode is on.
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @returns {string}
 */
export function verboseStatusTextForLanguage(enabled, language = 'ko') {
  if (isEnglish(language)) {
    if (enabled) {
      return 'Verbose progress mode: on — I will send and speak intermediate file-read, skill, tool, web-search, terminal, and test progress in English.';
    }
    return 'Verbose progress mode: off — I will keep quiet and focus on final results.';
  }

  const [stateLabel, explanation] = enabled
    ? ['켜짐', ' — 에이전트의 파일 읽기/스킬 사용/툴 사용/웹 검색/터미널 실행 같은 중간 항목을 텍스트와 음성으로 알려줄게.']
    : ['꺼짐', ' — 기본은 조용하게 최종 결과 중심으로만 알려줄게.'];
  return `verbose 진행 모드: ${stateLabel}${explanation}`;
}
45
+
46
/**
 * Spoken confirmation after the sensitivity mode changes.
 *
 * @param {string} mode - `'conservative'` selects the conservative wording;
 *   any other value selects the normal-sensitivity wording.
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @returns {string}
 */
export function sensitivityChangedSpeech(mode, language = 'ko') {
  const isConservative = mode === 'conservative';
  if (isEnglish(language)) {
    if (isConservative) return 'Switched to conservative outdoor sensitivity.';
    return 'Switched back to normal sensitivity.';
  }
  if (isConservative) return '외부 보수 모드로 바꿨어.';
  return '기본 감도로 바꿨어.';
}
50
+
51
/**
 * Spoken confirmation after verbose progress mode is toggled.
 *
 * @param {*} enabled - Truthy when verbose mode was switched on.
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @returns {string}
 */
export function verboseChangedSpeech(enabled, language = 'ko') {
  if (isEnglish(language)) {
    return `Verbose progress mode is ${enabled ? 'on' : 'off'}.`;
  }
  return `상세 진행 모드 ${enabled ? '켰어' : '껐어'}.`;
}
55
+
56
/**
 * Header line placed before an agent's answer.
 *
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @param {string} [label='Agent'] - Name shown for the answering agent.
 * @returns {string}
 */
export function agentAnswerHeader(language = 'ko', label = 'Agent') {
  const noun = isEnglish(language) ? 'response' : '응답';
  return `✅ ${label} ${noun}:`;
}
59
+
60
/**
 * Placeholder text used when the agent produced no answer.
 *
 * @param {string} [language='ko'] - English when it starts with "en", Korean otherwise.
 * @returns {string}
 */
export function emptyAgentAnswer(language = 'ko') {
  if (isEnglish(language)) {
    return 'The response was empty.';
  }
  return '응답이 비어 있어.';
}
@@ -0,0 +1,33 @@
1
+ import test from 'node:test';
2
+ import assert from 'node:assert/strict';
3
+
4
+ import {
5
+ formatSttStartMessage,
6
+ formatSttResultMessage,
7
+ formatWakeRejectedMessage,
8
+ formatVoiceErrorMessage,
9
+ sensitivityStatusTextForLanguage,
10
+ verboseStatusTextForLanguage,
11
+ } from './voice_messages.mjs';
12
+
13
test('voice-mode status messages use English when voice language is English', () => {
  const language = 'en';
  assert.equal(formatSttStartMessage(language), '🎧 Transcribing your speech now.');
  assert.equal(formatSttResultMessage(language, '123', 'hello'), '📝 STT result <@123>: hello');
  assert.equal(formatWakeRejectedMessage(language), 'Wake word missing: I will not respond.');
  assert.equal(formatVoiceErrorMessage(language, 'edge failed'), '⚠️ Voice processing failed: edge failed');
});

test('voice-mode status messages use Korean when voice language is Korean', () => {
  const language = 'ko';
  assert.equal(formatSttStartMessage(language), '🎧 음성을 텍스트로 변환하는 중이야.');
  assert.equal(formatSttResultMessage(language, '123', '안녕'), '📝 STT 결과 <@123>: 안녕');
  assert.equal(formatWakeRejectedMessage(language), 'wake word 없음: 응답은 안 함');
  assert.equal(formatVoiceErrorMessage(language, '실패'), '⚠️ 음성 처리 실패: 실패');
});

test('verbose and sensitivity status messages are localized', () => {
  const thresholds = { mode: 'normal', minBytes: 48000 * 2 * 2 * 1.4, minMeanDb: -30, minMaxDb: -14 };
  // Only the localized prefix of each message is pinned here.
  const expectations = [
    [verboseStatusTextForLanguage(true, 'en'), /^Verbose progress mode: on/],
    [verboseStatusTextForLanguage(false, 'ko'), /^verbose 진행 모드: 꺼짐/],
    [sensitivityStatusTextForLanguage(thresholds, 0, 'en'), /^Sensitivity mode: normal/],
    [sensitivityStatusTextForLanguage(thresholds, 0, 'ko'), /^감도 모드: normal/],
  ];
  for (const [message, pattern] of expectations) {
    assert.match(message, pattern);
  }
});
+ });