agentgui 1.0.178 → 1.0.179
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/pocket-sidecar.js +190 -0
- package/lib/speech.js +47 -5
- package/package.json +1 -1
- package/server.js +1 -1
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { spawn } from 'child_process';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import fs from 'fs';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
import http from 'http';
|
|
7
|
+
|
|
8
|
+
// Package root: two directory levels above this module's own file path.
const ROOT = path.dirname(path.dirname(fileURLToPath(import.meta.url)));
// Location of the pocket-tts executable under <ROOT>/data/pocket-venv/bin.
const POCKET_BIN = path.join(ROOT, 'data', 'pocket-venv', 'bin', 'pocket-tts');
// TCP port the sidecar is started on and queried at.
const PORT = 8787;

// Voice name tried when starting with the requested voice does not succeed.
const FALLBACK_VOICE = 'alba';

// Mutable runtime state shared by every function in this module.
const state = {
  process: null,
  port: PORT,
  status: 'stopped',
  pid: null,
  restartCount: 0,
  failureCount: 0,
  lastError: null,
  healthy: false,
  voicePath: null,
  starting: false,
  shutdownRequested: false,
  healthTimer: null,
  restartTimer: null,
  voiceCloning: false,
};
// The same object is published on globalThis under `__pocketSidecar`.
globalThis.__pocketSidecar = state;
|
|
21
|
+
|
|
22
|
+
/**
 * Report whether the pocket-tts executable is present on disk.
 * @returns {boolean} true when a file exists at POCKET_BIN.
 */
function isInstalled() {
  const binaryPresent = fs.existsSync(POCKET_BIN);
  return binaryPresent;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Locate the audio file that backs a voice id.
 *
 * A leading "custom_" prefix is stripped from the id. Each search directory
 * is then probed, in order, for `<name>.wav`, `.mp3`, `.ogg` and `.flac`;
 * the first existing path wins.
 *
 * @param {string} voiceId - Voice identifier; falsy values and 'default' mean "no file".
 * @returns {string|null} Path of the first matching file, or null when the id
 *   is not a usable name or no file exists.
 */
function findVoiceFile(voiceId) {
  if (typeof voiceId !== 'string' || voiceId === '' || voiceId === 'default') return null;
  const baseName = voiceId.replace(/^custom_/, '');
  // The name is joined into filesystem paths below, so refuse anything that
  // could step outside the voices directories (path separators or NUL bytes).
  if (baseName === '' || /[\\/\0]/.test(baseName)) return null;
  const searchDirs = [
    path.join(process.env.STARTUP_CWD || process.cwd(), 'voices'),
    path.join(ROOT, 'voices'),
    path.join(os.homedir(), 'voices'),
    '/config/voices',
  ];
  const extensions = ['.wav', '.mp3', '.ogg', '.flac'];
  for (const dir of searchDirs) {
    for (const ext of extensions) {
      const candidate = path.join(dir, baseName + ext);
      if (fs.existsSync(candidate)) return candidate;
    }
  }
  return null;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Probe the sidecar's /health endpoint on 127.0.0.1:PORT.
 *
 * The result is stored in `state.healthy` and also returned. The promise
 * never rejects: any request error, timeout (3 s) or response that closes
 * before completing counts as unhealthy.
 *
 * @returns {Promise<boolean>} true only when the response completes with HTTP 200.
 */
function healthCheck() {
  return new Promise((resolve) => {
    let settled = false;
    // Several events can report the same failure (e.g. 'timeout' followed by
    // the 'error' raised by destroy()); only the first outcome is recorded.
    const settle = (healthy) => {
      if (settled) return;
      settled = true;
      state.healthy = healthy;
      resolve(healthy);
    };
    const req = http.get(`http://127.0.0.1:${PORT}/health`, { timeout: 3000 }, (res) => {
      res.resume();
      res.on('end', () => settle(res.statusCode === 200));
      // A response cut off after the headers never emits 'end'; without these
      // handlers the returned promise would stay pending forever.
      res.on('error', () => settle(false));
      res.on('close', () => settle(false));
    });
    req.on('error', () => settle(false));
    req.on('timeout', () => { req.destroy(); settle(false); });
  });
}
|
|
49
|
+
|
|
50
|
+
/**
 * Send SIGTERM to the tracked sidecar process (if any) and reset the
 * process-related fields of `state`.
 *
 * Errors thrown by kill() are ignored on purpose. Afterwards
 * `state.process` and `state.pid` are null, `state.healthy` is false and
 * `state.status` is 'stopped'.
 */
function killProcess() {
  const child = state.process;
  if (child) {
    try {
      child.kill('SIGTERM');
    } catch (_) {
      // Best effort: a failure to signal is deliberately ignored.
    }
  }
  Object.assign(state, { process: null, pid: null, healthy: false, status: 'stopped' });
}
|
|
54
|
+
|
|
55
|
+
/**
 * Kill the current sidecar and schedule a new start() with exponential
 * backoff: 1 s doubled per previous attempt, capped at 30 s.
 *
 * Does nothing once a shutdown has been requested. Any restart that is
 * already pending is cancelled first, so overlapping triggers (process exit
 * and failed health checks) never queue more than one start().
 */
function scheduleRestart() {
  if (state.shutdownRequested) return;
  killProcess();
  if (state.restartTimer) {
    // Previously the old timer handle was overwritten and still fired,
    // producing duplicate restarts.
    clearTimeout(state.restartTimer);
    state.restartTimer = null;
  }
  const delay = Math.min(1000 * Math.pow(2, state.restartCount), 30000);
  state.restartCount++;
  console.log(`[POCKET-TTS] Restart in ${delay}ms (attempt ${state.restartCount})`);
  state.restartTimer = setTimeout(() => {
    state.restartTimer = null;
    start(state.voicePath).catch(e => console.error('[POCKET-TTS] Restart failed:', e.message));
  }, delay);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Spawn `pocket-tts serve` on PORT, optionally with a `--voice` argument.
 *
 * The server binds to 127.0.0.1 by default: this module only ever talks to
 * it over loopback, and binding to every interface would expose the
 * synthesis endpoint to the network. Set POCKET_TTS_HOST to bind elsewhere.
 *
 * @param {string|null|undefined} voice - Value for `--voice`; omitted when falsy.
 * @returns {import('child_process').ChildProcess} Child with stdin ignored and
 *   stdout/stderr piped.
 */
function spawnSidecar(voice) {
  const host = process.env.POCKET_TTS_HOST || '127.0.0.1';
  const args = ['serve', '--host', host, '--port', String(PORT)];
  if (voice) args.push('--voice', voice);
  console.log('[POCKET-TTS] Starting:', POCKET_BIN, args.join(' '));
  return spawn(POCKET_BIN, args, {
    stdio: ['ignore', 'pipe', 'pipe'],
    // Force unbuffered Python output so log lines reach the pipes promptly.
    env: { ...process.env, PYTHONUNBUFFERED: '1' },
  });
}
|
|
76
|
+
|
|
77
|
+
/**
 * Record a freshly spawned child in `state` and wire up its output.
 *
 * Sets `state.process`, `state.pid` and `state.status = 'starting'`.
 * Non-empty trimmed stdout chunks go to console.log and stderr chunks to
 * console.error, each prefixed with '[POCKET-TTS]'. An 'error' event stores
 * its message in `state.lastError`.
 *
 * @param {import('child_process').ChildProcess} proc - Child with piped stdout/stderr.
 */
function attachProc(proc) {
  Object.assign(state, { process: proc, pid: proc.pid, status: 'starting' });

  const relay = (stream, emit) => {
    stream.on('data', (chunk) => {
      const line = chunk.toString().trim();
      if (line) emit(line);
    });
  };
  relay(proc.stdout, (line) => console.log('[POCKET-TTS]', line));
  relay(proc.stderr, (line) => console.error('[POCKET-TTS]', line));

  proc.on('error', (err) => {
    state.lastError = err.message;
  });
}
|
|
83
|
+
|
|
84
|
+
/**
 * Poll healthCheck() once per second until the sidecar answers, the child
 * exits, or `timeoutSec` polls have elapsed.
 *
 * @param {import('child_process').ChildProcess} proc - Child being waited on.
 * @param {number} timeoutSec - Maximum number of one-second polls.
 * @returns {Promise<boolean>} true when a health check succeeded while the
 *   child was still alive; false on exit or timeout.
 */
async function waitForReady(proc, timeoutSec) {
  let exited = false;
  const onExit = () => { exited = true; };
  proc.on('exit', onExit);
  try {
    for (let i = 0; i < timeoutSec; i++) {
      if (exited) return false;
      await new Promise(r => setTimeout(r, 1000));
      // Re-check after sleeping: if the child died meanwhile, a successful
      // probe could only come from some other server on the same port.
      if (exited) return false;
      if (await healthCheck()) return true;
    }
    return false;
  } finally {
    // Do not leave a listener behind on the child for every wait.
    proc.off('exit', onExit);
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Start the pocket-tts sidecar and wait until it answers health checks.
 *
 * Flow: spawn with the requested voice (or the last known one). If that does
 * not become ready and the voice was not already FALLBACK_VOICE, retry once
 * with FALLBACK_VOICE. On success an exit handler and a 10 s health-check
 * interval are installed; three consecutive failed checks trigger
 * scheduleRestart().
 *
 * @param {string|null|undefined} voicePath - Voice name or file passed to `--voice`.
 * @returns {Promise<boolean>} true when the sidecar is running and healthy.
 *   false when it is not installed, another start is in progress, or startup
 *   failed; `state.status` / `state.lastError` then describe the reason.
 */
async function start(voicePath) {
  if (!isInstalled()) {
    state.lastError = 'not installed';
    state.status = 'unavailable';
    return false;
  }
  if (state.starting) return false;
  if (state.status === 'running' && state.healthy) return true;
  state.starting = true;
  state.shutdownRequested = false;
  const requestedVoice = voicePath || state.voicePath;
  try {
    killProcess();
    let proc = spawnSidecar(requestedVoice);
    attachProc(proc);
    let ready = await waitForReady(proc, 120);
    // Skip the fallback when stop() was called while we were waiting.
    if (!ready && !state.shutdownRequested && requestedVoice && requestedVoice !== FALLBACK_VOICE) {
      console.log('[POCKET-TTS] Custom voice failed, trying predefined voice:', FALLBACK_VOICE);
      killProcess();
      proc = spawnSidecar(FALLBACK_VOICE);
      attachProc(proc);
      state.voiceCloning = false;
      ready = await waitForReady(proc, 120);
      if (ready) state.voicePath = FALLBACK_VOICE;
    } else if (ready) {
      state.voicePath = requestedVoice;
      // Any voice that is not one of the predefined names counts as voice cloning.
      state.voiceCloning = !!requestedVoice && !['alba','marius','javert','jean','fantine','cosette','eponine','azelma'].includes(requestedVoice);
    }
    if (!ready) {
      // killProcess() resets status to 'stopped', so it must run BEFORE the
      // error status is recorded (the original order lost the 'error' state).
      killProcess();
      state.lastError = 'Start timeout';
      state.status = 'error';
      return false;
    }
    state.status = 'running';
    state.restartCount = 0;
    state.failureCount = 0;
    state.lastError = null;
    proc.on('exit', (code, sig) => {
      console.log(`[POCKET-TTS] Exited: code=${code} signal=${sig}`);
      // killProcess() clears state.process before signalling. If this child is
      // no longer the tracked one it was replaced or stopped deliberately and
      // must not wipe the newer state or schedule a second restart.
      if (state.process !== proc) return;
      state.process = null;
      state.pid = null;
      state.healthy = false;
      state.status = 'stopped';
      if (!state.shutdownRequested) scheduleRestart();
    });
    if (!state.healthTimer) state.healthTimer = setInterval(async () => {
      if (state.status !== 'running') return;
      const ok = await healthCheck();
      if (!ok && !state.shutdownRequested) {
        state.failureCount++;
        if (state.failureCount >= 3) scheduleRestart();
      } else if (ok) state.failureCount = 0;
    }, 10000);
    console.log('[POCKET-TTS] Ready on port', PORT, '(voice cloning:', state.voiceCloning + ')');
    return true;
  } catch (err) {
    // Do not leave a half-started child behind when startup throws.
    killProcess();
    state.lastError = err.message;
    state.status = 'error';
    return false;
  } finally {
    state.starting = false;
  }
}
|
|
141
|
+
|
|
142
|
+
/**
 * Shut the sidecar down for good: flag the shutdown so no restart is
 * scheduled, cancel the health-check interval and any pending restart timer,
 * then kill the process.
 *
 * @returns {Promise<void>}
 */
async function stop() {
  state.shutdownRequested = true;

  const { healthTimer, restartTimer } = state;
  if (healthTimer) {
    clearInterval(healthTimer);
    state.healthTimer = null;
  }
  if (restartTimer) {
    clearTimeout(restartTimer);
    state.restartTimer = null;
  }

  killProcess();
}
|
|
148
|
+
|
|
149
|
+
/**
 * Synthesize speech by POSTing a multipart form to the sidecar's /tts
 * endpoint on 127.0.0.1:PORT.
 *
 * The form always carries a `text` field. A `voice_wav` file part is added
 * only when voice cloning is active and `voicePath` differs from the voice
 * the sidecar was started with.
 *
 * @param {string} text - Text to speak.
 * @param {string|null|undefined} voicePath - Optional audio file for a per-request voice.
 * @returns {Promise<Buffer>} Raw response body of a 200 reply.
 * @throws {Error} When the sidecar is not healthy, the voice file cannot be
 *   read, the request fails or times out (60 s), the response is cut short,
 *   or the status is not 200.
 */
async function synthesize(text, voicePath) {
  if (!state.healthy) throw new Error('pocket-tts not ready');
  const boundary = '----PocketTTS' + Date.now();
  const parts = [];
  parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="text"\r\n\r\n${text}\r\n`);
  if (state.voiceCloning && voicePath && voicePath !== state.voicePath) {
    // Read asynchronously so a large sample does not block the event loop.
    const data = await fs.promises.readFile(voicePath);
    // Quotes or line breaks in the name would corrupt the part header.
    const name = path.basename(voicePath).replace(/["\r\n]/g, '_');
    parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="voice_wav"; filename="${name}"\r\nContent-Type: audio/wav\r\n\r\n`);
    parts.push(data);
    parts.push('\r\n');
  }
  parts.push(`--${boundary}--\r\n`);
  const body = Buffer.concat(parts.map(p => Buffer.isBuffer(p) ? p : Buffer.from(p)));
  return new Promise((resolve, reject) => {
    const req = http.request({
      hostname: '127.0.0.1', port: PORT, path: '/tts', method: 'POST',
      headers: { 'Content-Type': `multipart/form-data; boundary=${boundary}`, 'Content-Length': body.length },
      timeout: 60000,
    }, res => {
      const chunks = [];
      res.on('data', d => chunks.push(d));
      res.on('end', () => {
        const payload = Buffer.concat(chunks);
        if (res.statusCode !== 200) {
          reject(new Error(`pocket-tts HTTP ${res.statusCode}: ${payload}`));
          return;
        }
        resolve(payload);
      });
      // A connection dropped mid-response never emits 'end'; reject instead
      // of leaving the caller waiting forever.
      res.on('error', reject);
      res.on('close', () => {
        if (!res.complete) reject(new Error('pocket-tts response ended prematurely'));
      });
    });
    req.on('error', reject);
    req.on('timeout', () => { req.destroy(); reject(new Error('pocket-tts timeout')); });
    req.write(body);
    req.end();
  });
}
|
|
181
|
+
|
|
182
|
+
/**
 * Build a plain snapshot of the sidecar state for status reporting.
 *
 * @returns {{status: string, healthy: boolean, pid: (number|null), port: number,
 *   restartCount: number, failureCount: number, lastError: (string|null),
 *   installed: boolean, voiceCloning: boolean}} Copy of the public state
 *   fields plus the current result of isInstalled().
 */
function getState() {
  const {
    status,
    healthy,
    pid,
    port,
    restartCount,
    failureCount,
    lastError,
    voiceCloning,
  } = state;
  return {
    status,
    healthy,
    pid,
    port,
    restartCount,
    failureCount,
    lastError,
    installed: isInstalled(),
    voiceCloning,
  };
}
|
|
189
|
+
|
|
190
|
+
export { start, stop, synthesize, healthCheck, getState, isInstalled, findVoiceFile };
|
package/lib/speech.js
CHANGED
|
@@ -3,6 +3,7 @@ import fs from 'fs';
|
|
|
3
3
|
import os from 'os';
|
|
4
4
|
import path from 'path';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
|
+
import * as pocket from './pocket-sidecar.js';
|
|
6
7
|
|
|
7
8
|
const require = createRequire(import.meta.url);
|
|
8
9
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
@@ -510,6 +511,24 @@ function cachePut(key, buf) {
|
|
|
510
511
|
ttsCacheBytes += buf.length;
|
|
511
512
|
}
|
|
512
513
|
|
|
514
|
+
/**
 * Map a voice id to an audio file path.
 *
 * Asks the pocket sidecar module first and falls back to
 * findCustomVoiceFile() only when that lookup returns a falsy value.
 *
 * @param {string|null|undefined} voiceId - Voice id; falsy or 'default' means no file.
 * @returns {string|null} File path, or whatever falsy value the last lookup returned.
 */
function resolveVoicePath(voiceId) {
  const wantsDefault = !voiceId || voiceId === 'default';
  if (wantsDefault) return null;

  const fromSidecar = pocket.findVoiceFile(voiceId);
  if (fromSidecar) return fromSidecar;
  return findCustomVoiceFile(voiceId);
}
|
|
518
|
+
|
|
519
|
+
/**
 * Try to synthesize `text` through the pocket-tts sidecar.
 *
 * Never throws: when the sidecar is not healthy, the request fails, or the
 * result is 44 bytes or shorter, it resolves to null so the caller can fall
 * back to another engine.
 *
 * @param {string} text - Text to speak.
 * @param {string|null|undefined} voiceId - Voice id passed to resolveVoicePath().
 * @returns {Promise<Buffer|null>} Audio buffer longer than 44 bytes, or null.
 */
async function synthesizeViaPocket(text, voiceId) {
  const sidecar = pocket.getState();
  if (!sidecar.healthy) {
    return null;
  }

  let audio = null;
  try {
    const voiceFile = resolveVoicePath(voiceId);
    const wav = await pocket.synthesize(text, voiceFile);
    if (wav && wav.length > 44) {
      audio = wav;
    }
  } catch (err) {
    console.error('[TTS] pocket-tts failed, falling back:', err.message);
  }
  return audio;
}
|
|
531
|
+
|
|
513
532
|
async function synthesize(text, voiceId) {
|
|
514
533
|
const cacheKey = (voiceId || 'default') + ':' + text;
|
|
515
534
|
const cached = ttsCache.get(cacheKey);
|
|
@@ -521,6 +540,8 @@ async function synthesize(text, voiceId) {
|
|
|
521
540
|
const inflight = ttsInflight.get(cacheKey);
|
|
522
541
|
if (inflight) return inflight;
|
|
523
542
|
const promise = (async () => {
|
|
543
|
+
const pocketWav = await synthesizeViaPocket(text, voiceId);
|
|
544
|
+
if (pocketWav) { cachePut(cacheKey, pocketWav); return pocketWav; }
|
|
524
545
|
const tts = await getTTS();
|
|
525
546
|
const embeddings = await loadVoiceEmbedding(voiceId);
|
|
526
547
|
const result = await tts(text, { speaker_embeddings: embeddings });
|
|
@@ -534,8 +555,12 @@ async function synthesize(text, voiceId) {
|
|
|
534
555
|
|
|
535
556
|
async function* synthesizeStream(text, voiceId) {
|
|
536
557
|
const sentences = splitSentences(text);
|
|
537
|
-
const
|
|
538
|
-
|
|
558
|
+
const usePocket = pocket.getState().healthy;
|
|
559
|
+
let tts, embeddings;
|
|
560
|
+
if (!usePocket) {
|
|
561
|
+
tts = await getTTS();
|
|
562
|
+
embeddings = await loadVoiceEmbedding(voiceId);
|
|
563
|
+
}
|
|
539
564
|
for (const sentence of sentences) {
|
|
540
565
|
const cacheKey = (voiceId || 'default') + ':' + sentence;
|
|
541
566
|
const cached = ttsCache.get(cacheKey);
|
|
@@ -545,6 +570,11 @@ async function* synthesizeStream(text, voiceId) {
|
|
|
545
570
|
yield cached;
|
|
546
571
|
continue;
|
|
547
572
|
}
|
|
573
|
+
if (usePocket) {
|
|
574
|
+
const pocketWav = await synthesizeViaPocket(sentence, voiceId);
|
|
575
|
+
if (pocketWav) { cachePut(cacheKey, pocketWav); yield pocketWav; continue; }
|
|
576
|
+
}
|
|
577
|
+
if (!tts) { tts = await getTTS(); embeddings = await loadVoiceEmbedding(voiceId); }
|
|
548
578
|
const result = await tts(sentence, { speaker_embeddings: embeddings });
|
|
549
579
|
const wav = encodeWav(result.audio, result.sampling_rate || SAMPLE_RATE_TTS);
|
|
550
580
|
cachePut(cacheKey, wav);
|
|
@@ -554,18 +584,30 @@ async function* synthesizeStream(text, voiceId) {
|
|
|
554
584
|
|
|
555
585
|
/**
 * Report the readiness of the speech-to-text and text-to-speech engines.
 *
 * TTS counts as ready when either the in-process pipeline is loaded or the
 * pocket-tts sidecar is healthy. A stored TTS load error is reported only
 * while its retry window (TTS_ERROR_RETRY_MS) has not yet expired and the
 * sidecar is not healthy.
 *
 * @returns {object} Status flags, error messages and the sidecar state under `pocketTts`.
 */
function getStatus() {
  const ttsRetryExpired = ttsLoadError && (Date.now() - ttsLoadErrorTime >= TTS_ERROR_RETRY_MS);
  const pState = pocket.getState();

  const sttError = sttLoadError ? sttLoadError.message : null;
  const reportTtsError = ttsLoadError && !ttsRetryExpired && !pState.healthy;
  const ttsError = reportTtsError ? ttsLoadError.message : null;

  return {
    sttReady: Boolean(sttPipeline),
    ttsReady: Boolean(ttsPipeline) || pState.healthy,
    sttLoading,
    ttsLoading,
    sttError,
    ttsError,
    pocketTts: pState,
  };
}
|
|
566
598
|
|
|
567
599
|
/**
 * Warm up text-to-speech in the background (fire and forget).
 *
 * Starts the pocket-tts sidecar, preferring the 'custom_cleetus' voice file
 * when one exists on disk. If the sidecar cannot be started, or starting it
 * fails with an error, the SpeechT5 pipeline is preloaded instead. Failures
 * are logged, never thrown.
 */
function preloadTTS() {
  const defaultVoice = findCustomVoiceFile('custom_cleetus') || '/config/voices/cleetus.wav';
  const voicePath = fs.existsSync(defaultVoice) ? defaultVoice : null;

  // Single fallback path; it catches its own errors so it never rejects.
  const preloadSpeechT5 = async () => {
    try {
      await getTTS();
    } catch (err) {
      console.error('[TTS] SpeechT5 preload failed:', err.message);
    }
  };

  pocket.start(voicePath).then((ok) => {
    if (ok) {
      console.log('[TTS] pocket-tts sidecar started');
      return undefined;
    }
    console.log('[TTS] pocket-tts unavailable, falling back to SpeechT5');
    return preloadSpeechT5();
  }, (err) => {
    // Previously this error was discarded without a trace.
    console.error('[TTS] pocket-tts start failed, falling back to SpeechT5:', err instanceof Error ? err.message : err);
    return preloadSpeechT5();
  });
}
|
|
570
612
|
|
|
571
613
|
function ttsCacheKey(text, voiceId) {
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -1414,7 +1414,7 @@ function onServerReady() {
|
|
|
1414
1414
|
// Recover stale active sessions from previous run
|
|
1415
1415
|
recoverStaleSessions();
|
|
1416
1416
|
|
|
1417
|
-
getSpeech().then(s => s.
|
|
1417
|
+
getSpeech().then(s => s.preloadTTS()).catch(e => debugLog('[TTS] Preload failed: ' + e.message));
|
|
1418
1418
|
|
|
1419
1419
|
performAutoImport();
|
|
1420
1420
|
|