agentgui 1.0.191 → 1.0.192
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/speech.js +95 -8
- package/package.json +1 -1
- package/lib/pocket-sidecar.js +0 -221
package/lib/speech.js
CHANGED
|
@@ -1,17 +1,75 @@
|
|
|
1
1
|
import { createRequire } from 'module';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
|
+
import http from 'http';
|
|
4
5
|
import { fileURLToPath } from 'url';
|
|
5
6
|
|
|
6
7
|
const require = createRequire(import.meta.url);
|
|
7
8
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
8
9
|
const ROOT = path.dirname(__dirname);
|
|
9
10
|
|
|
10
|
-
// Use webtalk's server-side modules
|
|
11
11
|
const serverSTT = require('webtalk/server-stt');
|
|
12
12
|
const serverTTS = require('webtalk/server-tts');
|
|
13
13
|
|
|
14
14
|
// Extra directories handed to serverTTS.findVoiceFile when resolving a voice id.
const EXTRA_VOICE_DIRS = [path.join(ROOT, 'voices')];
|
|
15
|
+
// Port on 127.0.0.1 that sendToPocket posts synthesis requests to.
const TTS_PORT = 8787;
|
|
16
|
+
|
|
17
|
+
// Byte budget (10 MiB) that cachePut enforces by evicting the oldest entries.
const TTS_CACHE_MAX = 10 * 1024 * 1024;
|
|
18
|
+
// Running sum of the lengths of all buffers currently held in `cache`.
let cacheBytes = 0;
|
|
19
|
+
// Cache key -> synthesized audio buffer. Map insertion order doubles as
// recency order: hits are deleted and re-inserted, evictions take the first key.
const cache = new Map();
|
|
20
|
+
// Cache key -> pending synthesis promise, so concurrent identical requests share one call.
const inflight = new Map();
|
|
21
|
+
|
|
22
|
+
/**
 * Map a voice id to a voice file via serverTTS.findVoiceFile.
 *
 * @param {string} [voiceId] - Voice identifier; falsy or 'default' selects no file.
 * @returns {*} null for the default voice, otherwise whatever
 *   serverTTS.findVoiceFile(voiceId, EXTRA_VOICE_DIRS) returns.
 */
function resolveVoice(voiceId) {
  const wantsDefaultVoice = !voiceId || voiceId === 'default';
  return wantsDefaultVoice
    ? null
    : serverTTS.findVoiceFile(voiceId, EXTRA_VOICE_DIRS);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Store `buf` in the TTS cache under `key`, evicting the oldest entries
 * (Map insertion order) until the total stays within TTS_CACHE_MAX bytes.
 *
 * Any existing entry for `key` is removed first so its bytes are not counted
 * twice and the new entry becomes the most recent one.
 *
 * @param {string} key - Cache key.
 * @param {Buffer} buf - Audio data; only its `length` is inspected here.
 * @returns {void}
 */
function cachePut(key, buf) {
  const previous = cache.get(key);
  if (previous !== undefined) {
    cacheBytes -= previous.length;
    cache.delete(key);
  }

  // A buffer bigger than the whole budget can never fit. Caching it anyway
  // would flush every other entry and still leave cacheBytes above the cap.
  if (buf.length > TTS_CACHE_MAX) return;

  // Deleting the entry currently being visited is safe while iterating a Map.
  for (const [oldestKey, oldestBuf] of cache) {
    if (cacheBytes + buf.length <= TTS_CACHE_MAX) break;
    cacheBytes -= oldestBuf.length;
    cache.delete(oldestKey);
  }

  cache.set(key, buf);
  cacheBytes += buf.length;
}
|
|
37
|
+
|
|
38
|
+
/**
 * POST a synthesis request to the local pocket-tts HTTP service
 * (127.0.0.1:TTS_PORT, path /tts) as multipart/form-data.
 *
 * @param {string} text - Text to synthesize; sent as the `text` form field.
 * @param {string|null} [voicePath] - Path of a voice sample to upload as the
 *   `voice_wav` field. The field is omitted when this is falsy.
 * @returns {Promise<Buffer>} The raw body of a 200 response.
 * @throws {Error} The returned promise rejects when the voice file cannot be
 *   read, on a non-200 status (message includes the response body), on a
 *   request/response stream error, or after a 60 s socket timeout.
 */
async function sendToPocket(text, voicePath) {
  // Date.now() alone repeats for requests issued in the same millisecond and is
  // guessable; a random suffix makes a clash with the payload far less likely.
  const boundary = `----PocketTTS${Date.now()}${Math.random().toString(16).slice(2)}`;
  const parts = [
    `--${boundary}\r\nContent-Disposition: form-data; name="text"\r\n\r\n${text}\r\n`,
  ];

  if (voicePath) {
    // Read asynchronously so a large sample does not block the event loop.
    const data = await fs.promises.readFile(voicePath);
    // A quote or line break in the file name would end the quoted parameter or
    // inject extra header lines, so percent-encode those characters.
    const name = path.basename(voicePath).replace(/["\r\n]/g, ch => encodeURIComponent(ch));
    parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="voice_wav"; filename="${name}"\r\nContent-Type: audio/wav\r\n\r\n`);
    parts.push(data);
    parts.push('\r\n');
  }
  parts.push(`--${boundary}--\r\n`);

  const body = Buffer.concat(parts.map(p => (Buffer.isBuffer(p) ? p : Buffer.from(p))));

  return new Promise((resolve, reject) => {
    const req = http.request({
      hostname: '127.0.0.1', port: TTS_PORT, path: '/tts', method: 'POST',
      headers: { 'Content-Type': `multipart/form-data; boundary=${boundary}`, 'Content-Length': body.length },
      timeout: 60000,
    }, res => {
      const chunks = [];
      res.on('data', chunk => chunks.push(chunk));
      // Without this a response aborted mid-body would never settle the promise.
      res.on('error', reject);
      res.on('end', () => {
        const payload = Buffer.concat(chunks);
        if (res.statusCode !== 200) {
          // Decode once at the end so multi-byte characters split across chunks survive.
          reject(new Error(`pocket-tts HTTP ${res.statusCode}: ${payload.toString()}`));
          return;
        }
        resolve(payload);
      });
    });
    req.on('error', reject);
    // 'timeout' only signals idleness; destroying with an error routes the
    // failure through the single 'error' handler above.
    req.on('timeout', () => req.destroy(new Error('pocket-tts timeout')));
    req.end(body);
  });
}
|
|
15
73
|
|
|
16
74
|
function transcribe(audioBuffer) {
|
|
17
75
|
return serverSTT.transcribe(audioBuffer);
|
|
@@ -21,12 +79,37 @@ function getSTT() {
|
|
|
21
79
|
return serverSTT.getSTT();
|
|
22
80
|
}
|
|
23
81
|
|
|
24
|
-
/**
 * Synthesize `text` with the given voice and return the audio buffer.
 *
 * Lookup order: the cache, then an identical request already in flight, then
 * the backend. The health check runs only when the backend is actually needed,
 * so audio that is already cached stays available while the backend is down.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} [voiceId] - Voice identifier; falsy or 'default' uses no voice file.
 * @returns {Promise<Buffer>} The synthesized audio.
 * @throws {Error} Rejects with 'pocket-tts not healthy' when a backend call is
 *   required but serverTTS.getStatus().ready is falsy, with
 *   'pocket-tts returned empty audio' for a too-short response, or with
 *   whatever sendToPocket rejects with.
 */
async function synthesize(text, voiceId) {
  // Responses this short are rejected as empty. 44 is presumably the size of a
  // bare WAV header with no samples. NOTE(review): confirm against the service.
  const MIN_AUDIO_BYTES = 44;

  const key = ttsCacheKey(text, voiceId);
  const cached = ttsCacheGet(key);
  if (cached) return cached;

  // Join an identical request that is already running instead of duplicating it.
  const pending = inflight.get(key);
  if (pending) return pending;

  if (!serverTTS.getStatus().ready) throw new Error('pocket-tts not healthy');

  const promise = (async () => {
    const voicePath = resolveVoice(voiceId);
    const wav = await sendToPocket(text, voicePath);
    if (!wav || wav.length <= MIN_AUDIO_BYTES) throw new Error('pocket-tts returned empty audio');
    cachePut(key, wav);
    return wav;
  })();
  inflight.set(key, promise);
  try {
    return await promise;
  } finally {
    // Cleared on success and failure alike so a failed request can be retried.
    inflight.delete(key);
  }
}
|
|
27
100
|
|
|
28
|
-
/**
 * Synthesize `text` sentence by sentence, yielding one audio buffer per
 * sentence in order.
 *
 * Each sentence is served from the cache when possible. The backend health
 * check and the voice-file lookup are done once, on the first cache miss,
 * rather than up front or per sentence. A sentence whose response is too
 * short to contain audio is skipped without yielding.
 *
 * @param {string} text - Text to split with splitSentences and synthesize.
 * @param {string} [voiceId] - Voice identifier; falsy or 'default' uses no voice file.
 * @yields {Buffer} Audio for the next sentence.
 * @throws {Error} 'pocket-tts not healthy' when a backend call is required but
 *   serverTTS.getStatus().ready is falsy; also anything sendToPocket rejects with.
 */
async function* synthesizeStream(text, voiceId) {
  // Responses this short are treated as empty. 44 is presumably the size of a
  // bare WAV header with no samples. NOTE(review): confirm against the service.
  const MIN_AUDIO_BYTES = 44;

  let backendPrepared = false;
  let voicePath = null;

  for (const sentence of splitSentences(text)) {
    const key = ttsCacheKey(sentence, voiceId);
    const cached = ttsCacheGet(key);
    if (cached) {
      yield cached;
      continue;
    }

    if (!backendPrepared) {
      if (!serverTTS.getStatus().ready) throw new Error('pocket-tts not healthy');
      // The voice is the same for every sentence, so resolve it only once.
      voicePath = resolveVoice(voiceId);
      backendPrepared = true;
    }

    const wav = await sendToPocket(sentence, voicePath);
    if (wav && wav.length > MIN_AUDIO_BYTES) {
      cachePut(key, wav);
      yield wav;
    }
  }
}
|
|
31
114
|
|
|
32
115
|
function getVoices() {
|
|
@@ -59,15 +142,19 @@ function preloadTTS() {
|
|
|
59
142
|
}
|
|
60
143
|
|
|
61
144
|
/**
 * Build the cache key for a (text, voice) pair.
 *
 * @param {string} text - Text that is (or will be) synthesized.
 * @param {string} [voiceId] - Voice identifier; any falsy value maps to 'default'.
 * @returns {string} The voice name, a colon, then the text.
 */
function ttsCacheKey(text, voiceId) {
  const voice = voiceId || 'default';
  const prefix = voice + ':';
  return prefix + text;
}
|
|
64
147
|
|
|
65
148
|
/**
 * Look up `key` in the TTS cache and, on a hit, mark the entry as the most
 * recently used one by re-inserting it at the end of the Map.
 *
 * @param {string} key - Cache key.
 * @returns {*} The cached value, or null when there is no (truthy) entry.
 */
function ttsCacheGet(key) {
  const hit = cache.get(key);
  if (!hit) {
    return null;
  }
  // Delete-then-set moves the entry to the back of the insertion order.
  cache.delete(key);
  cache.set(key, hit);
  return hit;
}
|
|
68
153
|
|
|
69
154
|
/**
 * Split text into sentences on runs of '.', '!' or '?'.
 *
 * Each returned sentence keeps its terminating punctuation and is trimmed;
 * a trailing fragment without punctuation becomes the last sentence.
 * Whitespace-only pieces are dropped.
 *
 * @param {string} text - Text to split. null/undefined yield no sentences;
 *   other non-string values are converted with String().
 * @returns {string[]} Sentences in order; empty when there is nothing to speak.
 */
function splitSentences(text) {
  if (text == null) return [];
  const source = String(text);
  const raw = source.match(/[^.!?]+[.!?]+[\s]?|[^.!?]+$/g);
  if (!raw) {
    // No match means the input is empty or starts with (and consists only of)
    // punctuation. Return punctuation as-is, but never emit an empty sentence
    // for callers to send off for synthesis.
    return source.length > 0 ? [source] : [];
  }
  return raw.map(s => s.trim()).filter(s => s.length > 0);
}
|
|
72
159
|
|
|
73
160
|
export { transcribe, synthesize, synthesizeStream, getSTT, getStatus, getVoices, preloadTTS, ttsCacheKey, ttsCacheGet, splitSentences };
|
package/package.json
CHANGED
package/lib/pocket-sidecar.js
DELETED
|
@@ -1,221 +0,0 @@
|
|
|
1
|
-
import { spawn } from 'child_process';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import fs from 'fs';
|
|
4
|
-
import os from 'os';
|
|
5
|
-
import { fileURLToPath } from 'url';
|
|
6
|
-
import http from 'http';
|
|
7
|
-
|
|
8
|
-
const ROOT = path.dirname(path.dirname(fileURLToPath(import.meta.url)));
|
|
9
|
-
const PORT = 8787;
|
|
10
|
-
|
|
11
|
-
const FALLBACK_VOICE = 'alba';
|
|
12
|
-
const state = {
|
|
13
|
-
process: null, port: PORT, status: 'stopped', pid: null,
|
|
14
|
-
restartCount: 0, failureCount: 0, lastError: null,
|
|
15
|
-
healthy: false, voicePath: null, starting: false,
|
|
16
|
-
shutdownRequested: false, healthTimer: null, restartTimer: null,
|
|
17
|
-
voiceCloning: false, adopted: false,
|
|
18
|
-
};
|
|
19
|
-
globalThis.__pocketSidecar = state;
|
|
20
|
-
|
|
21
|
-
function findBinary() {
|
|
22
|
-
const candidates = [
|
|
23
|
-
path.join(ROOT, 'data', 'pocket-venv', 'bin', 'pocket-tts'),
|
|
24
|
-
'/config/workspace/agentgui/data/pocket-venv/bin/pocket-tts',
|
|
25
|
-
path.join(os.homedir(), '.gmgui', 'pocket-venv', 'bin', 'pocket-tts'),
|
|
26
|
-
];
|
|
27
|
-
for (const p of candidates) if (fs.existsSync(p)) return p;
|
|
28
|
-
return null;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
function isInstalled() { return !!findBinary(); }
|
|
32
|
-
|
|
33
|
-
function findVoiceFile(voiceId) {
|
|
34
|
-
if (!voiceId || voiceId === 'default') return null;
|
|
35
|
-
const baseName = voiceId.replace(/^custom_/, '');
|
|
36
|
-
const dirs = [
|
|
37
|
-
path.join(process.env.STARTUP_CWD || process.cwd(), 'voices'),
|
|
38
|
-
path.join(ROOT, 'voices'), path.join(os.homedir(), 'voices'), '/config/voices',
|
|
39
|
-
];
|
|
40
|
-
for (const dir of dirs)
|
|
41
|
-
for (const ext of ['.wav', '.mp3', '.ogg', '.flac']) {
|
|
42
|
-
const p = path.join(dir, baseName + ext);
|
|
43
|
-
if (fs.existsSync(p)) return p;
|
|
44
|
-
}
|
|
45
|
-
return null;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
function healthCheck() {
|
|
49
|
-
return new Promise((resolve) => {
|
|
50
|
-
const req = http.get(`http://127.0.0.1:${PORT}/health`, { timeout: 3000 }, (res) => {
|
|
51
|
-
res.resume();
|
|
52
|
-
res.on('end', () => { state.healthy = res.statusCode === 200; resolve(state.healthy); });
|
|
53
|
-
});
|
|
54
|
-
req.on('error', () => { state.healthy = false; resolve(false); });
|
|
55
|
-
req.on('timeout', () => { req.destroy(); state.healthy = false; resolve(false); });
|
|
56
|
-
});
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
function killProcess() {
|
|
60
|
-
if (state.process) { try { state.process.kill('SIGTERM'); } catch (_) {} }
|
|
61
|
-
state.process = null; state.pid = null; state.healthy = false; state.status = 'stopped';
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
function scheduleRestart() {
|
|
65
|
-
if (state.shutdownRequested) return;
|
|
66
|
-
if (!state.adopted) killProcess();
|
|
67
|
-
const delay = Math.min(1000 * Math.pow(2, state.restartCount), 30000);
|
|
68
|
-
state.restartCount++;
|
|
69
|
-
console.log(`[POCKET-TTS] Restart in ${delay}ms (attempt ${state.restartCount})`);
|
|
70
|
-
state.restartTimer = setTimeout(() => {
|
|
71
|
-
state.restartTimer = null;
|
|
72
|
-
state.adopted = false;
|
|
73
|
-
start(state.voicePath).catch(e => console.error('[POCKET-TTS] Restart failed:', e.message));
|
|
74
|
-
}, delay);
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
function spawnSidecar(voice) {
|
|
78
|
-
const bin = findBinary();
|
|
79
|
-
if (!bin) throw new Error('pocket-tts binary not found');
|
|
80
|
-
const args = ['serve', '--host', '0.0.0.0', '--port', String(PORT)];
|
|
81
|
-
if (voice) args.push('--voice', voice);
|
|
82
|
-
console.log('[POCKET-TTS] Starting:', bin, args.join(' '));
|
|
83
|
-
return spawn(bin, args, {
|
|
84
|
-
stdio: ['ignore', 'pipe', 'pipe'],
|
|
85
|
-
env: { ...process.env, PYTHONUNBUFFERED: '1' },
|
|
86
|
-
});
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
function attachProc(proc) {
|
|
90
|
-
state.process = proc; state.pid = proc.pid; state.status = 'starting';
|
|
91
|
-
proc.stdout.on('data', d => { const l = d.toString().trim(); if (l) console.log('[POCKET-TTS]', l); });
|
|
92
|
-
proc.stderr.on('data', d => { const l = d.toString().trim(); if (l) console.error('[POCKET-TTS]', l); });
|
|
93
|
-
proc.on('error', e => { state.lastError = e.message; });
|
|
94
|
-
}
|
|
95
|
-
|
|
96
|
-
async function waitForReady(proc, timeoutSec) {
|
|
97
|
-
let exited = false;
|
|
98
|
-
proc.on('exit', () => { exited = true; });
|
|
99
|
-
for (let i = 0; i < timeoutSec; i++) {
|
|
100
|
-
if (exited) return false;
|
|
101
|
-
await new Promise(r => setTimeout(r, 1000));
|
|
102
|
-
if (await healthCheck()) return true;
|
|
103
|
-
}
|
|
104
|
-
return false;
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
async function adoptRunning() {
|
|
108
|
-
if (await healthCheck()) {
|
|
109
|
-
state.status = 'running'; state.healthy = true; state.adopted = true;
|
|
110
|
-
state.restartCount = 0; state.failureCount = 0; state.lastError = null;
|
|
111
|
-
if (!state.healthTimer) state.healthTimer = setInterval(async () => {
|
|
112
|
-
if (state.status !== 'running') return;
|
|
113
|
-
const ok = await healthCheck();
|
|
114
|
-
if (!ok && !state.shutdownRequested) {
|
|
115
|
-
state.failureCount++;
|
|
116
|
-
if (state.failureCount >= 3) { state.adopted = false; scheduleRestart(); }
|
|
117
|
-
} else if (ok) state.failureCount = 0;
|
|
118
|
-
}, 10000);
|
|
119
|
-
console.log('[POCKET-TTS] Adopted existing instance on port', PORT);
|
|
120
|
-
return true;
|
|
121
|
-
}
|
|
122
|
-
return false;
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
async function start(voicePath) {
|
|
126
|
-
if (state.starting) return false;
|
|
127
|
-
if (state.status === 'running' && state.healthy) return true;
|
|
128
|
-
if (await adoptRunning()) return true;
|
|
129
|
-
if (!isInstalled()) { state.lastError = 'not installed'; state.status = 'unavailable'; return false; }
|
|
130
|
-
state.starting = true; state.shutdownRequested = false;
|
|
131
|
-
const requestedVoice = voicePath || state.voicePath;
|
|
132
|
-
try {
|
|
133
|
-
killProcess();
|
|
134
|
-
let proc = spawnSidecar(requestedVoice);
|
|
135
|
-
attachProc(proc);
|
|
136
|
-
let ready = await waitForReady(proc, 120);
|
|
137
|
-
if (!ready && requestedVoice && requestedVoice !== FALLBACK_VOICE) {
|
|
138
|
-
console.log('[POCKET-TTS] Custom voice failed, trying predefined voice:', FALLBACK_VOICE);
|
|
139
|
-
killProcess();
|
|
140
|
-
proc = spawnSidecar(FALLBACK_VOICE);
|
|
141
|
-
attachProc(proc);
|
|
142
|
-
state.voiceCloning = false;
|
|
143
|
-
ready = await waitForReady(proc, 120);
|
|
144
|
-
if (ready) state.voicePath = FALLBACK_VOICE;
|
|
145
|
-
} else if (ready) {
|
|
146
|
-
state.voicePath = requestedVoice;
|
|
147
|
-
state.voiceCloning = !!requestedVoice && !['alba','marius','javert','jean','fantine','cosette','eponine','azelma'].includes(requestedVoice);
|
|
148
|
-
}
|
|
149
|
-
if (ready) {
|
|
150
|
-
state.status = 'running'; state.restartCount = 0; state.failureCount = 0; state.lastError = null;
|
|
151
|
-
proc.on('exit', (code, sig) => {
|
|
152
|
-
console.log(`[POCKET-TTS] Exited: code=${code} signal=${sig}`);
|
|
153
|
-
state.process = null; state.pid = null; state.healthy = false; state.status = 'stopped';
|
|
154
|
-
if (!state.shutdownRequested) scheduleRestart();
|
|
155
|
-
});
|
|
156
|
-
if (!state.healthTimer) state.healthTimer = setInterval(async () => {
|
|
157
|
-
if (state.status !== 'running') return;
|
|
158
|
-
const ok = await healthCheck();
|
|
159
|
-
if (!ok && !state.shutdownRequested) {
|
|
160
|
-
state.failureCount++;
|
|
161
|
-
if (state.failureCount >= 3) scheduleRestart();
|
|
162
|
-
} else if (ok) state.failureCount = 0;
|
|
163
|
-
}, 10000);
|
|
164
|
-
console.log('[POCKET-TTS] Ready on port', PORT, '(voice cloning:', state.voiceCloning + ')');
|
|
165
|
-
return true;
|
|
166
|
-
}
|
|
167
|
-
state.lastError = 'Start timeout'; state.status = 'error'; killProcess(); return false;
|
|
168
|
-
} catch (err) {
|
|
169
|
-
state.lastError = err.message; state.status = 'error'; return false;
|
|
170
|
-
} finally { state.starting = false; }
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
async function stop() {
|
|
174
|
-
state.shutdownRequested = true;
|
|
175
|
-
if (state.healthTimer) { clearInterval(state.healthTimer); state.healthTimer = null; }
|
|
176
|
-
if (state.restartTimer) { clearTimeout(state.restartTimer); state.restartTimer = null; }
|
|
177
|
-
killProcess();
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
async function synthesize(text, voicePath) {
|
|
181
|
-
if (!state.healthy) throw new Error('pocket-tts not ready');
|
|
182
|
-
const boundary = '----PocketTTS' + Date.now();
|
|
183
|
-
const parts = [];
|
|
184
|
-
parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="text"\r\n\r\n${text}\r\n`);
|
|
185
|
-
if (state.voiceCloning && voicePath && voicePath !== state.voicePath) {
|
|
186
|
-
const data = fs.readFileSync(voicePath);
|
|
187
|
-
const name = path.basename(voicePath);
|
|
188
|
-
parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="voice_wav"; filename="${name}"\r\nContent-Type: audio/wav\r\n\r\n`);
|
|
189
|
-
parts.push(data); parts.push('\r\n');
|
|
190
|
-
}
|
|
191
|
-
parts.push(`--${boundary}--\r\n`);
|
|
192
|
-
const body = Buffer.concat(parts.map(p => Buffer.isBuffer(p) ? p : Buffer.from(p)));
|
|
193
|
-
return new Promise((resolve, reject) => {
|
|
194
|
-
const req = http.request({
|
|
195
|
-
hostname: '127.0.0.1', port: PORT, path: '/tts', method: 'POST',
|
|
196
|
-
headers: { 'Content-Type': `multipart/form-data; boundary=${boundary}`, 'Content-Length': body.length },
|
|
197
|
-
timeout: 60000,
|
|
198
|
-
}, res => {
|
|
199
|
-
if (res.statusCode !== 200) {
|
|
200
|
-
let e = ''; res.on('data', d => e += d);
|
|
201
|
-
res.on('end', () => reject(new Error(`pocket-tts HTTP ${res.statusCode}: ${e}`)));
|
|
202
|
-
return;
|
|
203
|
-
}
|
|
204
|
-
const chunks = []; res.on('data', d => chunks.push(d));
|
|
205
|
-
res.on('end', () => resolve(Buffer.concat(chunks)));
|
|
206
|
-
});
|
|
207
|
-
req.on('error', reject);
|
|
208
|
-
req.on('timeout', () => { req.destroy(); reject(new Error('pocket-tts timeout')); });
|
|
209
|
-
req.write(body); req.end();
|
|
210
|
-
});
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
function getState() {
|
|
214
|
-
return {
|
|
215
|
-
status: state.status, healthy: state.healthy, pid: state.pid, port: state.port,
|
|
216
|
-
restartCount: state.restartCount, failureCount: state.failureCount,
|
|
217
|
-
lastError: state.lastError, installed: isInstalled(), voiceCloning: state.voiceCloning,
|
|
218
|
-
};
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
export { start, stop, synthesize, healthCheck, getState, isInstalled, findVoiceFile };
|