agentgui 1.0.178 → 1.0.179

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,190 @@
1
+ import { spawn } from 'child_process';
2
+ import path from 'path';
3
+ import fs from 'fs';
4
+ import os from 'os';
5
+ import { fileURLToPath } from 'url';
6
+ import http from 'http';
7
+
8
// Package root: the directory that contains the directory holding this module.
const ROOT = path.dirname(
  path.dirname(fileURLToPath(import.meta.url)),
);

// Executable of the pocket-tts sidecar inside its bundled virtualenv.
const POCKET_BIN = path.join(ROOT, 'data', 'pocket-venv', 'bin', 'pocket-tts');

// TCP port the sidecar is asked to listen on and that this module connects to.
const PORT = 8787;

// Voice name used when starting with the requested voice does not succeed.
const FALLBACK_VOICE = 'alba';

// Mutable supervisor state shared by every function in this module.
const state = {
  // Child process handle and its pid (null while nothing is running).
  process: null,
  pid: null,
  port: PORT,
  // Lifecycle marker; the code in this module assigns 'stopped', 'starting',
  // 'running', 'error' and 'unavailable'.
  status: 'stopped',
  // Result of the most recent health probe.
  healthy: false,
  // Bookkeeping for restarts and consecutive failed health probes.
  restartCount: 0,
  failureCount: 0,
  lastError: null,
  // Voice the sidecar was last started with successfully.
  voicePath: null,
  voiceCloning: false,
  // Re-entrancy and shutdown flags.
  starting: false,
  shutdownRequested: false,
  // Handles of the periodic health poll and of a pending restart.
  healthTimer: null,
  restartTimer: null,
};

// Expose the state object on the global object under a fixed key.
globalThis.__pocketSidecar = state;
21
+
22
/**
 * Report whether the pocket-tts executable exists at its expected location.
 *
 * @returns {boolean} true when a file system entry exists at POCKET_BIN.
 */
function isInstalled() {
  const binaryPresent = fs.existsSync(POCKET_BIN);
  return binaryPresent;
}
23
+
24
/**
 * Locate the audio sample file that belongs to a voice identifier.
 *
 * A leading "custom_" prefix is stripped from the identifier, then each search
 * directory is probed for "<name>.wav", ".mp3", ".ogg" and ".flac" (in that
 * order); the first existing file wins.
 *
 * @param {string} voiceId - Voice identifier, optionally prefixed "custom_".
 * @returns {string|null} Path of the first matching file, or null when the
 *   identifier is empty, "default", not a plain file name, or nothing matches.
 */
function findVoiceFile(voiceId) {
  if (typeof voiceId !== 'string' || voiceId === '' || voiceId === 'default') return null;

  const baseName = voiceId.replace(/^custom_/, '');
  // The identifier becomes part of a file system path, so it must be a bare
  // file name: anything carrying directory components (for example "../x")
  // could otherwise probe files outside the voice directories.
  if (baseName === '' || path.basename(baseName) !== baseName) return null;

  const searchDirs = [
    path.join(process.env.STARTUP_CWD || process.cwd(), 'voices'),
    path.join(ROOT, 'voices'),
    path.join(os.homedir(), 'voices'),
    '/config/voices',
  ];
  const extensions = ['.wav', '.mp3', '.ogg', '.flac'];

  for (const dir of searchDirs) {
    for (const ext of extensions) {
      const candidate = path.join(dir, baseName + ext);
      if (fs.existsSync(candidate)) return candidate;
    }
  }
  return null;
}
38
+
39
/**
 * Probe the sidecar's /health endpoint on the loopback interface.
 *
 * The outcome is also stored in state.healthy. The returned promise never
 * rejects and settles exactly once.
 *
 * @returns {Promise<boolean>} true only when the endpoint answered with
 *   HTTP 200 and the response was received to its end.
 */
function healthCheck() {
  return new Promise((resolve) => {
    let settled = false;
    // First outcome wins; later events (e.g. the error that follows a
    // destroyed request) must not overwrite it.
    const settle = (ok) => {
      if (settled) return;
      settled = true;
      state.healthy = ok;
      resolve(ok);
    };

    const req = http.get(`http://127.0.0.1:${PORT}/health`, { timeout: 3000 }, (res) => {
      res.resume(); // drain the body so 'end' can fire
      res.on('end', () => settle(res.statusCode === 200));
      // A response that errors or closes without reaching 'end' would
      // otherwise leave the promise pending forever.
      res.on('error', () => settle(false));
      res.on('close', () => settle(false));
    });
    req.on('error', () => settle(false));
    req.on('timeout', () => {
      req.destroy();
      settle(false);
    });
  });
}
49
+
50
/**
 * Send SIGTERM to the tracked child process (if any) and reset the
 * process-related fields of the shared state to their "stopped" values.
 * Errors thrown by kill() are deliberately ignored.
 */
function killProcess() {
  const child = state.process;
  if (child) {
    try {
      child.kill('SIGTERM');
    } catch (_) {
      // Best effort: nothing useful can be done if signalling fails.
    }
  }
  Object.assign(state, {
    process: null,
    pid: null,
    healthy: false,
    status: 'stopped',
  });
}
54
+
55
/**
 * Kill the current sidecar and schedule a new start() after an exponential
 * back-off (1s, 2s, 4s, ... capped at 30s, based on state.restartCount).
 *
 * Does nothing once shutdown has been requested. If a restart is already
 * pending, the process is still killed but no second timer is armed.
 */
function scheduleRestart() {
  if (state.shutdownRequested) return;
  killProcess();

  // Killing the child makes its exit handler call back into this function.
  // Without this guard that second call would stack another timer (losing
  // the handle of the first) and count one restart twice.
  if (state.restartTimer) return;

  const delay = Math.min(1000 * Math.pow(2, state.restartCount), 30000);
  state.restartCount++;
  console.log(`[POCKET-TTS] Restart in ${delay}ms (attempt ${state.restartCount})`);
  state.restartTimer = setTimeout(() => {
    state.restartTimer = null;
    start(state.voicePath).catch(e => console.error('[POCKET-TTS] Restart failed:', e.message));
  }, delay);
}
66
+
67
/**
 * Launch the pocket-tts server as a child process.
 *
 * stdin is ignored, stdout/stderr are piped, and the child inherits the
 * current environment plus PYTHONUNBUFFERED=1.
 *
 * @param {string|null|undefined} voice - Value for the --voice option;
 *   the option is omitted when falsy.
 * @returns {import('child_process').ChildProcess} The spawned child.
 */
function spawnSidecar(voice) {
  const voiceArgs = voice ? ['--voice', voice] : [];
  // NOTE(review): the server is told to bind 0.0.0.0 although this module
  // only ever connects via 127.0.0.1 - confirm that exposure is intended.
  const args = ['serve', '--host', '0.0.0.0', '--port', String(PORT), ...voiceArgs];
  console.log('[POCKET-TTS] Starting:', POCKET_BIN, args.join(' '));

  const childEnv = { ...process.env, PYTHONUNBUFFERED: '1' };
  return spawn(POCKET_BIN, args, {
    stdio: ['ignore', 'pipe', 'pipe'],
    env: childEnv,
  });
}
76
+
77
/**
 * Register a freshly spawned child as the current sidecar process.
 *
 * Records it in the shared state with status 'starting', forwards its
 * non-empty stdout/stderr chunks to console.log/console.error with a
 * "[POCKET-TTS]" prefix, and stores the message of any 'error' event in
 * state.lastError.
 *
 * @param {import('child_process').ChildProcess} proc - Child with piped
 *   stdout and stderr.
 */
function attachProc(proc) {
  Object.assign(state, { process: proc, pid: proc.pid, status: 'starting' });

  // Relay every non-blank chunk of a stream to the given sink.
  const relay = (stream, sink) => {
    stream.on('data', (chunk) => {
      const text = chunk.toString().trim();
      if (text) sink(text);
    });
  };
  relay(proc.stdout, (text) => console.log('[POCKET-TTS]', text));
  relay(proc.stderr, (text) => console.error('[POCKET-TTS]', text));

  proc.on('error', (err) => {
    state.lastError = err.message;
  });
}
83
+
84
/**
 * Poll the health endpoint once per second until the sidecar answers, the
 * child is gone, or the attempt budget is used up.
 *
 * @param {import('child_process').ChildProcess} proc - Child being waited on.
 * @param {number} timeoutSec - Maximum number of one-second polling rounds.
 * @returns {Promise<boolean>} true when a health check succeeded while the
 *   child was still alive, false otherwise.
 */
async function waitForReady(proc, timeoutSec) {
  // The child may already be gone (or may never have been spawned) before
  // any listener is attached; its own fields tell us so.
  let exited = proc.pid === undefined || proc.exitCode != null || proc.signalCode != null;

  const onExit = () => { exited = true; };
  // A failed spawn emits 'error' and never 'exit'; it is recognisable by the
  // missing pid. Other 'error' causes (e.g. a failed kill) leave it running.
  const onError = () => { if (proc.pid === undefined) exited = true; };
  proc.on('exit', onExit);
  proc.on('error', onError);

  try {
    for (let i = 0; i < timeoutSec; i++) {
      if (exited) return false;
      await new Promise((r) => setTimeout(r, 1000));
      // Re-check after sleeping so that some other server answering on the
      // port is not mistaken for a child that has just died.
      if (exited) return false;
      if (await healthCheck()) return true;
    }
    return false;
  } finally {
    // Do not leave per-call listeners behind on the child.
    proc.off('exit', onExit);
    proc.off('error', onError);
  }
}
94
+
95
/**
 * Start the pocket-tts sidecar and wait until it reports healthy.
 *
 * If starting with the requested voice does not become ready, one further
 * attempt is made with FALLBACK_VOICE. On success an exit handler (automatic
 * restart) and a 10-second health poll are installed.
 *
 * @param {string|null|undefined} voicePath - Voice to start with; falls back
 *   to the voice of the previous successful start when falsy.
 * @returns {Promise<boolean>} true when the sidecar is running and healthy;
 *   false when it is not installed, another start is in progress, shutdown
 *   was requested meanwhile, or it failed to become ready.
 */
async function start(voicePath) {
  if (!isInstalled()) { state.lastError = 'not installed'; state.status = 'unavailable'; return false; }
  if (state.starting) return false;
  if (state.status === 'running' && state.healthy) return true;

  state.starting = true;
  state.shutdownRequested = false;
  const requestedVoice = voicePath || state.voicePath;
  // Voice names shipped with pocket-tts; anything else counts as voice cloning.
  const predefinedVoices = ['alba', 'marius', 'javert', 'jean', 'fantine', 'cosette', 'eponine', 'azelma'];

  try {
    killProcess();
    let proc = spawnSidecar(requestedVoice);
    attachProc(proc);
    let ready = await waitForReady(proc, 120);

    const canFallBack = requestedVoice && requestedVoice !== FALLBACK_VOICE;
    // Never spawn the fallback after stop() was called during the first wait.
    if (!ready && canFallBack && !state.shutdownRequested) {
      console.log('[POCKET-TTS] Custom voice failed, trying predefined voice:', FALLBACK_VOICE);
      killProcess();
      proc = spawnSidecar(FALLBACK_VOICE);
      attachProc(proc);
      state.voiceCloning = false;
      ready = await waitForReady(proc, 120);
      if (ready) state.voicePath = FALLBACK_VOICE;
    } else if (ready) {
      state.voicePath = requestedVoice;
      state.voiceCloning = !!requestedVoice && !predefinedVoices.includes(requestedVoice);
    }

    // A stop() or a superseding kill that raced with the last health probe
    // means this child is no longer the tracked one: do not report success.
    if (ready && (state.shutdownRequested || state.process !== proc)) ready = false;

    if (ready) {
      state.status = 'running'; state.restartCount = 0; state.failureCount = 0; state.lastError = null;
      proc.on('exit', (code, sig) => {
        console.log(`[POCKET-TTS] Exited: code=${code} signal=${sig}`);
        // killProcess() clears state.process before the child actually exits.
        // Such a deliberately killed (or already replaced) child must neither
        // wipe the state of its successor nor schedule another restart.
        if (state.process !== proc) return;
        state.process = null; state.pid = null; state.healthy = false; state.status = 'stopped';
        if (!state.shutdownRequested) scheduleRestart();
      });
      if (!state.healthTimer) state.healthTimer = setInterval(async () => {
        if (state.status !== 'running') return;
        const ok = await healthCheck();
        if (!ok && !state.shutdownRequested) {
          state.failureCount++;
          // Three consecutive failed probes trigger a restart.
          if (state.failureCount >= 3) scheduleRestart();
        } else if (ok) state.failureCount = 0;
      }, 10000);
      console.log('[POCKET-TTS] Ready on port', PORT, '(voice cloning:', state.voiceCloning + ')');
      return true;
    }

    // killProcess() resets status to 'stopped', so it has to run before the
    // error status is recorded, not after.
    killProcess();
    if (state.shutdownRequested) return false; // stopped on purpose: not an error
    state.lastError = 'Start timeout';
    state.status = 'error';
    return false;
  } catch (err) {
    killProcess(); // do not leave a half-started child behind
    state.lastError = err.message;
    state.status = 'error';
    return false;
  } finally {
    state.starting = false;
  }
}
141
+
142
/**
 * Shut the sidecar down: flag the shutdown so that no restart is scheduled,
 * cancel the health poll and any pending restart, then kill the child.
 *
 * @returns {Promise<void>}
 */
async function stop() {
  state.shutdownRequested = true;

  const { healthTimer, restartTimer } = state;
  if (healthTimer) {
    clearInterval(healthTimer);
    state.healthTimer = null;
  }
  if (restartTimer) {
    clearTimeout(restartTimer);
    state.restartTimer = null;
  }

  killProcess();
}
148
+
149
/**
 * Ask the sidecar to synthesize speech for a text.
 *
 * Sends a multipart/form-data POST to /tts on the loopback interface. When
 * voice cloning is active and voicePath differs from the voice the sidecar
 * was started with, the file's contents are uploaded as the "voice_wav" part.
 *
 * @param {string} text - Text to speak.
 * @param {string|null|undefined} voicePath - Optional voice sample file.
 * @returns {Promise<Buffer>} The raw response body of a 200 response.
 * @throws {Error} When the sidecar is not healthy, the voice file cannot be
 *   read, the request fails or times out (60s), the response is cut short,
 *   or the server answers with a non-200 status.
 */
async function synthesize(text, voicePath) {
  if (!state.healthy) throw new Error('pocket-tts not ready');

  // A random suffix makes a clash between boundary and payload practically
  // impossible even for requests issued within the same millisecond.
  const boundary = '----PocketTTS' + Date.now() + Math.random().toString(36).slice(2);
  const parts = [
    `--${boundary}\r\nContent-Disposition: form-data; name="text"\r\n\r\n${text}\r\n`,
  ];

  if (state.voiceCloning && voicePath && voicePath !== state.voicePath) {
    // Asynchronous read: a synchronous one would block the event loop.
    const data = await fs.promises.readFile(voicePath);
    // Quotes and line breaks would corrupt the part header.
    const name = path.basename(voicePath).replace(/["\r\n]/g, '_');
    parts.push(
      `--${boundary}\r\nContent-Disposition: form-data; name="voice_wav"; filename="${name}"\r\nContent-Type: audio/wav\r\n\r\n`,
      data,
      '\r\n',
    );
  }
  parts.push(`--${boundary}--\r\n`);
  const body = Buffer.concat(parts.map(p => (Buffer.isBuffer(p) ? p : Buffer.from(p))));

  return new Promise((resolve, reject) => {
    const req = http.request({
      hostname: '127.0.0.1', port: PORT, path: '/tts', method: 'POST',
      headers: { 'Content-Type': `multipart/form-data; boundary=${boundary}`, 'Content-Length': body.length },
      timeout: 60000,
    }, (res) => {
      // Without these two handlers a response that breaks off midway would
      // leave the promise pending forever. Settling twice is harmless.
      res.on('error', reject);
      res.on('close', () => {
        if (!res.complete) reject(new Error('pocket-tts response aborted'));
      });

      if (res.statusCode !== 200) {
        let detail = '';
        res.on('data', (d) => { detail += d; });
        res.on('end', () => reject(new Error(`pocket-tts HTTP ${res.statusCode}: ${detail}`)));
        return;
      }
      const chunks = [];
      res.on('data', (d) => chunks.push(d));
      res.on('end', () => resolve(Buffer.concat(chunks)));
    });
    req.on('error', reject);
    req.on('timeout', () => { req.destroy(); reject(new Error('pocket-tts timeout')); });
    req.write(body);
    req.end();
  });
}
181
+
182
/**
 * Build a plain snapshot of the sidecar state for status reporting.
 *
 * Only scalar fields are copied (no process or timer handles); "installed"
 * is evaluated at call time.
 *
 * @returns {{status: string, healthy: boolean, pid: ?number, port: number,
 *   restartCount: number, failureCount: number, lastError: ?string,
 *   installed: boolean, voiceCloning: boolean}}
 */
function getState() {
  const {
    status, healthy, pid, port,
    restartCount, failureCount, lastError, voiceCloning,
  } = state;
  const installed = isInstalled();
  return {
    status, healthy, pid, port,
    restartCount, failureCount,
    lastError, installed, voiceCloning,
  };
}
189
+
190
+ export { start, stop, synthesize, healthCheck, getState, isInstalled, findVoiceFile };
package/lib/speech.js CHANGED
@@ -3,6 +3,7 @@ import fs from 'fs';
3
3
  import os from 'os';
4
4
  import path from 'path';
5
5
  import { fileURLToPath } from 'url';
6
+ import * as pocket from './pocket-sidecar.js';
6
7
 
7
8
  const require = createRequire(import.meta.url);
8
9
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -510,6 +511,24 @@ function cachePut(key, buf) {
510
511
  ttsCacheBytes += buf.length;
511
512
  }
512
513
 
514
/**
 * Map a voice identifier to a voice file path.
 *
 * The sidecar's own lookup is consulted first; findCustomVoiceFile is used
 * when that yields nothing.
 *
 * @param {string|null|undefined} voiceId - Voice identifier.
 * @returns {string|null} Result of the first lookup that yields a truthy
 *   value, otherwise whatever findCustomVoiceFile returned; null for an
 *   empty or "default" identifier.
 */
function resolveVoicePath(voiceId) {
  const wantsDefault = !voiceId || voiceId === 'default';
  if (wantsDefault) return null;

  const sidecarMatch = pocket.findVoiceFile(voiceId);
  return sidecarMatch || findCustomVoiceFile(voiceId);
}
518
+
519
/**
 * Try to synthesize speech through the pocket-tts sidecar.
 *
 * Never throws: failures are logged and reported as null so that the caller
 * can fall back to another engine.
 *
 * @param {string} text - Text to speak.
 * @param {string|null|undefined} voiceId - Voice identifier.
 * @returns {Promise<Buffer|null>} The audio returned by the sidecar, or null
 *   when the sidecar is not healthy, the request failed, or the result is
 *   not longer than 44 bytes (presumably a bare WAV header - confirm).
 */
async function synthesizeViaPocket(text, voiceId) {
  if (!pocket.getState().healthy) return null;

  let wav;
  try {
    wav = await pocket.synthesize(text, resolveVoicePath(voiceId));
  } catch (err) {
    console.error('[TTS] pocket-tts failed, falling back:', err.message);
    return null;
  }

  const hasAudio = wav && wav.length > 44;
  return hasAudio ? wav : null;
}
531
+
513
532
  async function synthesize(text, voiceId) {
514
533
  const cacheKey = (voiceId || 'default') + ':' + text;
515
534
  const cached = ttsCache.get(cacheKey);
@@ -521,6 +540,8 @@ async function synthesize(text, voiceId) {
521
540
  const inflight = ttsInflight.get(cacheKey);
522
541
  if (inflight) return inflight;
523
542
  const promise = (async () => {
543
+ const pocketWav = await synthesizeViaPocket(text, voiceId);
544
+ if (pocketWav) { cachePut(cacheKey, pocketWav); return pocketWav; }
524
545
  const tts = await getTTS();
525
546
  const embeddings = await loadVoiceEmbedding(voiceId);
526
547
  const result = await tts(text, { speaker_embeddings: embeddings });
@@ -534,8 +555,12 @@ async function synthesize(text, voiceId) {
534
555
 
535
556
  async function* synthesizeStream(text, voiceId) {
536
557
  const sentences = splitSentences(text);
537
- const tts = await getTTS();
538
- const embeddings = await loadVoiceEmbedding(voiceId);
558
+ const usePocket = pocket.getState().healthy;
559
+ let tts, embeddings;
560
+ if (!usePocket) {
561
+ tts = await getTTS();
562
+ embeddings = await loadVoiceEmbedding(voiceId);
563
+ }
539
564
  for (const sentence of sentences) {
540
565
  const cacheKey = (voiceId || 'default') + ':' + sentence;
541
566
  const cached = ttsCache.get(cacheKey);
@@ -545,6 +570,11 @@ async function* synthesizeStream(text, voiceId) {
545
570
  yield cached;
546
571
  continue;
547
572
  }
573
+ if (usePocket) {
574
+ const pocketWav = await synthesizeViaPocket(sentence, voiceId);
575
+ if (pocketWav) { cachePut(cacheKey, pocketWav); yield pocketWav; continue; }
576
+ }
577
+ if (!tts) { tts = await getTTS(); embeddings = await loadVoiceEmbedding(voiceId); }
548
578
  const result = await tts(sentence, { speaker_embeddings: embeddings });
549
579
  const wav = encodeWav(result.audio, result.sampling_rate || SAMPLE_RATE_TTS);
550
580
  cachePut(cacheKey, wav);
@@ -554,18 +584,30 @@ async function* synthesizeStream(text, voiceId) {
554
584
 
555
585
  function getStatus() {
556
586
  const ttsRetryExpired = ttsLoadError && (Date.now() - ttsLoadErrorTime >= TTS_ERROR_RETRY_MS);
587
+ const pState = pocket.getState();
557
588
  return {
558
589
  sttReady: !!sttPipeline,
559
- ttsReady: !!ttsPipeline,
590
+ ttsReady: !!ttsPipeline || pState.healthy,
560
591
  sttLoading,
561
592
  ttsLoading,
562
593
  sttError: sttLoadError ? sttLoadError.message : null,
563
- ttsError: (ttsLoadError && !ttsRetryExpired) ? ttsLoadError.message : null,
594
+ ttsError: (ttsLoadError && !ttsRetryExpired && !pState.healthy) ? ttsLoadError.message : null,
595
+ pocketTts: pState,
564
596
  };
565
597
  }
566
598
 
567
599
  function preloadTTS() {
568
- getTTS().catch(err => console.error('[TTS] Preload failed:', err.message));
600
+ const defaultVoice = findCustomVoiceFile('custom_cleetus') || '/config/voices/cleetus.wav';
601
+ const voicePath = fs.existsSync(defaultVoice) ? defaultVoice : null;
602
+ pocket.start(voicePath).then(ok => {
603
+ if (ok) console.log('[TTS] pocket-tts sidecar started');
604
+ else {
605
+ console.log('[TTS] pocket-tts unavailable, falling back to SpeechT5');
606
+ getTTS().catch(err => console.error('[TTS] SpeechT5 preload failed:', err.message));
607
+ }
608
+ }).catch(() => {
609
+ getTTS().catch(err => console.error('[TTS] SpeechT5 preload failed:', err.message));
610
+ });
569
611
  }
570
612
 
571
613
  function ttsCacheKey(text, voiceId) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.178",
3
+ "version": "1.0.179",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -1414,7 +1414,7 @@ function onServerReady() {
1414
1414
  // Recover stale active sessions from previous run
1415
1415
  recoverStaleSessions();
1416
1416
 
1417
- getSpeech().then(s => s.getTTS()).then(() => debugLog('[TTS] Model preloaded')).catch(e => debugLog('[TTS] Preload failed: ' + e.message));
1417
+ getSpeech().then(s => s.preloadTTS()).catch(e => debugLog('[TTS] Preload failed: ' + e.message));
1418
1418
 
1419
1419
  performAutoImport();
1420
1420