agentgui 1.0.288 → 1.0.290

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/lib/speech.js +22 -123
  2. package/package.json +1 -2
  3. package/server.js +12 -5
package/lib/speech.js CHANGED
@@ -1,7 +1,6 @@
1
1
  import { createRequire } from 'module';
2
2
  import fs from 'fs';
3
3
  import path from 'path';
4
- import os from 'os';
5
4
  import http from 'http';
6
5
  import { fileURLToPath } from 'url';
7
6
 
@@ -9,12 +8,11 @@ const require = createRequire(import.meta.url);
9
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
10
9
  const ROOT = path.dirname(__dirname);
11
10
 
12
- let serverSTT = null, serverTTS = null, edgeTTS = null;
11
+ let serverSTT = null;
13
12
  try { serverSTT = require('webtalk/server-stt'); } catch(e) { console.warn('[STT] webtalk/server-stt unavailable:', e.message); }
14
- try { serverTTS = require('webtalk/server-tts'); } catch(e) { console.warn('[TTS] webtalk/server-tts unavailable:', e.message); }
15
- try { edgeTTS = require('edge-tts-universal'); } catch(e) { console.warn('[TTS] edge-tts-universal unavailable:', e.message); }
16
13
 
17
- const EXTRA_VOICE_DIRS = [path.join(ROOT, 'voices')];
14
+ const VOICE_DIRS = [path.join(ROOT, 'voices')];
15
+ const POCKET_PORT = 8787;
18
16
 
19
17
  const POCKET_TTS_VOICES = [
20
18
  { id: 'default', name: 'Default', gender: 'female', accent: 'French' },
@@ -28,31 +26,7 @@ const POCKET_TTS_VOICES = [
28
26
  { id: 'azelma', name: 'Azelma', gender: 'female', accent: 'French' },
29
27
  ];
30
28
 
31
- const EDGE_VOICE_MAP = {
32
- default: 'fr-FR-DeniseNeural', alba: 'fr-FR-DeniseNeural',
33
- marius: 'fr-FR-HenriNeural', javert: 'fr-FR-HenriNeural',
34
- jean: 'fr-FR-HenriNeural', fantine: 'fr-FR-DeniseNeural',
35
- cosette: 'fr-FR-DeniseNeural', eponine: 'fr-FR-DeniseNeural',
36
- azelma: 'fr-FR-DeniseNeural',
37
- };
38
-
39
29
  const PREDEFINED_IDS = new Set(POCKET_TTS_VOICES.filter(v => v.id !== 'default').map(v => v.id));
40
- const POCKET_PORT = 8787;
41
-
42
- // Detect if serverTTS has the expected API (getVoices = old server-tts, not the ONNX version)
43
- // The ONNX server-tts-onnx has a different API (synthesize takes modelDir not extraDirs)
44
- // and is incompatible with our voice-based approach - skip it and use edge-tts instead
45
- if (serverTTS && typeof serverTTS.getVoices !== 'function') {
46
- console.warn('[TTS] webtalk/server-tts has incompatible API (ONNX version), disabling it');
47
- serverTTS = null;
48
- }
49
-
50
- let needsPatch = true;
51
- try {
52
- if (serverTTS && typeof serverTTS.getVoices === 'function') {
53
- needsPatch = !serverTTS.getVoices(EXTRA_VOICE_DIRS).some(v => v.id === 'alba' && !v.isCustom);
54
- }
55
- } catch(e) { needsPatch = true; }
56
30
 
57
31
  function getSttOptions() {
58
32
  if (process.env.PORTABLE_DATA_DIR) {
@@ -61,25 +35,16 @@ function getSttOptions() {
61
35
  return {};
62
36
  }
63
37
 
64
- async function edgeSynthesize(text, voiceId) {
65
- if (!edgeTTS) throw new Error('edge-tts-universal not available');
66
- const voice = EDGE_VOICE_MAP[voiceId] || EDGE_VOICE_MAP.default;
67
- const c = new edgeTTS.Communicate(text, voice);
68
- const chunks = [];
69
- const timeout = new Promise((_, rej) => setTimeout(() => rej(new Error('edge-tts timeout')), 30000));
70
- const collect = (async () => {
71
- for await (const chunk of c.stream()) {
72
- if (chunk.type === 'audio' && chunk.data) chunks.push(Buffer.from(chunk.data));
73
- }
74
- })();
75
- await Promise.race([collect, timeout]);
76
- if (!chunks.length) throw new Error('edge-tts returned no audio');
77
- return Buffer.concat(chunks);
38
+ function findVoiceFile(voiceId) {
39
+ for (const dir of VOICE_DIRS) {
40
+ const p = path.join(dir, `custom_${voiceId}.wav`);
41
+ if (fs.existsSync(p)) return p;
42
+ }
43
+ return null;
78
44
  }
79
45
 
80
- function synthesizeDirect(text, voiceId) {
81
- const voicePath = serverTTS && typeof serverTTS.findVoiceFile === 'function'
82
- ? serverTTS.findVoiceFile(voiceId, EXTRA_VOICE_DIRS) : null;
46
+ function synthesize(text, voiceId) {
47
+ const voicePath = voiceId ? findVoiceFile(voiceId) : null;
83
48
  const isPredefined = voiceId && PREDEFINED_IDS.has(voiceId);
84
49
  const boundary = '----PocketTTS' + Date.now();
85
50
  const parts = [];
@@ -118,6 +83,10 @@ function synthesizeDirect(text, voiceId) {
118
83
  });
119
84
  }
120
85
 
86
+ async function* synthesizeStream(text, voiceId) {
87
+ yield await synthesize(text, voiceId);
88
+ }
89
+
121
90
  function transcribe(audioBuffer) {
122
91
  if (!serverSTT) throw new Error('STT not available');
123
92
  return serverSTT.transcribe(audioBuffer, getSttOptions());
@@ -128,101 +97,31 @@ function getSTT() {
128
97
  return serverSTT.getSTT(getSttOptions());
129
98
  }
130
99
 
131
- async function synthesize(text, voiceId) {
132
- if (serverTTS) {
133
- try {
134
- if (needsPatch && voiceId && PREDEFINED_IDS.has(voiceId)) {
135
- return await synthesizeDirect(text, voiceId);
136
- }
137
- return await serverTTS.synthesize(text, voiceId, EXTRA_VOICE_DIRS);
138
- } catch(e) {
139
- console.warn('[TTS] webtalk synthesize failed, falling back to edge-tts:', e.message);
140
- }
141
- }
142
- return edgeSynthesize(text, voiceId);
143
- }
144
-
145
- async function* synthesizeStream(text, voiceId) {
146
- if (serverTTS) {
147
- try {
148
- if (needsPatch && voiceId && PREDEFINED_IDS.has(voiceId)) {
149
- yield await synthesizeDirect(text, voiceId);
150
- return;
151
- }
152
- for await (const chunk of serverTTS.synthesizeStream(text, voiceId, EXTRA_VOICE_DIRS)) {
153
- yield chunk;
154
- }
155
- return;
156
- } catch(e) {
157
- console.warn('[TTS] webtalk stream failed, falling back to edge-tts:', e.message);
158
- }
159
- }
160
- yield await edgeSynthesize(text, voiceId);
161
- }
162
-
163
100
  function getVoices() {
164
- try {
165
- const upstream = serverTTS && typeof serverTTS.getVoices === 'function'
166
- ? serverTTS.getVoices(EXTRA_VOICE_DIRS) : [];
167
- const custom = upstream.filter(v => v.isCustom);
168
- return [...POCKET_TTS_VOICES, ...custom];
169
- } catch(e) { return POCKET_TTS_VOICES; }
101
+ return POCKET_TTS_VOICES;
170
102
  }
171
103
 
172
104
  function getStatus() {
173
105
  const sttStatus = serverSTT ? serverSTT.getStatus() : { ready: false, loading: false, error: 'STT unavailable' };
174
- const ttsStatus = serverTTS ? serverTTS.getStatus() : { ready: false, lastError: 'TTS unavailable' };
175
106
  return {
176
107
  sttReady: sttStatus.ready,
177
- ttsReady: ttsStatus.ready || !!edgeTTS,
108
+ ttsReady: true,
178
109
  sttLoading: sttStatus.loading,
179
110
  ttsLoading: false,
180
111
  sttError: sttStatus.error,
181
- ttsError: (ttsStatus.ready || edgeTTS) ? null : (ttsStatus.lastError || 'TTS not available'),
182
- pocketTts: ttsStatus,
183
- edgeTtsAvailable: !!edgeTTS,
112
+ ttsError: null,
184
113
  };
185
114
  }
186
115
 
187
116
  function preloadTTS() {
188
- if (!serverTTS || typeof serverTTS.start !== 'function') {
189
- if (edgeTTS) console.log('[TTS] Using edge-tts fallback');
190
- return;
191
- }
192
- if (typeof serverTTS.isInstalled === 'function' && !serverTTS.isInstalled()) {
193
- console.log('[TTS] pocket-tts not installed yet - will install on first use');
194
- return;
195
- }
196
- const portableDataDir = process.env.PORTABLE_DATA_DIR;
197
- const binaryPaths = portableDataDir ? [
198
- path.join(portableDataDir, 'pocket-venv', 'Scripts', 'pocket-tts.exe'),
199
- path.join(portableDataDir, 'pocket-venv', 'bin', 'pocket-tts'),
200
- ] : undefined;
201
- let voicePath = null;
202
- try {
203
- const defaultVoice = typeof serverTTS.findVoiceFile === 'function'
204
- ? (serverTTS.findVoiceFile('custom_cleetus', EXTRA_VOICE_DIRS) || '/config/voices/cleetus.wav')
205
- : '/config/voices/cleetus.wav';
206
- voicePath = fs.existsSync(defaultVoice) ? defaultVoice : null;
207
- } catch(e) {}
208
- serverTTS.start(voicePath, binaryPaths ? { binaryPaths } : {}).then(ok => {
209
- if (ok) console.log('[TTS] pocket-tts sidecar started');
210
- else console.log('[TTS] pocket-tts unavailable, edge-tts fallback active:', !!edgeTTS);
211
- }).catch(err => {
212
- console.error('[TTS] pocket-tts start error:', err.message);
213
- });
117
+ // pocket-tts is managed externally; nothing to preload
214
118
  }
215
119
 
216
- function ttsCacheKey(text, voiceId) {
217
- return serverTTS && typeof serverTTS.ttsCacheKey === 'function' ? serverTTS.ttsCacheKey(text, voiceId) : null;
218
- }
219
-
220
- function ttsCacheGet(key) {
221
- return serverTTS && typeof serverTTS.ttsCacheGet === 'function' ? serverTTS.ttsCacheGet(key) : null;
222
- }
120
+ function ttsCacheKey(text, voiceId) { return null; }
121
+ function ttsCacheGet(key) { return null; }
223
122
 
224
123
  function splitSentences(text) {
225
- return serverTTS && typeof serverTTS.splitSentences === 'function' ? serverTTS.splitSentences(text) : [text];
124
+ return text.match(/[^.!?]+[.!?]*/g)?.map(s => s.trim()).filter(Boolean) || [text];
226
125
  }
227
126
 
228
127
  export { transcribe, synthesize, synthesizeStream, getSTT, getStatus, getVoices, preloadTTS, ttsCacheKey, ttsCacheGet, splitSentences };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.288",
3
+ "version": "1.0.290",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
@@ -26,7 +26,6 @@
26
26
  "audio-decode": "^2.2.3",
27
27
  "better-sqlite3": "^12.6.2",
28
28
  "busboy": "^1.6.0",
29
- "edge-tts-universal": "^1.0.1",
30
29
  "express": "^5.2.1",
31
30
  "fsbrowse": "^0.2.18",
32
31
  "google-auth-library": "^10.5.0",
package/server.js CHANGED
@@ -2611,13 +2611,20 @@ const server = http.createServer(async (req, res) => {
2611
2611
  try {
2612
2612
  const { getStatus } = await getSpeech();
2613
2613
  const baseStatus = getStatus();
2614
- const r = createRequire(import.meta.url);
2615
- const serverTTS = r('webtalk/server-tts');
2616
- const pyInfo = serverTTS.detectPython();
2614
+ let pythonDetected = false, pythonVersion = null;
2615
+ try {
2616
+ const r = createRequire(import.meta.url);
2617
+ const serverTTS = r('webtalk/server-tts');
2618
+ if (typeof serverTTS.detectPython === 'function') {
2619
+ const pyInfo = serverTTS.detectPython();
2620
+ pythonDetected = pyInfo.found;
2621
+ pythonVersion = pyInfo.version || null;
2622
+ }
2623
+ } catch(e) {}
2617
2624
  sendJSON(req, res, 200, {
2618
2625
  ...baseStatus,
2619
- pythonDetected: pyInfo.found,
2620
- pythonVersion: pyInfo.version || null,
2626
+ pythonDetected,
2627
+ pythonVersion,
2621
2628
  setupMessage: baseStatus.ttsReady ? 'pocket-tts ready' : 'Will setup on first TTS request',
2622
2629
  modelsDownloading: modelDownloadState.downloading,
2623
2630
  modelsComplete: modelDownloadState.complete,