agentgui 1.0.287 → 1.0.289
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/speech.js +22 -123
- package/package.json +1 -2
- package/server.js +2 -1
package/lib/speech.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { createRequire } from 'module';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
|
-
import os from 'os';
|
|
5
4
|
import http from 'http';
|
|
6
5
|
import { fileURLToPath } from 'url';
|
|
7
6
|
|
|
@@ -9,12 +8,11 @@ const require = createRequire(import.meta.url);
|
|
|
9
8
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
9
|
const ROOT = path.dirname(__dirname);
|
|
11
10
|
|
|
12
|
-
let serverSTT = null
|
|
11
|
+
let serverSTT = null;
|
|
13
12
|
try { serverSTT = require('webtalk/server-stt'); } catch(e) { console.warn('[STT] webtalk/server-stt unavailable:', e.message); }
|
|
14
|
-
try { serverTTS = require('webtalk/server-tts'); } catch(e) { console.warn('[TTS] webtalk/server-tts unavailable:', e.message); }
|
|
15
|
-
try { edgeTTS = require('edge-tts-universal'); } catch(e) { console.warn('[TTS] edge-tts-universal unavailable:', e.message); }
|
|
16
13
|
|
|
17
|
-
const
|
|
14
|
+
const VOICE_DIRS = [path.join(ROOT, 'voices')];
|
|
15
|
+
const POCKET_PORT = 8787;
|
|
18
16
|
|
|
19
17
|
const POCKET_TTS_VOICES = [
|
|
20
18
|
{ id: 'default', name: 'Default', gender: 'female', accent: 'French' },
|
|
@@ -28,31 +26,7 @@ const POCKET_TTS_VOICES = [
|
|
|
28
26
|
{ id: 'azelma', name: 'Azelma', gender: 'female', accent: 'French' },
|
|
29
27
|
];
|
|
30
28
|
|
|
31
|
-
const EDGE_VOICE_MAP = {
|
|
32
|
-
default: 'fr-FR-DeniseNeural', alba: 'fr-FR-DeniseNeural',
|
|
33
|
-
marius: 'fr-FR-HenriNeural', javert: 'fr-FR-HenriNeural',
|
|
34
|
-
jean: 'fr-FR-HenriNeural', fantine: 'fr-FR-DeniseNeural',
|
|
35
|
-
cosette: 'fr-FR-DeniseNeural', eponine: 'fr-FR-DeniseNeural',
|
|
36
|
-
azelma: 'fr-FR-DeniseNeural',
|
|
37
|
-
};
|
|
38
|
-
|
|
39
29
|
const PREDEFINED_IDS = new Set(POCKET_TTS_VOICES.filter(v => v.id !== 'default').map(v => v.id));
|
|
40
|
-
const POCKET_PORT = 8787;
|
|
41
|
-
|
|
42
|
-
// Detect if serverTTS has the expected API (getVoices = old server-tts, not the ONNX version)
|
|
43
|
-
// The ONNX server-tts-onnx has a different API (synthesize takes modelDir not extraDirs)
|
|
44
|
-
// and is incompatible with our voice-based approach - skip it and use edge-tts instead
|
|
45
|
-
if (serverTTS && typeof serverTTS.getVoices !== 'function') {
|
|
46
|
-
console.warn('[TTS] webtalk/server-tts has incompatible API (ONNX version), disabling it');
|
|
47
|
-
serverTTS = null;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
let needsPatch = true;
|
|
51
|
-
try {
|
|
52
|
-
if (serverTTS && typeof serverTTS.getVoices === 'function') {
|
|
53
|
-
needsPatch = !serverTTS.getVoices(EXTRA_VOICE_DIRS).some(v => v.id === 'alba' && !v.isCustom);
|
|
54
|
-
}
|
|
55
|
-
} catch(e) { needsPatch = true; }
|
|
56
30
|
|
|
57
31
|
function getSttOptions() {
|
|
58
32
|
if (process.env.PORTABLE_DATA_DIR) {
|
|
@@ -61,25 +35,16 @@ function getSttOptions() {
|
|
|
61
35
|
return {};
|
|
62
36
|
}
|
|
63
37
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
const collect = (async () => {
|
|
71
|
-
for await (const chunk of c.stream()) {
|
|
72
|
-
if (chunk.type === 'audio' && chunk.data) chunks.push(Buffer.from(chunk.data));
|
|
73
|
-
}
|
|
74
|
-
})();
|
|
75
|
-
await Promise.race([collect, timeout]);
|
|
76
|
-
if (!chunks.length) throw new Error('edge-tts returned no audio');
|
|
77
|
-
return Buffer.concat(chunks);
|
|
38
|
+
function findVoiceFile(voiceId) {
|
|
39
|
+
for (const dir of VOICE_DIRS) {
|
|
40
|
+
const p = path.join(dir, `custom_${voiceId}.wav`);
|
|
41
|
+
if (fs.existsSync(p)) return p;
|
|
42
|
+
}
|
|
43
|
+
return null;
|
|
78
44
|
}
|
|
79
45
|
|
|
80
|
-
function synthesizeDirect(text, voiceId) {
|
|
81
|
-
const voicePath =
|
|
82
|
-
? serverTTS.findVoiceFile(voiceId, EXTRA_VOICE_DIRS) : null;
|
|
46
|
+
function synthesize(text, voiceId) {
|
|
47
|
+
const voicePath = voiceId ? findVoiceFile(voiceId) : null;
|
|
83
48
|
const isPredefined = voiceId && PREDEFINED_IDS.has(voiceId);
|
|
84
49
|
const boundary = '----PocketTTS' + Date.now();
|
|
85
50
|
const parts = [];
|
|
@@ -118,6 +83,10 @@ function synthesizeDirect(text, voiceId) {
|
|
|
118
83
|
});
|
|
119
84
|
}
|
|
120
85
|
|
|
86
|
+
async function* synthesizeStream(text, voiceId) {
|
|
87
|
+
yield await synthesize(text, voiceId);
|
|
88
|
+
}
|
|
89
|
+
|
|
121
90
|
function transcribe(audioBuffer) {
|
|
122
91
|
if (!serverSTT) throw new Error('STT not available');
|
|
123
92
|
return serverSTT.transcribe(audioBuffer, getSttOptions());
|
|
@@ -128,101 +97,31 @@ function getSTT() {
|
|
|
128
97
|
return serverSTT.getSTT(getSttOptions());
|
|
129
98
|
}
|
|
130
99
|
|
|
131
|
-
async function synthesize(text, voiceId) {
|
|
132
|
-
if (serverTTS) {
|
|
133
|
-
try {
|
|
134
|
-
if (needsPatch && voiceId && PREDEFINED_IDS.has(voiceId)) {
|
|
135
|
-
return await synthesizeDirect(text, voiceId);
|
|
136
|
-
}
|
|
137
|
-
return await serverTTS.synthesize(text, voiceId, EXTRA_VOICE_DIRS);
|
|
138
|
-
} catch(e) {
|
|
139
|
-
console.warn('[TTS] webtalk synthesize failed, falling back to edge-tts:', e.message);
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
return edgeSynthesize(text, voiceId);
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
async function* synthesizeStream(text, voiceId) {
|
|
146
|
-
if (serverTTS) {
|
|
147
|
-
try {
|
|
148
|
-
if (needsPatch && voiceId && PREDEFINED_IDS.has(voiceId)) {
|
|
149
|
-
yield await synthesizeDirect(text, voiceId);
|
|
150
|
-
return;
|
|
151
|
-
}
|
|
152
|
-
for await (const chunk of serverTTS.synthesizeStream(text, voiceId, EXTRA_VOICE_DIRS)) {
|
|
153
|
-
yield chunk;
|
|
154
|
-
}
|
|
155
|
-
return;
|
|
156
|
-
} catch(e) {
|
|
157
|
-
console.warn('[TTS] webtalk stream failed, falling back to edge-tts:', e.message);
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
yield await edgeSynthesize(text, voiceId);
|
|
161
|
-
}
|
|
162
|
-
|
|
163
100
|
function getVoices() {
|
|
164
|
-
|
|
165
|
-
const upstream = serverTTS && typeof serverTTS.getVoices === 'function'
|
|
166
|
-
? serverTTS.getVoices(EXTRA_VOICE_DIRS) : [];
|
|
167
|
-
const custom = upstream.filter(v => v.isCustom);
|
|
168
|
-
return [...POCKET_TTS_VOICES, ...custom];
|
|
169
|
-
} catch(e) { return POCKET_TTS_VOICES; }
|
|
101
|
+
return POCKET_TTS_VOICES;
|
|
170
102
|
}
|
|
171
103
|
|
|
172
104
|
function getStatus() {
|
|
173
105
|
const sttStatus = serverSTT ? serverSTT.getStatus() : { ready: false, loading: false, error: 'STT unavailable' };
|
|
174
|
-
const ttsStatus = serverTTS ? serverTTS.getStatus() : { ready: false, lastError: 'TTS unavailable' };
|
|
175
106
|
return {
|
|
176
107
|
sttReady: sttStatus.ready,
|
|
177
|
-
ttsReady:
|
|
108
|
+
ttsReady: true,
|
|
178
109
|
sttLoading: sttStatus.loading,
|
|
179
110
|
ttsLoading: false,
|
|
180
111
|
sttError: sttStatus.error,
|
|
181
|
-
ttsError:
|
|
182
|
-
pocketTts: ttsStatus,
|
|
183
|
-
edgeTtsAvailable: !!edgeTTS,
|
|
112
|
+
ttsError: null,
|
|
184
113
|
};
|
|
185
114
|
}
|
|
186
115
|
|
|
187
116
|
function preloadTTS() {
|
|
188
|
-
|
|
189
|
-
if (edgeTTS) console.log('[TTS] Using edge-tts fallback');
|
|
190
|
-
return;
|
|
191
|
-
}
|
|
192
|
-
if (typeof serverTTS.isInstalled === 'function' && !serverTTS.isInstalled()) {
|
|
193
|
-
console.log('[TTS] pocket-tts not installed yet - will install on first use');
|
|
194
|
-
return;
|
|
195
|
-
}
|
|
196
|
-
const portableDataDir = process.env.PORTABLE_DATA_DIR;
|
|
197
|
-
const binaryPaths = portableDataDir ? [
|
|
198
|
-
path.join(portableDataDir, 'pocket-venv', 'Scripts', 'pocket-tts.exe'),
|
|
199
|
-
path.join(portableDataDir, 'pocket-venv', 'bin', 'pocket-tts'),
|
|
200
|
-
] : undefined;
|
|
201
|
-
let voicePath = null;
|
|
202
|
-
try {
|
|
203
|
-
const defaultVoice = typeof serverTTS.findVoiceFile === 'function'
|
|
204
|
-
? (serverTTS.findVoiceFile('custom_cleetus', EXTRA_VOICE_DIRS) || '/config/voices/cleetus.wav')
|
|
205
|
-
: '/config/voices/cleetus.wav';
|
|
206
|
-
voicePath = fs.existsSync(defaultVoice) ? defaultVoice : null;
|
|
207
|
-
} catch(e) {}
|
|
208
|
-
serverTTS.start(voicePath, binaryPaths ? { binaryPaths } : {}).then(ok => {
|
|
209
|
-
if (ok) console.log('[TTS] pocket-tts sidecar started');
|
|
210
|
-
else console.log('[TTS] pocket-tts unavailable, edge-tts fallback active:', !!edgeTTS);
|
|
211
|
-
}).catch(err => {
|
|
212
|
-
console.error('[TTS] pocket-tts start error:', err.message);
|
|
213
|
-
});
|
|
117
|
+
// pocket-tts is managed externally; nothing to preload
|
|
214
118
|
}
|
|
215
119
|
|
|
216
|
-
function ttsCacheKey(text, voiceId) {
|
|
217
|
-
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
function ttsCacheGet(key) {
|
|
221
|
-
return serverTTS && typeof serverTTS.ttsCacheGet === 'function' ? serverTTS.ttsCacheGet(key) : null;
|
|
222
|
-
}
|
|
120
|
+
function ttsCacheKey(text, voiceId) { return null; }
|
|
121
|
+
function ttsCacheGet(key) { return null; }
|
|
223
122
|
|
|
224
123
|
function splitSentences(text) {
|
|
225
|
-
return
|
|
124
|
+
return text.match(/[^.!?]+[.!?]*/g)?.map(s => s.trim()).filter(Boolean) || [text];
|
|
226
125
|
}
|
|
227
126
|
|
|
228
127
|
export { transcribe, synthesize, synthesizeStream, getSTT, getStatus, getVoices, preloadTTS, ttsCacheKey, ttsCacheGet, splitSentences };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentgui",
|
|
3
|
-
"version": "1.0.287",
|
|
3
|
+
"version": "1.0.289",
|
|
4
4
|
"description": "Multi-agent ACP client with real-time communication",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "server.js",
|
|
@@ -26,7 +26,6 @@
|
|
|
26
26
|
"audio-decode": "^2.2.3",
|
|
27
27
|
"better-sqlite3": "^12.6.2",
|
|
28
28
|
"busboy": "^1.6.0",
|
|
29
|
-
"edge-tts-universal": "^1.0.1",
|
|
30
29
|
"express": "^5.2.1",
|
|
31
30
|
"fsbrowse": "^0.2.18",
|
|
32
31
|
"google-auth-library": "^10.5.0",
|
package/server.js
CHANGED
|
@@ -296,12 +296,13 @@ const debugLog = (msg) => {
|
|
|
296
296
|
};
|
|
297
297
|
|
|
298
298
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
299
|
+
const rootDir = process.env.PORTABLE_EXE_DIR || __dirname;
|
|
299
300
|
const PORT = process.env.PORT || 3000;
|
|
300
301
|
const BASE_URL = (process.env.BASE_URL || '/gm').replace(/\/+$/, '');
|
|
301
302
|
const watch = process.argv.includes('--no-watch') ? false : (process.argv.includes('--watch') || process.env.HOT_RELOAD !== 'false');
|
|
302
303
|
|
|
303
304
|
const STARTUP_CWD = process.env.STARTUP_CWD || process.cwd();
|
|
304
|
-
const staticDir = path.join(__dirname, 'static');
|
|
305
|
+
const staticDir = path.join(rootDir, 'static');
|
|
305
306
|
if (!fs.existsSync(staticDir)) fs.mkdirSync(staticDir, { recursive: true });
|
|
306
307
|
|
|
307
308
|
// Express sub-app for fsbrowse file browser and file upload
|