agentgui 1.0.290 → 1.0.292
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build-portable.js +11 -1
- package/lib/speech.js +186 -54
- package/package.json +1 -1
- package/server.js +4 -2
package/build-portable.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import fs from 'fs';
|
|
2
2
|
import path from 'path';
|
|
3
|
+
import os from 'os';
|
|
3
4
|
import { fileURLToPath } from 'url';
|
|
4
5
|
import { execSync } from 'child_process';
|
|
5
6
|
|
|
@@ -51,7 +52,7 @@ fs.mkdirSync(out, { recursive: true });
|
|
|
51
52
|
|
|
52
53
|
log('Compiling Windows executable...');
|
|
53
54
|
execSync(
|
|
54
|
-
|
|
55
|
+
`"${path.join(os.homedir(), '.bun', 'bin', 'bun')}" build --compile --target=bun-windows-x64 --outfile="${path.join(out, 'agentgui.exe')}" "${path.join(src, 'portable-entry.js')}"`,
|
|
55
56
|
{ stdio: 'inherit', cwd: src }
|
|
56
57
|
);
|
|
57
58
|
|
|
@@ -119,6 +120,15 @@ copyDir(path.join(claudeSrc, 'vendor', 'ripgrep', 'x64-win32'), path.join(claude
|
|
|
119
120
|
log('Creating data directory...');
|
|
120
121
|
fs.mkdirSync(path.join(out, 'data'), { recursive: true });
|
|
121
122
|
|
|
123
|
+
log('Bundling AI models...');
|
|
124
|
+
const userModels = path.join(os.homedir(), '.gmgui', 'models');
|
|
125
|
+
if (fs.existsSync(userModels)) {
|
|
126
|
+
copyDir(userModels, path.join(out, 'models'));
|
|
127
|
+
log(`Models bundled: ${Math.round(sizeOf(path.join(out, 'models')) / 1024 / 1024)}MB`);
|
|
128
|
+
} else {
|
|
129
|
+
log('WARNING: No models found at ~/.gmgui/models - portable build will download on first use');
|
|
130
|
+
}
|
|
131
|
+
|
|
122
132
|
fs.writeFileSync(path.join(out, 'README.txt'), [
|
|
123
133
|
'# AgentGUI Portable',
|
|
124
134
|
'',
|
package/lib/speech.js
CHANGED
|
@@ -1,18 +1,37 @@
|
|
|
1
1
|
import { createRequire } from 'module';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
|
-
import
|
|
4
|
+
import os from 'os';
|
|
5
5
|
import { fileURLToPath } from 'url';
|
|
6
6
|
|
|
7
7
|
const require = createRequire(import.meta.url);
|
|
8
8
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
9
9
|
const ROOT = path.dirname(__dirname);
|
|
10
10
|
|
|
11
|
+
// Optional speech backends. Each one is loaded best-effort: a missing package
// only disables the matching feature instead of preventing this module from loading.
let serverTTS = null;
let serverSTT = null;
let audioDecode = null;
let sttttsmodels = null;

try { serverTTS = require('webtalk/server-tts'); } catch (e) { console.warn('[TTS] webtalk/server-tts unavailable:', e.message); }
try { serverSTT = require('webtalk/server-stt'); } catch (e) { console.warn('[STT] webtalk/server-stt unavailable:', e.message); }
try { audioDecode = require('audio-decode'); } catch (e) { console.warn('[TTS] audio-decode unavailable:', e.message); }
try { sttttsmodels = require('sttttsmodels'); } catch (e) { console.warn('[TTS] sttttsmodels unavailable:', e.message); }

// Detect webtalk API type: old (server-tts.js with getVoices/synthesizeViaPocket)
// vs new ONNX (server-tts-onnx.js with encodeVoiceAudio).
// `typeof x?.y === 'function'` always yields a real boolean, so these flags are
// `false` (never `null`) when the TTS module failed to load.
const isOnnxApi = typeof serverTTS?.encodeVoiceAudio === 'function';
const isPocketApi = typeof serverTTS?.getVoices === 'function';

// Voice directories to scan; earlier entries win when the same voice exists twice.
const VOICE_DIRS = [
  path.join(os.homedir(), 'voices'),
  path.join(ROOT, 'voices'),
  '/config/voices',
];

// File extensions (lower-case, with leading dot) treated as voice samples.
const AUDIO_EXTENSIONS = ['.wav', '.mp3', '.ogg', '.flac', '.m4a'];
|
|
16
35
|
|
|
17
36
|
const POCKET_TTS_VOICES = [
|
|
18
37
|
{ id: 'default', name: 'Default', gender: 'female', accent: 'French' },
|
|
@@ -26,65 +45,146 @@ const POCKET_TTS_VOICES = [
|
|
|
26
45
|
{ id: 'azelma', name: 'Azelma', gender: 'female', accent: 'French' },
|
|
27
46
|
];
|
|
28
47
|
|
|
29
|
-
const
|
|
48
|
+
const SAMPLE_RATE = 24000;
|
|
30
49
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
50
|
+
// Embedding cache: voiceId -> {data, shape}
|
|
51
|
+
const voiceEmbeddingCache = new Map();
|
|
52
|
+
|
|
53
|
+
/**
 * Resolve the directory that holds the TTS model files.
 *
 * Lookup order:
 *   1. `sttttsmodels.ttsDir`, when that package is loaded and the directory exists;
 *   2. `<PORTABLE_EXE_DIR>/models/tts`, then `<PORTABLE_DATA_DIR>/models/tts`,
 *      when the variable is set and the directory exists (portable builds ship
 *      their models next to the executable);
 *   3. `~/.gmgui/models/tts` as the persistent per-user cache (returned even if
 *      it does not exist yet).
 *
 * @returns {string} Path of the TTS model directory.
 */
function getModelDir() {
  const packaged = sttttsmodels?.ttsDir;
  if (packaged && fs.existsSync(packaged)) return packaged;

  for (const root of [process.env.PORTABLE_EXE_DIR, process.env.PORTABLE_DATA_DIR]) {
    if (!root) continue;
    const bundled = path.join(root, 'models', 'tts');
    if (fs.existsSync(bundled)) return bundled;
  }

  // Fallback to persistent cache dir
  return path.join(os.homedir(), '.gmgui', 'models', 'tts');
}
|
|
37
60
|
|
|
38
61
|
/**
 * Locate the audio file backing a custom voice id.
 *
 * Voice ids are built from file names by replacing every character outside
 * `[a-zA-Z0-9_-]` with `_`, so an id cannot always be mapped back to a file
 * name directly. For each directory in VOICE_DIRS this therefore tries:
 *   1. the literal `<name><ext>` for every known audio extension, then
 *   2. a directory scan for a file whose sanitized stem equals `<name>`.
 *
 * @param {string} voiceId - Voice id, with or without the `custom_` prefix.
 * @returns {string|null} Path of the first matching file, or null when the id
 *   is empty, `'default'`, not a string, contains a path separator, or has no
 *   matching file.
 */
function findVoiceFile(voiceId) {
  if (typeof voiceId !== 'string' || voiceId === '' || voiceId === 'default') return null;
  const baseName = voiceId.replace(/^custom_/, '');
  // Ids never legitimately contain separators; refusing them keeps lookups
  // confined to the voice directories.
  if (baseName === '' || /[\\/]/.test(baseName)) return null;

  const sanitize = (stem) => stem.replace(/[^a-zA-Z0-9_-]/g, '_');

  for (const dir of VOICE_DIRS) {
    for (const ext of AUDIO_EXTENSIONS) {
      const candidate = path.join(dir, baseName + ext);
      if (fs.existsSync(candidate)) return candidate;
    }

    let entries;
    try {
      entries = fs.readdirSync(dir);
    } catch (_) {
      // Missing or unreadable voice directories are expected; try the next one.
      continue;
    }
    for (const file of entries) {
      const rawExt = path.extname(file);
      if (!AUDIO_EXTENSIONS.includes(rawExt.toLowerCase())) continue;
      // Accept both stem forms an id may have been derived from: with the real
      // extension stripped, and with only an exact lower-case extension stripped.
      const stems = [path.basename(file, rawExt), path.basename(file, rawExt.toLowerCase())];
      if (stems.some((stem) => sanitize(stem) === baseName)) return path.join(dir, file);
    }
  }
  return null;
}
|
|
45
72
|
|
|
46
|
-
function
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
73
|
+
/**
 * List the custom voices available in one directory.
 *
 * Every file with a known audio extension (matched case-insensitively) becomes
 * one voice whose id is `custom_` plus the file stem with characters outside
 * `[a-zA-Z0-9_-]` replaced by `_`. When several files map to the same id only
 * the first one returned by the directory listing is kept.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Array<{id: string, name: string, gender: string, accent: string, isCustom: boolean}>}
 *   Voices found; empty when the directory is missing or cannot be read.
 */
function scanVoiceDir(dir) {
  const voices = [];

  let entries;
  try {
    entries = fs.readdirSync(dir);
  } catch (err) {
    // A missing directory is the normal case; anything else is worth a hint,
    // but scanning stays best-effort and never throws.
    if (err.code !== 'ENOENT' && err.code !== 'ENOTDIR') {
      console.warn('[TTS] cannot scan voice directory:', dir, err.message);
    }
    return voices;
  }

  const seenIds = new Set();
  for (const file of entries) {
    const rawExt = path.extname(file);
    if (!AUDIO_EXTENSIONS.includes(rawExt.toLowerCase())) continue;
    // Strip the extension exactly as spelled on disk so "Voice.WAV" yields "Voice".
    const baseName = path.basename(file, rawExt);
    const id = 'custom_' + baseName.replace(/[^a-zA-Z0-9_-]/g, '_');
    if (seenIds.has(id)) continue;
    seenIds.add(id);
    voices.push({
      id,
      name: baseName.replace(/_/g, ' '),
      gender: 'custom',
      accent: 'custom',
      isCustom: true,
    });
  }
  return voices;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Encode a custom voice's audio file into a voice embedding for the ONNX backend.
 *
 * Results are cached per voice id in `voiceEmbeddingCache`. Only the first
 * audio channel is used, and it is linearly resampled to SAMPLE_RATE when the
 * file uses a different rate.
 *
 * @param {string} voiceId - Voice id as accepted by findVoiceFile.
 * @returns {Promise<*>} Whatever `serverTTS.encodeVoiceAudio` resolves to, or
 *   null when the voice has no file or the decoder / ONNX backend is unavailable.
 */
async function getVoiceEmbedding(voiceId) {
  if (voiceEmbeddingCache.has(voiceId)) return voiceEmbeddingCache.get(voiceId);
  const voicePath = findVoiceFile(voiceId);
  if (!voicePath) return null;
  if (!audioDecode || !serverTTS || !isOnnxApi) return null;

  // The decoder may be exposed as a module namespace (`.default`) or directly
  // as a function, depending on how it was loaded.
  const decode = typeof audioDecode.default === 'function' ? audioDecode.default : audioDecode;
  const decoded = await decode(fs.readFileSync(voicePath));

  // First channel only, as float PCM.
  let pcm = decoded.getChannelData(0);
  if (decoded.sampleRate !== SAMPLE_RATE) {
    // Linear interpolation between the two source samples surrounding each
    // output position.
    const ratio = decoded.sampleRate / SAMPLE_RATE;
    const outLen = Math.floor(pcm.length / ratio);
    const resampled = new Float32Array(outLen);
    const last = pcm.length - 1;
    for (let i = 0; i < outLen; i++) {
      const pos = i * ratio;
      const lo = Math.min(Math.floor(pos), last);
      const hi = Math.min(lo + 1, last);
      resampled[i] = pcm[lo] + (pcm[hi] - pcm[lo]) * (pos - lo);
    }
    pcm = resampled;
  }

  const embedding = await serverTTS.encodeVoiceAudio(pcm);
  voiceEmbeddingCache.set(voiceId, embedding);
  return embedding;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Wrap float PCM samples in a 16-bit PCM WAV (RIFF) container.
 *
 * @param {ArrayLike<number>} samples - Samples nominally in [-1, 1]; values
 *   outside that range are clamped. Interleaved when `numChannels` > 1.
 * @param {number} [sampleRate=SAMPLE_RATE] - Sample rate written to the header.
 * @param {number} [numChannels=1] - Channel count written to the header.
 * @returns {Buffer} 44-byte header followed by little-endian int16 samples.
 */
function pcmToWav(samples, sampleRate = SAMPLE_RATE, numChannels = 1) {
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = samples.length * bytesPerSample;
  const buf = Buffer.alloc(44 + dataSize);

  // RIFF chunk descriptor
  buf.write('RIFF', 0);
  buf.writeUInt32LE(36 + dataSize, 4);
  buf.write('WAVE', 8);

  // "fmt " sub-chunk: 16 bytes, audio format 1 (PCM)
  buf.write('fmt ', 12);
  buf.writeUInt32LE(16, 16);
  buf.writeUInt16LE(1, 20);
  buf.writeUInt16LE(numChannels, 22);
  buf.writeUInt32LE(sampleRate, 24);
  buf.writeUInt32LE(byteRate, 28);
  buf.writeUInt16LE(blockAlign, 32);
  buf.writeUInt16LE(bitsPerSample, 34);

  // "data" sub-chunk
  buf.write('data', 36);
  buf.writeUInt32LE(dataSize, 40);

  for (let i = 0; i < samples.length; i++) {
    const clamped = Math.max(-1, Math.min(1, samples[i]));
    buf.writeInt16LE(Math.round(clamped * 32767), 44 + i * bytesPerSample);
  }
  return buf;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Build the options object for the STT backend.
 *
 * In a portable install the model cache lives in a `models` directory under
 * PORTABLE_EXE_DIR or, when that is unset or empty, under PORTABLE_DATA_DIR.
 *
 * @returns {{cacheDir?: string}} `{ cacheDir }` for portable installs,
 *   otherwise an empty object.
 */
function getSttOptions() {
  // `||` rather than `??` on purpose: an empty variable counts as unset.
  const portableRoot = process.env.PORTABLE_EXE_DIR || process.env.PORTABLE_DATA_DIR;
  return portableRoot ? { cacheDir: path.join(portableRoot, 'models') } : {};
}
|
|
153
|
+
|
|
154
|
+
/**
 * Synthesize speech for `text` in one shot.
 *
 * With the ONNX backend the voice id is turned into an embedding (null when no
 * id is given) and the backend's PCM result is wrapped as WAV. With the older
 * pocket-tts backend the call is forwarded unchanged together with VOICE_DIRS.
 *
 * @param {string} text - Text to speak.
 * @param {string} [voiceId] - Voice id; falsy selects the backend default.
 * @returns {Promise<*>} WAV Buffer (ONNX) or whatever the pocket backend returns.
 * @throws {Error} When neither backend is available.
 */
async function synthesize(text, voiceId) {
  if (!isOnnxApi && !isPocketApi) {
    throw new Error('No TTS backend available');
  }

  if (!isOnnxApi) {
    // Old server-tts.js with pocket-tts sidecar
    return serverTTS.synthesize(text, voiceId, VOICE_DIRS);
  }

  // Node.js ONNX TTS - no Python required
  const modelDir = getModelDir();
  let embedding = null;
  if (voiceId) {
    embedding = await getVoiceEmbedding(voiceId);
  }
  const samples = await serverTTS.synthesize(text, embedding, modelDir);
  return pcmToWav(samples);
}
|
|
85
170
|
|
|
86
171
|
/**
 * Synthesize speech for `text` as an async stream of audio chunks.
 *
 * The ONNX backend synthesizes the whole text and yields a single WAV buffer;
 * the pocket-tts backend's own stream is re-yielded chunk by chunk.
 *
 * @param {string} text - Text to speak.
 * @param {string} [voiceId] - Voice id; falsy selects the backend default.
 * @yields {*} Audio chunks (a WAV Buffer for ONNX, backend-defined for pocket-tts).
 * @throws {Error} When neither backend is available.
 */
async function* synthesizeStream(text, voiceId) {
  if (isOnnxApi) {
    const modelDir = getModelDir();
    let embedding = null;
    if (voiceId) {
      embedding = await getVoiceEmbedding(voiceId);
    }
    const samples = await serverTTS.synthesize(text, embedding, modelDir);
    yield pcmToWav(samples);
  } else if (isPocketApi) {
    const upstream = serverTTS.synthesizeStream(text, voiceId, VOICE_DIRS);
    for await (const piece of upstream) {
      yield piece;
    }
  } else {
    throw new Error('No TTS backend available');
  }
}
|
|
89
189
|
|
|
90
190
|
function transcribe(audioBuffer) {
|
|
@@ -98,29 +198,61 @@ function getSTT() {
|
|
|
98
198
|
}
|
|
99
199
|
|
|
100
200
|
/**
 * List every selectable voice: the built-in POCKET_TTS_VOICES followed by the
 * custom voices, de-duplicated by id (first occurrence wins).
 *
 * Custom voices come from scanning each VOICE_DIRS entry and, when the
 * pocket-tts backend is loaded, from the entries it reports with `isCustom` set.
 *
 * @returns {Array<Object>} Voice descriptors.
 */
function getVoices() {
  // Map keeps insertion order, so the first voice seen for an id is the one kept.
  const customById = new Map();
  const remember = (voice) => {
    if (!customById.has(voice.id)) customById.set(voice.id, voice);
  };

  for (const dir of VOICE_DIRS) {
    for (const voice of scanVoiceDir(dir)) remember(voice);
  }

  // Merge custom voices reported by the old server-tts backend, if available
  if (isPocketApi) {
    for (const voice of serverTTS.getVoices(VOICE_DIRS)) {
      if (voice.isCustom) remember(voice);
    }
  }

  return [...POCKET_TTS_VOICES, ...customById.values()];
}
|
|
103
219
|
|
|
104
220
|
/**
 * Report readiness of the speech backends.
 *
 * STT fields are taken from `serverSTT.getStatus()`, or describe an unavailable
 * backend when STT was not loaded. TTS is ready as soon as either TTS API was
 * detected; `ttsLoading` is always false.
 *
 * @returns {{sttReady: *, ttsReady: boolean, sttLoading: *, ttsLoading: boolean,
 *   sttError: *, ttsError: (string|null), ttsBackend: string}}
 */
function getStatus() {
  const sttStatus = serverSTT
    ? serverSTT.getStatus()
    : { ready: false, loading: false, error: 'STT unavailable' };

  // The detection flags can be null when the TTS module failed to load; coerce
  // so `ttsReady` is always a strict boolean.
  const hasOnnx = Boolean(isOnnxApi);
  const hasPocket = Boolean(isPocketApi);
  const ttsReady = hasOnnx || hasPocket;
  const ttsBackend = hasOnnx ? 'onnx-node' : hasPocket ? 'pocket-tts' : 'none';

  return {
    sttReady: sttStatus.ready,
    ttsReady,
    sttLoading: sttStatus.loading,
    ttsLoading: false,
    sttError: sttStatus.error,
    ttsError: ttsReady ? null : 'No TTS backend available',
    ttsBackend,
  };
}
|
|
115
233
|
|
|
116
234
|
/**
 * Start loading the TTS backend in the background so the first request is fast.
 *
 * Best-effort: returns immediately and never throws. With the ONNX backend a
 * failed preload is only logged; with the pocket-tts backend `preload` is
 * invoked if the backend provides it.
 *
 * @returns {void}
 */
function preloadTTS() {
  if (isOnnxApi) {
    if (typeof serverTTS.loadModels !== 'function') return;
    const onFail = (e) => console.warn('[TTS] ONNX preload failed:', e?.message ?? e);
    try {
      // Promise.resolve() tolerates a non-promise return value; the try/catch
      // covers a synchronous throw, which `.catch()` alone would not see.
      Promise.resolve(serverTTS.loadModels(getModelDir())).catch(onFail);
    } catch (e) {
      onFail(e);
    }
    return;
  }

  if (isPocketApi && serverTTS.preload) {
    serverTTS.preload(null, {});
  }
}
|
|
119
245
|
|
|
120
|
-
/**
 * Compute the TTS cache key for a text/voice pair.
 *
 * Delegates to the pocket-tts backend when it is loaded and provides
 * `ttsCacheKey`.
 *
 * @param {string} text - Text to be spoken.
 * @param {string} voiceId - Voice id.
 * @returns {*} The backend's cache key, or null when no key can be computed.
 */
function ttsCacheKey(text, voiceId) {
  if (!isPocketApi || !serverTTS.ttsCacheKey) {
    return null;
  }
  return serverTTS.ttsCacheKey(text, voiceId);
}
|
|
249
|
+
|
|
250
|
+
/**
 * Look up previously synthesized audio by cache key.
 *
 * Delegates to the pocket-tts backend when it is loaded and provides
 * `ttsCacheGet`.
 *
 * @param {*} key - Key obtained from ttsCacheKey.
 * @returns {*} The backend's cached value, or null when no lookup is possible.
 */
function ttsCacheGet(key) {
  if (!isPocketApi || !serverTTS.ttsCacheGet) {
    return null;
  }
  return serverTTS.ttsCacheGet(key);
}
|
|
122
253
|
|
|
123
254
|
/**
 * Split text into sentence-sized pieces for incremental synthesis.
 *
 * Uses the pocket-tts backend's splitter when it is loaded and provides one.
 * Otherwise splits after runs of `.`, `!` or `?`, trims each piece and drops
 * empty ones.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Sentences in order. The built-in splitter returns an
 *   empty array for empty, whitespace-only or non-string input.
 */
function splitSentences(text) {
  if (isPocketApi && serverTTS.splitSentences) return serverTTS.splitSentences(text);
  // Nothing to speak: report no sentences, matching the whitespace-only case.
  if (typeof text !== 'string' || text === '') return [];
  return text.match(/[^.!?]+[.!?]*/g)?.map(s => s.trim()).filter(Boolean) || [text];
}
|
|
126
258
|
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -75,9 +75,11 @@ async function ensureModelsDownloaded() {
|
|
|
75
75
|
const { createRequire: cr } = await import('module');
|
|
76
76
|
const r = cr(import.meta.url);
|
|
77
77
|
|
|
78
|
+
const bundledModels = process.env.PORTABLE_EXE_DIR ? path.join(process.env.PORTABLE_EXE_DIR, 'models') : null;
|
|
78
79
|
const gmguiModels = path.join(os.homedir(), '.gmgui', 'models');
|
|
79
|
-
const
|
|
80
|
-
const
|
|
80
|
+
const modelsBase = (bundledModels && fs.existsSync(path.join(bundledModels, 'onnx-community'))) ? bundledModels : gmguiModels;
|
|
81
|
+
const sttDir = path.join(modelsBase, 'onnx-community', 'whisper-base');
|
|
82
|
+
const ttsDir = path.join(modelsBase, 'tts');
|
|
81
83
|
|
|
82
84
|
const sttOk = fs.existsSync(sttDir) && fs.readdirSync(sttDir).length > 0;
|
|
83
85
|
const ttsOk = fs.existsSync(ttsDir) && fs.readdirSync(ttsDir).length > 0;
|