agentgui 1.0.290 → 1.0.292

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build-portable.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import fs from 'fs';
2
2
  import path from 'path';
3
+ import os from 'os';
3
4
  import { fileURLToPath } from 'url';
4
5
  import { execSync } from 'child_process';
5
6
 
@@ -51,7 +52,7 @@ fs.mkdirSync(out, { recursive: true });
51
52
 
52
53
  log('Compiling Windows executable...');
53
54
  execSync(
54
- `~/.bun/bin/bun build --compile --target=bun-windows-x64 --outfile=${path.join(out, 'agentgui.exe')} ${path.join(src, 'portable-entry.js')}`,
55
+ `"${path.join(os.homedir(), '.bun', 'bin', 'bun')}" build --compile --target=bun-windows-x64 --outfile="${path.join(out, 'agentgui.exe')}" "${path.join(src, 'portable-entry.js')}"`,
55
56
  { stdio: 'inherit', cwd: src }
56
57
  );
57
58
 
@@ -119,6 +120,15 @@ copyDir(path.join(claudeSrc, 'vendor', 'ripgrep', 'x64-win32'), path.join(claude
119
120
  log('Creating data directory...');
120
121
  fs.mkdirSync(path.join(out, 'data'), { recursive: true });
121
122
 
123
+ log('Bundling AI models...');
124
+ const userModels = path.join(os.homedir(), '.gmgui', 'models');
125
+ if (fs.existsSync(userModels)) {
126
+ copyDir(userModels, path.join(out, 'models'));
127
+ log(`Models bundled: ${Math.round(sizeOf(path.join(out, 'models')) / 1024 / 1024)}MB`);
128
+ } else {
129
+ log('WARNING: No models found at ~/.gmgui/models - portable build will download on first use');
130
+ }
131
+
122
132
  fs.writeFileSync(path.join(out, 'README.txt'), [
123
133
  '# AgentGUI Portable',
124
134
  '',
package/lib/speech.js CHANGED
@@ -1,18 +1,37 @@
1
1
  import { createRequire } from 'module';
2
2
  import fs from 'fs';
3
3
  import path from 'path';
4
- import http from 'http';
4
+ import os from 'os';
5
5
  import { fileURLToPath } from 'url';
6
6
 
7
7
  const require = createRequire(import.meta.url);
8
8
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
9
9
  const ROOT = path.dirname(__dirname);
10
10
 
11
+ // Load modules
12
+ let serverTTS = null;
11
13
  let serverSTT = null;
14
+ let audioDecode = null;
15
+ let sttttsmodels = null;
16
+
17
+ try { serverTTS = require('webtalk/server-tts'); } catch(e) { console.warn('[TTS] webtalk/server-tts unavailable:', e.message); }
12
18
  try { serverSTT = require('webtalk/server-stt'); } catch(e) { console.warn('[STT] webtalk/server-stt unavailable:', e.message); }
19
+ try { audioDecode = require('audio-decode'); } catch(e) { console.warn('[TTS] audio-decode unavailable:', e.message); }
20
+ try { sttttsmodels = require('sttttsmodels'); } catch(e) { console.warn('[TTS] sttttsmodels unavailable:', e.message); }
21
+
22
+ // Detect webtalk API type: old (server-tts.js with getVoices/synthesizeViaPocket)
23
+ // vs new ONNX (server-tts-onnx.js with encodeVoiceAudio)
24
+ const isOnnxApi = serverTTS && typeof serverTTS.encodeVoiceAudio === 'function';
25
+ const isPocketApi = serverTTS && typeof serverTTS.getVoices === 'function';
26
+
27
+ // Voice directories to scan
28
+ const VOICE_DIRS = [
29
+ path.join(os.homedir(), 'voices'),
30
+ path.join(ROOT, 'voices'),
31
+ '/config/voices',
32
+ ];
13
33
 
14
- const VOICE_DIRS = [path.join(ROOT, 'voices')];
15
- const POCKET_PORT = 8787;
34
+ const AUDIO_EXTENSIONS = ['.wav', '.mp3', '.ogg', '.flac', '.m4a'];
16
35
 
17
36
  const POCKET_TTS_VOICES = [
18
37
  { id: 'default', name: 'Default', gender: 'female', accent: 'French' },
@@ -26,65 +45,146 @@ const POCKET_TTS_VOICES = [
26
45
  { id: 'azelma', name: 'Azelma', gender: 'female', accent: 'French' },
27
46
  ];
28
47
 
29
- const PREDEFINED_IDS = new Set(POCKET_TTS_VOICES.filter(v => v.id !== 'default').map(v => v.id));
48
+ const SAMPLE_RATE = 24000;
30
49
 
31
- function getSttOptions() {
32
- if (process.env.PORTABLE_DATA_DIR) {
33
- return { cacheDir: path.join(process.env.PORTABLE_DATA_DIR, 'models') };
50
+ // Embedding cache: voiceId -> {data, shape}
51
+ const voiceEmbeddingCache = new Map();
52
+
53
+ function getModelDir() {
54
+ if (sttttsmodels && sttttsmodels.ttsDir && fs.existsSync(sttttsmodels.ttsDir)) {
55
+ return sttttsmodels.ttsDir;
34
56
  }
35
- return {};
57
+ // Fallback to persistent cache dir
58
+ return path.join(os.homedir(), '.gmgui', 'models', 'tts');
36
59
  }
37
60
 
38
61
  function findVoiceFile(voiceId) {
62
+ if (!voiceId || voiceId === 'default') return null;
63
+ const baseName = voiceId.replace(/^custom_/, '');
39
64
  for (const dir of VOICE_DIRS) {
40
- const p = path.join(dir, `custom_${voiceId}.wav`);
41
- if (fs.existsSync(p)) return p;
65
+ for (const ext of AUDIO_EXTENSIONS) {
66
+ const p = path.join(dir, baseName + ext);
67
+ if (fs.existsSync(p)) return p;
68
+ }
42
69
  }
43
70
  return null;
44
71
  }
45
72
 
46
- function synthesize(text, voiceId) {
47
- const voicePath = voiceId ? findVoiceFile(voiceId) : null;
48
- const isPredefined = voiceId && PREDEFINED_IDS.has(voiceId);
49
- const boundary = '----PocketTTS' + Date.now();
50
- const parts = [];
51
- parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="text"\r\n\r\n${text}\r\n`);
52
- if (voicePath) {
53
- const data = fs.readFileSync(voicePath);
54
- const name = path.basename(voicePath);
55
- parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="voice_wav"; filename="${name}"\r\nContent-Type: audio/wav\r\n\r\n`);
56
- parts.push(data);
57
- parts.push('\r\n');
58
- } else if (isPredefined) {
59
- parts.push(`--${boundary}\r\nContent-Disposition: form-data; name="voice_url"\r\n\r\n${voiceId}\r\n`);
73
+ function scanVoiceDir(dir) {
74
+ const voices = [];
75
+ try {
76
+ if (!fs.existsSync(dir)) return voices;
77
+ const seen = new Set();
78
+ for (const file of fs.readdirSync(dir)) {
79
+ const ext = path.extname(file).toLowerCase();
80
+ if (!AUDIO_EXTENSIONS.includes(ext)) continue;
81
+ const baseName = path.basename(file, ext);
82
+ if (seen.has(baseName)) continue;
83
+ seen.add(baseName);
84
+ voices.push({
85
+ id: 'custom_' + baseName.replace(/[^a-zA-Z0-9_-]/g, '_'),
86
+ name: baseName.replace(/_/g, ' '),
87
+ gender: 'custom', accent: 'custom', isCustom: true,
88
+ });
89
+ }
90
+ } catch (_) {}
91
+ return voices;
92
+ }
93
+
94
+ // Encode a voice audio file (any extension in AUDIO_EXTENSIONS) to an ONNX voice embedding
95
+ async function getVoiceEmbedding(voiceId) {
96
+ if (voiceEmbeddingCache.has(voiceId)) return voiceEmbeddingCache.get(voiceId);
97
+ const voicePath = findVoiceFile(voiceId);
98
+ if (!voicePath) return null;
99
+ if (!audioDecode || !serverTTS || !isOnnxApi) return null;
100
+
101
+ const raw = fs.readFileSync(voicePath);
102
+ const decoded = await audioDecode.default(raw);
103
+ // Get mono float32 PCM, resample to 24kHz if needed
104
+ let pcm = decoded.getChannelData(0);
105
+ if (decoded.sampleRate !== SAMPLE_RATE) {
106
+ // Simple nearest-sample resampling (takes the source sample at floor(i * ratio); no interpolation)
107
+ const ratio = decoded.sampleRate / SAMPLE_RATE;
108
+ const outLen = Math.floor(pcm.length / ratio);
109
+ const resampled = new Float32Array(outLen);
110
+ for (let i = 0; i < outLen; i++) resampled[i] = pcm[Math.floor(i * ratio)];
111
+ pcm = resampled;
60
112
  }
61
- parts.push(`--${boundary}--\r\n`);
62
- const body = Buffer.concat(parts.map(p => Buffer.isBuffer(p) ? p : Buffer.from(p)));
63
- return new Promise((resolve, reject) => {
64
- const req = http.request({
65
- hostname: '127.0.0.1', port: POCKET_PORT, path: '/tts', method: 'POST',
66
- headers: { 'Content-Type': `multipart/form-data; boundary=${boundary}`, 'Content-Length': body.length },
67
- timeout: 60000,
68
- }, res => {
69
- if (res.statusCode !== 200) {
70
- let e = '';
71
- res.on('data', d => e += d);
72
- res.on('end', () => reject(new Error(`pocket-tts HTTP ${res.statusCode}: ${e}`)));
73
- return;
74
- }
75
- const chunks = [];
76
- res.on('data', d => chunks.push(d));
77
- res.on('end', () => resolve(Buffer.concat(chunks)));
78
- });
79
- req.on('error', reject);
80
- req.on('timeout', () => { req.destroy(); reject(new Error('pocket-tts timeout')); });
81
- req.write(body);
82
- req.end();
83
- });
113
+
114
+ const embedding = await serverTTS.encodeVoiceAudio(pcm);
115
+ voiceEmbeddingCache.set(voiceId, embedding);
116
+ return embedding;
117
+ }
118
+
119
+ // Convert Float32Array PCM to WAV buffer
120
+ function pcmToWav(samples, sampleRate = SAMPLE_RATE) {
121
+ const numSamples = samples.length;
122
+ const numChannels = 1;
123
+ const bitsPerSample = 16;
124
+ const byteRate = sampleRate * numChannels * bitsPerSample / 8;
125
+ const blockAlign = numChannels * bitsPerSample / 8;
126
+ const dataSize = numSamples * blockAlign;
127
+ const buf = Buffer.alloc(44 + dataSize);
128
+
129
+ buf.write('RIFF', 0); buf.writeUInt32LE(36 + dataSize, 4);
130
+ buf.write('WAVE', 8); buf.write('fmt ', 12);
131
+ buf.writeUInt32LE(16, 16); buf.writeUInt16LE(1, 20);
132
+ buf.writeUInt16LE(numChannels, 22); buf.writeUInt32LE(sampleRate, 24);
133
+ buf.writeUInt32LE(byteRate, 28); buf.writeUInt16LE(blockAlign, 32);
134
+ buf.writeUInt16LE(bitsPerSample, 34); buf.write('data', 36);
135
+ buf.writeUInt32LE(dataSize, 40);
136
+
137
+ for (let i = 0; i < numSamples; i++) {
138
+ const s = Math.max(-1, Math.min(1, samples[i]));
139
+ buf.writeInt16LE(Math.round(s * 32767), 44 + i * 2);
140
+ }
141
+ return buf;
142
+ }
143
+
144
+ function getSttOptions() {
145
+ if (process.env.PORTABLE_EXE_DIR) {
146
+ return { cacheDir: path.join(process.env.PORTABLE_EXE_DIR, 'models') };
147
+ }
148
+ if (process.env.PORTABLE_DATA_DIR) {
149
+ return { cacheDir: path.join(process.env.PORTABLE_DATA_DIR, 'models') };
150
+ }
151
+ return {};
152
+ }
153
+
154
+ async function synthesize(text, voiceId) {
155
+ if (isOnnxApi) {
156
+ // Node.js ONNX TTS - no Python required
157
+ const modelDir = getModelDir();
158
+ const embedding = voiceId ? await getVoiceEmbedding(voiceId) : null;
159
+ const pcm = await serverTTS.synthesize(text, embedding, modelDir);
160
+ return pcmToWav(pcm);
161
+ }
162
+
163
+ if (isPocketApi) {
164
+ // Old server-tts.js with pocket-tts sidecar
165
+ return serverTTS.synthesize(text, voiceId, VOICE_DIRS);
166
+ }
167
+
168
+ throw new Error('No TTS backend available');
84
169
  }
85
170
 
86
171
  async function* synthesizeStream(text, voiceId) {
87
- yield await synthesize(text, voiceId);
172
+ if (isOnnxApi) {
173
+ const modelDir = getModelDir();
174
+ const embedding = voiceId ? await getVoiceEmbedding(voiceId) : null;
175
+ const pcm = await serverTTS.synthesize(text, embedding, modelDir);
176
+ yield pcmToWav(pcm);
177
+ return;
178
+ }
179
+
180
+ if (isPocketApi) {
181
+ for await (const chunk of serverTTS.synthesizeStream(text, voiceId, VOICE_DIRS)) {
182
+ yield chunk;
183
+ }
184
+ return;
185
+ }
186
+
187
+ throw new Error('No TTS backend available');
88
188
  }
89
189
 
90
190
  function transcribe(audioBuffer) {
@@ -98,29 +198,61 @@ function getSTT() {
98
198
  }
99
199
 
100
200
  function getVoices() {
101
- return POCKET_TTS_VOICES;
201
+ const seen = new Set();
202
+ const custom = [];
203
+ for (const dir of VOICE_DIRS) {
204
+ for (const v of scanVoiceDir(dir)) {
205
+ if (seen.has(v.id)) continue;
206
+ seen.add(v.id);
207
+ custom.push(v);
208
+ }
209
+ }
210
+ // Include custom voices reported by the old server-tts API, if available
211
+ if (isPocketApi) {
212
+ const upstream = serverTTS.getVoices(VOICE_DIRS).filter(v => v.isCustom);
213
+ for (const v of upstream) {
214
+ if (!seen.has(v.id)) { seen.add(v.id); custom.push(v); }
215
+ }
216
+ }
217
+ return [...POCKET_TTS_VOICES, ...custom];
102
218
  }
103
219
 
104
220
  function getStatus() {
105
221
  const sttStatus = serverSTT ? serverSTT.getStatus() : { ready: false, loading: false, error: 'STT unavailable' };
222
+ const ttsBackend = isOnnxApi ? 'onnx-node' : isPocketApi ? 'pocket-tts' : 'none';
106
223
  return {
107
224
  sttReady: sttStatus.ready,
108
- ttsReady: true,
225
+ ttsReady: isOnnxApi || isPocketApi,
109
226
  sttLoading: sttStatus.loading,
110
227
  ttsLoading: false,
111
228
  sttError: sttStatus.error,
112
- ttsError: null,
229
+ ttsError: (!isOnnxApi && !isPocketApi) ? 'No TTS backend available' : null,
230
+ ttsBackend,
113
231
  };
114
232
  }
115
233
 
116
234
  function preloadTTS() {
117
- // pocket-tts is managed externally; nothing to preload
235
+ if (isOnnxApi) {
236
+ // Pre-load ONNX models in background
237
+ const modelDir = getModelDir();
238
+ if (serverTTS.loadModels) {
239
+ serverTTS.loadModels(modelDir).catch(e => console.warn('[TTS] ONNX preload failed:', e.message));
240
+ }
241
+ } else if (isPocketApi && serverTTS.preload) {
242
+ serverTTS.preload(null, {});
243
+ }
118
244
  }
119
245
 
120
- function ttsCacheKey(text, voiceId) { return null; }
121
- function ttsCacheGet(key) { return null; }
246
+ function ttsCacheKey(text, voiceId) {
247
+ return isPocketApi && serverTTS.ttsCacheKey ? serverTTS.ttsCacheKey(text, voiceId) : null;
248
+ }
249
+
250
+ function ttsCacheGet(key) {
251
+ return isPocketApi && serverTTS.ttsCacheGet ? serverTTS.ttsCacheGet(key) : null;
252
+ }
122
253
 
123
254
  function splitSentences(text) {
255
+ if (isPocketApi && serverTTS.splitSentences) return serverTTS.splitSentences(text);
124
256
  return text.match(/[^.!?]+[.!?]*/g)?.map(s => s.trim()).filter(Boolean) || [text];
125
257
  }
126
258
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.290",
3
+ "version": "1.0.292",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
package/server.js CHANGED
@@ -75,9 +75,11 @@ async function ensureModelsDownloaded() {
75
75
  const { createRequire: cr } = await import('module');
76
76
  const r = cr(import.meta.url);
77
77
 
78
+ const bundledModels = process.env.PORTABLE_EXE_DIR ? path.join(process.env.PORTABLE_EXE_DIR, 'models') : null;
78
79
  const gmguiModels = path.join(os.homedir(), '.gmgui', 'models');
79
- const sttDir = path.join(gmguiModels, 'onnx-community', 'whisper-base');
80
- const ttsDir = path.join(gmguiModels, 'tts');
80
+ const modelsBase = (bundledModels && fs.existsSync(path.join(bundledModels, 'onnx-community'))) ? bundledModels : gmguiModels;
81
+ const sttDir = path.join(modelsBase, 'onnx-community', 'whisper-base');
82
+ const ttsDir = path.join(modelsBase, 'tts');
81
83
 
82
84
  const sttOk = fs.existsSync(sttDir) && fs.readdirSync(sttDir).length > 0;
83
85
  const ttsOk = fs.existsSync(ttsDir) && fs.readdirSync(ttsDir).length > 0;