agentgui 1.0.301 → 1.0.303

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/speech.js CHANGED
@@ -1,278 +1,4 @@
1
1
  import { createRequire } from 'module';
2
- import fs from 'fs';
3
- import path from 'path';
4
- import os from 'os';
5
- import { fileURLToPath } from 'url';
6
-
7
2
  const require = createRequire(import.meta.url);
8
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
9
- const ROOT = path.dirname(__dirname);
10
-
11
- // Load modules
12
- let serverTTS = null;
13
- let serverSTT = null;
14
- let audioDecode = null;
15
- let ttsUtils = null;
16
-
17
- try { serverTTS = require('webtalk/server-tts'); } catch(e) { console.warn('[TTS] webtalk/server-tts unavailable:', e.message); }
18
- try { serverSTT = require('webtalk/server-stt'); } catch(e) { console.warn('[STT] webtalk/server-stt unavailable:', e.message); }
19
- try { audioDecode = require('audio-decode'); } catch(e) { console.warn('[TTS] audio-decode unavailable:', e.message); }
20
- try { ttsUtils = require('webtalk/tts-utils'); } catch(e) {}
21
-
22
- // Detect webtalk API type: old (server-tts.js with getVoices/synthesizeViaPocket)
23
- // vs new ONNX (server-tts-onnx.js with encodeVoiceAudio)
24
- const isOnnxApi = serverTTS && typeof serverTTS.encodeVoiceAudio === 'function';
25
- const isPocketApi = serverTTS && typeof serverTTS.getVoices === 'function';
26
-
27
- // Voice directories to scan
28
- const VOICE_DIRS = [
29
- path.join(os.homedir(), 'voices'),
30
- path.join(ROOT, 'voices'),
31
- '/config/voices',
32
- ];
33
-
34
- const AUDIO_EXTENSIONS = ['.wav', '.mp3', '.ogg', '.flac', '.m4a'];
35
-
36
- const POCKET_TTS_VOICES = [
37
- { id: 'default', name: 'Default', gender: 'female', accent: 'French' },
38
- { id: 'alba', name: 'Alba', gender: 'female', accent: 'French' },
39
- { id: 'marius', name: 'Marius', gender: 'male', accent: 'French' },
40
- { id: 'javert', name: 'Javert', gender: 'male', accent: 'French' },
41
- { id: 'jean', name: 'Jean', gender: 'male', accent: 'French' },
42
- { id: 'fantine', name: 'Fantine', gender: 'female', accent: 'French' },
43
- { id: 'cosette', name: 'Cosette', gender: 'female', accent: 'French' },
44
- { id: 'eponine', name: 'Eponine', gender: 'female', accent: 'French' },
45
- { id: 'azelma', name: 'Azelma', gender: 'female', accent: 'French' },
46
- ];
47
-
48
- const SAMPLE_RATE = 24000;
49
-
50
- // Embedding cache: voiceId -> {data, shape}
51
- const voiceEmbeddingCache = new Map();
52
-
53
- function getModelDir() {
54
- return path.join(os.homedir(), '.gmgui', 'models', 'tts');
55
- }
56
-
57
- function findVoiceFile(voiceId) {
58
- if (!voiceId || voiceId === 'default') return null;
59
- const baseName = voiceId.replace(/^custom_/, '');
60
- for (const dir of VOICE_DIRS) {
61
- for (const ext of AUDIO_EXTENSIONS) {
62
- const p = path.join(dir, baseName + ext);
63
- if (fs.existsSync(p)) return p;
64
- }
65
- }
66
- return null;
67
- }
68
-
69
- function scanVoiceDir(dir) {
70
- const voices = [];
71
- try {
72
- if (!fs.existsSync(dir)) return voices;
73
- const seen = new Set();
74
- for (const file of fs.readdirSync(dir)) {
75
- const ext = path.extname(file).toLowerCase();
76
- if (!AUDIO_EXTENSIONS.includes(ext)) continue;
77
- const baseName = path.basename(file, ext);
78
- if (seen.has(baseName)) continue;
79
- seen.add(baseName);
80
- voices.push({
81
- id: 'custom_' + baseName.replace(/[^a-zA-Z0-9_-]/g, '_'),
82
- name: baseName.replace(/_/g, ' '),
83
- gender: 'custom', accent: 'custom', isCustom: true,
84
- });
85
- }
86
- } catch (_) {}
87
- return voices;
88
- }
89
-
90
- // Encode a voice WAV file to an ONNX voice embedding
91
- async function getVoiceEmbedding(voiceId) {
92
- if (voiceEmbeddingCache.has(voiceId)) return voiceEmbeddingCache.get(voiceId);
93
- const voicePath = findVoiceFile(voiceId);
94
- if (!voicePath) return null;
95
- if (!audioDecode || !serverTTS || !isOnnxApi) return null;
96
-
97
- const modelDir = getModelDir();
98
- if (serverTTS.loadModels) await serverTTS.loadModels(modelDir);
99
-
100
- const raw = fs.readFileSync(voicePath);
101
- const decoded = await audioDecode.default(raw);
102
- let pcm = decoded.getChannelData(0);
103
- if (decoded.sampleRate !== SAMPLE_RATE) {
104
- pcm = ttsUtils ? ttsUtils.resample(pcm, decoded.sampleRate, SAMPLE_RATE)
105
- : (() => {
106
- const ratio = decoded.sampleRate / SAMPLE_RATE;
107
- const out = new Float32Array(Math.round(pcm.length / ratio));
108
- for (let i = 0; i < out.length; i++) out[i] = pcm[Math.floor(i * ratio)];
109
- return out;
110
- })();
111
- }
112
-
113
- const embedding = await serverTTS.encodeVoiceAudio(pcm);
114
- voiceEmbeddingCache.set(voiceId, embedding);
115
- return embedding;
116
- }
117
-
118
- // Convert Float32Array PCM to WAV buffer
119
- function pcmToWav(samples, sampleRate = SAMPLE_RATE) {
120
- const numSamples = samples.length;
121
- const numChannels = 1;
122
- const bitsPerSample = 16;
123
- const byteRate = sampleRate * numChannels * bitsPerSample / 8;
124
- const blockAlign = numChannels * bitsPerSample / 8;
125
- const dataSize = numSamples * blockAlign;
126
- const buf = Buffer.alloc(44 + dataSize);
127
-
128
- buf.write('RIFF', 0); buf.writeUInt32LE(36 + dataSize, 4);
129
- buf.write('WAVE', 8); buf.write('fmt ', 12);
130
- buf.writeUInt32LE(16, 16); buf.writeUInt16LE(1, 20);
131
- buf.writeUInt16LE(numChannels, 22); buf.writeUInt32LE(sampleRate, 24);
132
- buf.writeUInt32LE(byteRate, 28); buf.writeUInt16LE(blockAlign, 32);
133
- buf.writeUInt16LE(bitsPerSample, 34); buf.write('data', 36);
134
- buf.writeUInt32LE(dataSize, 40);
135
-
136
- for (let i = 0; i < numSamples; i++) {
137
- const s = Math.max(-1, Math.min(1, samples[i]));
138
- buf.writeInt16LE(Math.round(s * 32767), 44 + i * 2);
139
- }
140
- return buf;
141
- }
142
-
143
- function getSttOptions() {
144
- if (process.env.PORTABLE_EXE_DIR) {
145
- return { cacheDir: path.join(process.env.PORTABLE_EXE_DIR, 'models') };
146
- }
147
- if (process.env.PORTABLE_DATA_DIR) {
148
- return { cacheDir: path.join(process.env.PORTABLE_DATA_DIR, 'models') };
149
- }
150
- return {};
151
- }
152
-
153
- async function getEmbeddingForVoice(voiceId) {
154
- if (voiceId && voiceId !== 'default') {
155
- const emb = await getVoiceEmbedding(voiceId);
156
- if (emb) return emb;
157
- }
158
- // Fall back to first available voice file
159
- for (const dir of VOICE_DIRS) {
160
- for (const ext of AUDIO_EXTENSIONS) {
161
- const entries = fs.existsSync(dir) ? fs.readdirSync(dir).filter(f => f.endsWith(ext)) : [];
162
- if (entries.length) {
163
- const emb = await getVoiceEmbedding('custom_' + entries[0].replace(new RegExp(`\\${ext}$`), ''));
164
- if (emb) return emb;
165
- }
166
- }
167
- }
168
- return null;
169
- }
170
-
171
- async function synthesize(text, voiceId) {
172
- if (isOnnxApi) {
173
- // Node.js ONNX TTS - no Python required
174
- const modelDir = getModelDir();
175
- const embedding = await getEmbeddingForVoice(voiceId);
176
- if (!embedding) throw new Error('No voice file available for TTS - add a WAV file to ~/voices/');
177
- const pcm = await serverTTS.synthesize(text, embedding, modelDir);
178
- return pcmToWav(pcm);
179
- }
180
-
181
- if (isPocketApi) {
182
- // Old server-tts.js with pocket-tts sidecar
183
- return serverTTS.synthesize(text, voiceId, VOICE_DIRS);
184
- }
185
-
186
- throw new Error('No TTS backend available');
187
- }
188
-
189
- async function* synthesizeStream(text, voiceId) {
190
- if (isOnnxApi) {
191
- const modelDir = getModelDir();
192
- const embedding = await getEmbeddingForVoice(voiceId);
193
- if (!embedding) throw new Error('No voice file available for TTS - add a WAV file to ~/voices/');
194
- const pcm = await serverTTS.synthesize(text, embedding, modelDir);
195
- yield pcmToWav(pcm);
196
- return;
197
- }
198
-
199
- if (isPocketApi) {
200
- for await (const chunk of serverTTS.synthesizeStream(text, voiceId, VOICE_DIRS)) {
201
- yield chunk;
202
- }
203
- return;
204
- }
205
-
206
- throw new Error('No TTS backend available');
207
- }
208
-
209
- function transcribe(audioBuffer) {
210
- if (!serverSTT) throw new Error('STT not available');
211
- return serverSTT.transcribe(audioBuffer, getSttOptions());
212
- }
213
-
214
- function getSTT() {
215
- if (!serverSTT) throw new Error('STT not available');
216
- return serverSTT.getSTT(getSttOptions());
217
- }
218
-
219
- function getVoices() {
220
- const seen = new Set();
221
- const custom = [];
222
- for (const dir of VOICE_DIRS) {
223
- for (const v of scanVoiceDir(dir)) {
224
- if (seen.has(v.id)) continue;
225
- seen.add(v.id);
226
- custom.push(v);
227
- }
228
- }
229
- // Include built-in voices from old server-tts if available
230
- if (isPocketApi) {
231
- const upstream = serverTTS.getVoices(VOICE_DIRS).filter(v => v.isCustom);
232
- for (const v of upstream) {
233
- if (!seen.has(v.id)) { seen.add(v.id); custom.push(v); }
234
- }
235
- }
236
- return [...POCKET_TTS_VOICES, ...custom];
237
- }
238
-
239
- function getStatus() {
240
- const sttStatus = serverSTT ? serverSTT.getStatus() : { ready: false, loading: false, error: 'STT unavailable' };
241
- const ttsBackend = isOnnxApi ? 'onnx-node' : isPocketApi ? 'pocket-tts' : 'none';
242
- return {
243
- sttReady: sttStatus.ready,
244
- ttsReady: isOnnxApi || isPocketApi,
245
- sttLoading: sttStatus.loading,
246
- ttsLoading: false,
247
- sttError: sttStatus.error,
248
- ttsError: (!isOnnxApi && !isPocketApi) ? 'No TTS backend available' : null,
249
- ttsBackend,
250
- };
251
- }
252
-
253
- function preloadTTS() {
254
- if (isOnnxApi) {
255
- // Pre-load ONNX models in background
256
- const modelDir = getModelDir();
257
- if (serverTTS.loadModels) {
258
- serverTTS.loadModels(modelDir).catch(e => console.warn('[TTS] ONNX preload failed:', e.message));
259
- }
260
- } else if (isPocketApi && serverTTS.preload) {
261
- serverTTS.preload(null, {});
262
- }
263
- }
264
-
265
- function ttsCacheKey(text, voiceId) {
266
- return isPocketApi && serverTTS.ttsCacheKey ? serverTTS.ttsCacheKey(text, voiceId) : null;
267
- }
268
-
269
- function ttsCacheGet(key) {
270
- return isPocketApi && serverTTS.ttsCacheGet ? serverTTS.ttsCacheGet(key) : null;
271
- }
272
-
273
- function splitSentences(text) {
274
- if (isPocketApi && serverTTS.splitSentences) return serverTTS.splitSentences(text);
275
- return text.match(/[^.!?]+[.!?]*/g)?.map(s => s.trim()).filter(Boolean) || [text];
276
- }
277
-
278
- export { transcribe, synthesize, synthesizeStream, getSTT, getStatus, getVoices, preloadTTS, ttsCacheKey, ttsCacheGet, splitSentences };
3
+ const speech = require('webtalk/speech');
4
+ export const { transcribe, synthesize, synthesizeStream, getSTT, getStatus, getVoices, preloadTTS, ttsCacheKey, ttsCacheGet, splitSentences } = speech;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentgui",
3
- "version": "1.0.301",
3
+ "version": "1.0.303",
4
4
  "description": "Multi-agent ACP client with real-time communication",
5
5
  "type": "module",
6
6
  "main": "server.js",
@@ -30,7 +30,7 @@
30
30
  "fsbrowse": "^0.2.18",
31
31
  "google-auth-library": "^10.5.0",
32
32
  "onnxruntime-node": "^1.24.1",
33
- "webtalk": "^1.0.17",
33
+ "webtalk": "file:../webtalk",
34
34
  "ws": "^8.14.2"
35
35
  },
36
36
  "overrides": {
package/static/index.html CHANGED
@@ -2257,35 +2257,28 @@
2257
2257
  }
2258
2258
 
2259
2259
  /* --- Inline Tool Result (nested inside tool_use) --- */
2260
- .tool-result-inline {
2261
- border-top: 1px solid #bbf7d0;
2262
- overflow: hidden;
2260
+ /* Tool Result Blocks - Folded by default with status icons */
2261
+ .folded-tool.block-type-tool_result {
2262
+ margin-bottom: 0.375rem;
2263
2263
  }
2264
- html.dark .tool-result-inline { border-top-color: #166534; }
2265
- .tool-result-status {
2266
- display: flex;
2267
- align-items: center;
2268
- gap: 0.375rem;
2269
- padding: 0.3rem 0.625rem;
2270
- font-size: 0.75rem;
2271
- line-height: 1.3;
2272
- cursor: default;
2273
- user-select: none;
2264
+ .folded-tool.folded-tool-success {
2265
+ background: #f0fdf4;
2266
+ }
2267
+ html.dark .folded-tool.folded-tool-success {
2268
+ background: #0a1f0f;
2269
+ }
2270
+ .folded-tool.folded-tool-success > .folded-tool-bar {
2271
+ background: #dcfce7;
2272
+ }
2273
+ html.dark .folded-tool.folded-tool-success > .folded-tool-bar {
2274
+ background: #0f2b1a;
2275
+ }
2276
+ .folded-tool.folded-tool-success > .folded-tool-bar::before {
2277
+ color: #16a34a;
2278
+ }
2279
+ html.dark .folded-tool.folded-tool-success > .folded-tool-bar::before {
2280
+ color: #4ade80;
2274
2281
  }
2275
- .tool-result-status::-webkit-details-marker { display: none; }
2276
- .tool-result-status::marker { display: none; content: ''; }
2277
- .tool-result-inline > .folded-tool-body { border-top: 1px solid #bbf7d0; }
2278
- html.dark .tool-result-inline > .folded-tool-body { border-top-color: #166534; }
2279
- .tool-result-error { background: #fef2f2; border-top-color: #fecaca; }
2280
- html.dark .tool-result-error { background: #1f0a0a; border-top-color: #991b1b; }
2281
- .tool-result-error .folded-tool-icon { color: #dc2626; }
2282
- html.dark .tool-result-error .folded-tool-icon { color: #f87171; }
2283
- .tool-result-error .folded-tool-name { color: #991b1b; }
2284
- html.dark .tool-result-error .folded-tool-name { color: #fca5a5; }
2285
- .tool-result-error .folded-tool-desc { color: #b91c1c; }
2286
- html.dark .tool-result-error .folded-tool-desc { color: #f87171; }
2287
- .tool-result-error > .folded-tool-body { border-top-color: #fecaca; }
2288
- html.dark .tool-result-error > .folded-tool-body { border-top-color: #991b1b; }
2289
2282
 
2290
2283
  /* --- Consecutive Block Joining --- */
2291
2284
  .streaming-blocks > * + *,
@@ -1210,35 +1210,31 @@ class StreamingRenderer {
1210
1210
  const isError = block.is_error || false;
1211
1211
  const content = block.content || '';
1212
1212
  const contentStr = typeof content === 'string' ? content : JSON.stringify(content, null, 2);
1213
- const parentIsOpen = context.parentIsOpen !== undefined ? context.parentIsOpen : true;
1214
1213
 
1215
- const wrapper = document.createElement('div');
1216
- wrapper.className = 'tool-result-inline' + (isError ? ' tool-result-error' : ' tool-result-success');
1217
- wrapper.dataset.eventType = 'tool_result';
1218
- if (block.tool_use_id) wrapper.dataset.toolUseId = block.tool_use_id;
1219
- wrapper.classList.add(this._getBlockTypeClass('tool_result'));
1214
+ const details = document.createElement('details');
1215
+ details.className = 'folded-tool' + (isError ? ' folded-tool-error' : ' folded-tool-success');
1216
+ details.dataset.eventType = 'tool_result';
1217
+ if (block.tool_use_id) details.dataset.toolUseId = block.tool_use_id;
1218
+ details.classList.add(this._getBlockTypeClass('tool_result'));
1220
1219
 
1221
- const header = document.createElement('div');
1222
- header.className = 'tool-result-status';
1220
+ const summary = document.createElement('summary');
1221
+ summary.className = 'folded-tool-bar';
1223
1222
  const iconSvg = isError
1224
1223
  ? '<svg viewBox="0 0 20 20" fill="currentColor"><path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM8.707 7.293a1 1 0 00-1.414 1.414L8.586 10l-1.293 1.293a1 1 0 101.414 1.414L10 11.414l1.293 1.293a1 1 0 001.414-1.414L11.414 10l1.293-1.293a1 1 0 00-1.414-1.414L10 8.586 8.707 7.293z" clip-rule="evenodd"/></svg>'
1225
1224
  : '<svg viewBox="0 0 20 20" fill="currentColor"><path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zm3.707-9.293a1 1 0 00-1.414-1.414L9 10.586 7.707 9.293a1 1 0 00-1.414 1.414l2 2a1 1 0 001.414 0l4-4z" clip-rule="evenodd"/></svg>';
1226
- header.innerHTML = `
1225
+ summary.innerHTML = `
1227
1226
  <span class="folded-tool-icon">${iconSvg}</span>
1228
1227
  <span class="folded-tool-name">${isError ? 'Error' : 'Success'}</span>
1229
1228
  `;
1230
- wrapper.appendChild(header);
1229
+ details.appendChild(summary);
1231
1230
 
1232
1231
  const renderedContent = StreamingRenderer.renderSmartContentHTML(contentStr, this.escapeHtml.bind(this), true);
1233
1232
  const body = document.createElement('div');
1234
1233
  body.className = 'folded-tool-body';
1235
- if (!parentIsOpen) {
1236
- body.style.display = 'none';
1237
- }
1238
1234
  body.innerHTML = renderedContent;
1239
- wrapper.appendChild(body);
1235
+ details.appendChild(body);
1240
1236
 
1241
- return wrapper;
1237
+ return details;
1242
1238
  }
1243
1239
 
1244
1240
  /**