npm - agentgui - Versions diffs - 1.0.143 → 1.0.145 - Mend

agentgui 1.0.143 → 1.0.145

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/database.js +7 -0
package/lib/claude-runner.js +8 -0
package/lib/speech.js +192 -0
package/package.json +3 -1
package/server.js +122 -81
package/static/js/client.js +14 -2
package/static/js/streaming-renderer.js +22 -4
package/static/js/voice.js +124 -173

package/database.js CHANGED

@@ -510,6 +510,13 @@ export const queries = {
     return stmt.all(conversationId, status);
   },
+  getActiveSessions() {
+    const stmt = db.prepare(
+      "SELECT * FROM sessions WHERE status IN ('active', 'pending') ORDER BY started_at DESC"
+    );
+    return stmt.all();
+  },
   createEvent(type, data, conversationId = null, sessionId = null) {
     const id = generateId('evt');
     const now = Date.now();

package/lib/claude-runner.js CHANGED

@@ -52,6 +52,10 @@ class AgentRunner {
       const args = this.buildArgs(prompt, config);
       const proc = spawn(this.command, args, { cwd });
+      if (config.onPid) {
+        try { config.onPid(proc.pid); } catch (e) {}
+      }
       let jsonBuffer = '';
       const outputs = [];
       let timedOut = false;
@@ -150,6 +154,10 @@ class AgentRunner {
       const proc = spawn(cmd, args, { cwd });
+      if (config.onPid) {
+        try { config.onPid(proc.pid); } catch (e) {}
+      }
       const outputs = [];
       let timedOut = false;
       let sessionId = null;

package/lib/speech.js ADDED

@@ -0,0 +1,192 @@
+import { pipeline, env } from '@huggingface/transformers';
+import { createRequire } from 'module';
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+const require = createRequire(import.meta.url);
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const ROOT = path.dirname(__dirname);
+const DATA_DIR = path.join(ROOT, 'data');
+const SPEAKER_EMBEDDINGS_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
+const SPEAKER_EMBEDDINGS_PATH = path.join(DATA_DIR, 'speaker_embeddings.bin');
+const SAMPLE_RATE_TTS = 16000;
+const SAMPLE_RATE_STT = 16000;
+let sttPipeline = null;
+let ttsPipeline = null;
+let speakerEmbeddings = null;
+let sttLoading = false;
+let ttsLoading = false;
+function whisperModelPath() {
+  try {
+    const webtalkDir = path.dirname(require.resolve('webtalk'));
+    const p = path.join(webtalkDir, 'models', 'onnx-community', 'whisper-base');
+    if (fs.existsSync(p)) return p;
+  } catch (_) {}
+  return 'onnx-community/whisper-base';
+}
+async function ensureSpeakerEmbeddings() {
+  if (speakerEmbeddings) return speakerEmbeddings;
+  if (!fs.existsSync(DATA_DIR)) fs.mkdirSync(DATA_DIR, { recursive: true });
+  if (!fs.existsSync(SPEAKER_EMBEDDINGS_PATH)) {
+    const resp = await fetch(SPEAKER_EMBEDDINGS_URL);
+    if (!resp.ok) throw new Error('Failed to download speaker embeddings');
+    fs.writeFileSync(SPEAKER_EMBEDDINGS_PATH, Buffer.from(await resp.arrayBuffer()));
+  }
+  const buf = fs.readFileSync(SPEAKER_EMBEDDINGS_PATH);
+  speakerEmbeddings = new Float32Array(new Uint8Array(buf).buffer);
+  return speakerEmbeddings;
+}
+async function getSTT() {
+  if (sttPipeline) return sttPipeline;
+  if (sttLoading) {
+    while (sttLoading) await new Promise(r => setTimeout(r, 100));
+    return sttPipeline;
+  }
+  sttLoading = true;
+  try {
+    const modelPath = whisperModelPath();
+    const isLocal = !modelPath.includes('/') || fs.existsSync(modelPath);
+    env.allowLocalModels = true;
+    env.allowRemoteModels = !isLocal;
+    if (isLocal) env.localModelPath = '';
+    sttPipeline = await pipeline('automatic-speech-recognition', modelPath, {
+      device: 'cpu',
+      local_files_only: isLocal,
+    });
+    return sttPipeline;
+  } finally {
+    sttLoading = false;
+  }
+}
+async function getTTS() {
+  if (ttsPipeline) return ttsPipeline;
+  if (ttsLoading) {
+    while (ttsLoading) await new Promise(r => setTimeout(r, 100));
+    return ttsPipeline;
+  }
+  ttsLoading = true;
+  try {
+    env.allowRemoteModels = true;
+    ttsPipeline = await pipeline('text-to-speech', 'Xenova/speecht5_tts', {
+      device: 'cpu',
+      dtype: 'fp32',
+    });
+    await ensureSpeakerEmbeddings();
+    return ttsPipeline;
+  } finally {
+    ttsLoading = false;
+  }
+}
+function decodeWavToFloat32(buffer) {
+  const view = new DataView(buffer.buffer || buffer);
+  const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3));
+  if (riff !== 'RIFF') throw new Error('Not a WAV file');
+  const numChannels = view.getUint16(22, true);
+  const sampleRate = view.getUint32(24, true);
+  const bitsPerSample = view.getUint16(34, true);
+  let dataOffset = 44;
+  for (let i = 36; i < view.byteLength - 8; i++) {
+    if (view.getUint8(i) === 0x64 && view.getUint8(i+1) === 0x61 &&
+        view.getUint8(i+2) === 0x74 && view.getUint8(i+3) === 0x61) {
+      dataOffset = i + 8;
+      break;
+    }
+  }
+  const bytesPerSample = bitsPerSample / 8;
+  const numSamples = Math.floor((view.byteLength - dataOffset) / (bytesPerSample * numChannels));
+  const audio = new Float32Array(numSamples);
+  for (let i = 0; i < numSamples; i++) {
+    const offset = dataOffset + i * bytesPerSample * numChannels;
+    if (bitsPerSample === 16) {
+      audio[i] = view.getInt16(offset, true) / 32768;
+    } else if (bitsPerSample === 32) {
+      audio[i] = view.getFloat32(offset, true);
+    } else {
+      audio[i] = (view.getUint8(offset) - 128) / 128;
+    }
+  }
+  return { audio, sampleRate };
+}
+function resampleTo16k(audio, fromRate) {
+  if (fromRate === SAMPLE_RATE_STT) return audio;
+  const ratio = fromRate / SAMPLE_RATE_STT;
+  const newLen = Math.round(audio.length / ratio);
+  const result = new Float32Array(newLen);
+  for (let i = 0; i < newLen; i++) {
+    const srcIdx = i * ratio;
+    const lo = Math.floor(srcIdx);
+    const hi = Math.min(lo + 1, audio.length - 1);
+    const frac = srcIdx - lo;
+    result[i] = audio[lo] * (1 - frac) + audio[hi] * frac;
+  }
+  return result;
+}
+function encodeWav(float32Audio, sampleRate) {
+  const numSamples = float32Audio.length;
+  const bytesPerSample = 2;
+  const dataSize = numSamples * bytesPerSample;
+  const buffer = new ArrayBuffer(44 + dataSize);
+  const view = new DataView(buffer);
+  const writeStr = (off, str) => { for (let i = 0; i < str.length; i++) view.setUint8(off + i, str.charCodeAt(i)); };
+  writeStr(0, 'RIFF');
+  view.setUint32(4, 36 + dataSize, true);
+  writeStr(8, 'WAVE');
+  writeStr(12, 'fmt ');
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, 1, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, sampleRate * bytesPerSample, true);
+  view.setUint16(32, bytesPerSample, true);
+  view.setUint16(34, 16, true);
+  writeStr(36, 'data');
+  view.setUint32(40, dataSize, true);
+  for (let i = 0; i < numSamples; i++) {
+    const s = Math.max(-1, Math.min(1, float32Audio[i]));
+    view.setInt16(44 + i * 2, s < 0 ? s * 32768 : s * 32767, true);
+  }
+  return Buffer.from(buffer);
+}
+async function transcribe(audioBuffer) {
+  const stt = await getSTT();
+  let audio;
+  const buf = Buffer.isBuffer(audioBuffer) ? audioBuffer : Buffer.from(audioBuffer);
+  const isWav = buf.length > 4 && buf.toString('ascii', 0, 4) === 'RIFF';
+  if (isWav) {
+    const decoded = decodeWavToFloat32(buf);
+    audio = resampleTo16k(decoded.audio, decoded.sampleRate);
+  } else {
+    audio = new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4);
+  }
+  const result = await stt(audio);
+  return result.text || '';
+}
+async function synthesize(text) {
+  const tts = await getTTS();
+  const embeddings = await ensureSpeakerEmbeddings();
+  const result = await tts(text, { speaker_embeddings: embeddings });
+  return encodeWav(result.audio, result.sampling_rate || SAMPLE_RATE_TTS);
+}
+function getStatus() {
+  return {
+    sttReady: !!sttPipeline,
+    ttsReady: !!ttsPipeline,
+    sttLoading,
+    ttsLoading,
+  };
+}
+export { transcribe, synthesize, getSTT, getTTS, getStatus };

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "agentgui",
-  "version": "1.0.143",
+  "version": "1.0.145",
   "description": "Multi-agent ACP client with real-time communication",
   "type": "module",
   "main": "server.js",
@@ -22,10 +22,12 @@
   },
   "dependencies": {
     "@anthropic-ai/claude-code": "^2.1.37",
+    "@huggingface/transformers": "^3.8.1",
     "better-sqlite3": "^12.6.2",
     "busboy": "^1.6.0",
     "express": "^5.2.1",
     "fsbrowse": "^0.2.13",
+    "onnxruntime-node": "^1.24.1",
     "webtalk": "github:anEntrypoint/realtime-whisper-webgpu",
     "ws": "^8.14.2"
   }

package/server.js CHANGED

@@ -8,12 +8,12 @@ import { execSync } from 'child_process';
 import { createRequire } from 'module';
 import { queries } from './database.js';
 import { runClaudeWithStreaming } from './lib/claude-runner.js';
+import { transcribe, synthesize, getStatus as getSpeechStatus } from './lib/speech.js';
 const require = createRequire(import.meta.url);
 const express = require('express');
 const Busboy = require('busboy');
 const fsbrowse = require('fsbrowse');
-const { webtalk } = require('webtalk');
 const SYSTEM_PROMPT = `Always write your responses in ripple-ui enhanced HTML. Avoid overriding light/dark mode CSS variables. Use all the benefits of HTML to express technical details with proper semantic markup, tables, code blocks, headings, and lists. Write clean, well-structured HTML that respects the existing design system.`;
@@ -37,28 +37,6 @@ if (!fs.existsSync(staticDir)) fs.mkdirSync(staticDir, { recursive: true });
 // Express sub-app for fsbrowse file browser and file upload
 const expressApp = express();
-// Separate Express app for webtalk (STT/TTS) - isolated to contain COEP/COOP headers
-const webtalkApp = express();
-const webtalkInstance = webtalk(webtalkApp, { path: '/webtalk' });
-const webtalkSdkDir = path.dirname(require.resolve('webtalk'));
-const WASM_MIN_BYTES = 1000000;
-const webtalkCriticalFiles = [
-  { path: path.join(webtalkSdkDir, 'assets', 'ort-wasm-simd-threaded.jsep.wasm'), minBytes: WASM_MIN_BYTES }
-];
-for (const file of webtalkCriticalFiles) {
-  try {
-    if (fs.existsSync(file.path)) {
-      const stat = fs.statSync(file.path);
-      if (stat.size < file.minBytes) {
-        debugLog(`Removing corrupt file ${path.basename(file.path)} (${stat.size} bytes, need ${file.minBytes}+)`);
-        fs.unlinkSync(file.path);
-      }
-    }
-  } catch (e) { debugLog(`File check error: ${e.message}`); }
-}
-webtalkInstance.init().catch(err => debugLog('Webtalk init: ' + err.message));
 // File upload endpoint - copies dropped files to conversation workingDirectory
 expressApp.post(BASE_URL + '/api/upload/:conversationId', (req, res) => {
@@ -165,63 +143,9 @@ const server = http.createServer(async (req, res) => {
   res.setHeader('Access-Control-Allow-Origin', '*');
   res.setHeader('Access-Control-Allow-Methods', 'GET, POST, PUT, DELETE, OPTIONS');
   res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Authorization');
-  res.setHeader('Cross-Origin-Embedder-Policy', 'credentialless');
-  res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
-  res.setHeader('Cross-Origin-Resource-Policy', 'cross-origin');
   if (req.method === 'OPTIONS') { res.writeHead(200); res.end(); return; }
   const pathOnly = req.url.split('?')[0];
-  const webtalkPrefix = BASE_URL + '/webtalk';
-  const isWebtalkRoute = pathOnly.startsWith(webtalkPrefix) ||
-    pathOnly.startsWith(BASE_URL + '/api/tts-status') ||
-    pathOnly.startsWith(BASE_URL + '/assets/') ||
-    pathOnly.startsWith(BASE_URL + '/tts/') ||
-    pathOnly.startsWith(BASE_URL + '/models/') ||
-    pathOnly.startsWith('/webtalk') ||
-    pathOnly.startsWith('/assets/') ||
-    pathOnly.startsWith('/tts/') ||
-    pathOnly.startsWith('/models/');
-  if (isWebtalkRoute) {
-    const webtalkSdkDir = path.dirname(require.resolve('webtalk'));
-    const sdkFiles = { '/demo': 'app.html', '/sdk.js': 'sdk.js', '/stt.js': 'stt.js', '/tts.js': 'tts.js', '/tts-utils.js': 'tts-utils.js' };
-    let stripped = pathOnly.startsWith(webtalkPrefix) ? pathOnly.slice(webtalkPrefix.length) : (pathOnly.startsWith('/webtalk') ? pathOnly.slice('/webtalk'.length) : null);
-    if (stripped !== null && !sdkFiles[stripped] && !stripped.endsWith('.js') && sdkFiles[stripped + '.js']) stripped += '.js';
-    if (stripped !== null && sdkFiles[stripped]) {
-      const filePath = path.join(webtalkSdkDir, sdkFiles[stripped]);
-      return fs.readFile(filePath, 'utf-8', (err, content) => {
-        if (err) { res.writeHead(404); res.end('Not found'); return; }
-        if (stripped === '/demo') {
-          let patched = content
-            .replace(/from\s+['"](\/webtalk\/[^'"]+)['"]/g, (_, p) => `from '${BASE_URL}${p}'`)
-            .replace(/from\s+['"]\.\/([^'"]+)['"]/g, (_, p) => `from '${BASE_URL}/webtalk/${p}'`)
-            .replace('<head>', `<head>\n    <script>window.__WEBTALK_BASE='${BASE_URL}';</script>`);
-          res.writeHead(200, { 'Content-Type': 'text/html; charset=utf-8', 'Cross-Origin-Embedder-Policy': 'credentialless', 'Cross-Origin-Opener-Policy': 'same-origin', 'Cross-Origin-Resource-Policy': 'cross-origin' });
-          return res.end(patched);
-        }
-        let js = content;
-        const ensureExt = (mod) => mod.endsWith('.js') ? mod : mod + '.js';
-        if (js.includes('require(') || js.includes('module.exports')) {
-          js = js.replace(/const\s*\{([^}]+)\}\s*=\s*require\(['"]\.\/([^'"]+)['"]\);?/g, (_, names, mod) => `import {${names}} from '${BASE_URL}/webtalk/${ensureExt(mod)}';`);
-          js = js.replace(/const\s+(\w+)\s*=\s*require\(['"]\.\/([^'"]+)['"]\);?/g, (_, name, mod) => `import ${name} from '${BASE_URL}/webtalk/${ensureExt(mod)}';`);
-          js = js.replace(/module\.exports\s*=\s*\{([^}]+)\};?/, (_, names) => `export {${names.trim().replace(/\s+/g, ' ')} };`);
-        }
-        js = js.replace(/from\s+['"]\.\/([^'"]+)['"]/g, (_, p) => `from '${BASE_URL}/webtalk/${ensureExt(p)}'`);
-        res.writeHead(200, { 'Content-Type': 'application/javascript; charset=utf-8', 'Cross-Origin-Resource-Policy': 'cross-origin' });
-        res.end(js);
-      });
-    }
-    if (req.url.startsWith(BASE_URL)) req.url = req.url.slice(BASE_URL.length) || '/';
-    const isModelOrAsset = pathOnly.includes('/models/') || pathOnly.includes('/assets/') || pathOnly.endsWith('.wasm') || pathOnly.endsWith('.onnx');
-    if (isModelOrAsset) {
-      res.setHeader('Cache-Control', 'public, max-age=604800, immutable');
-    }
-    const origSetHeader = res.setHeader.bind(res);
-    res.setHeader = (name, value) => {
-      if (name.toLowerCase() === 'cross-origin-embedder-policy') return;
-      origSetHeader(name, value);
-    };
-    return webtalkApp(req, res);
-  }
   // Route file upload and fsbrowse requests through Express sub-app
   if (pathOnly.startsWith(BASE_URL + '/api/upload/') || pathOnly.startsWith(BASE_URL + '/files/')) {
@@ -516,6 +440,53 @@ const server = http.createServer(async (req, res) => {
       return;
     }
+    if (routePath === '/api/stt' && req.method === 'POST') {
+      try {
+        const chunks = [];
+        for await (const chunk of req) chunks.push(chunk);
+        const audioBuffer = Buffer.concat(chunks);
+        if (audioBuffer.length === 0) {
+          res.writeHead(400, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify({ error: 'No audio data' }));
+          return;
+        }
+        const text = await transcribe(audioBuffer);
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({ text: text.trim() }));
+      } catch (err) {
+        debugLog('[STT] Error: ' + err.message);
+        res.writeHead(500, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({ error: err.message }));
+      }
+      return;
+    }
+    if (routePath === '/api/tts' && req.method === 'POST') {
+      try {
+        const body = await parseBody(req);
+        const text = body.text || '';
+        if (!text) {
+          res.writeHead(400, { 'Content-Type': 'application/json' });
+          res.end(JSON.stringify({ error: 'No text provided' }));
+          return;
+        }
+        const wavBuffer = await synthesize(text);
+        res.writeHead(200, { 'Content-Type': 'audio/wav', 'Content-Length': wavBuffer.length });
+        res.end(wavBuffer);
+      } catch (err) {
+        debugLog('[TTS] Error: ' + err.message);
+        res.writeHead(500, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({ error: err.message }));
+      }
+      return;
+    }
+    if (routePath === '/api/speech-status' && req.method === 'GET') {
+      res.writeHead(200, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify(getSpeechStatus()));
+      return;
+    }
     if (routePath === '/api/folders' && req.method === 'POST') {
       const body = await parseBody(req);
       const folderPath = body.path || STARTUP_CWD;
@@ -591,7 +562,7 @@ const server = http.createServer(async (req, res) => {
   }
 });
-const MIME_TYPES = { '.html': 'text/html; charset=utf-8', '.js': 'application/javascript; charset=utf-8', '.css': 'text/css; charset=utf-8', '.json': 'application/json', '.png': 'image/png', '.jpg': 'image/jpeg', '.svg': 'image/svg+xml', '.wasm': 'application/wasm', '.onnx': 'application/octet-stream' };
+const MIME_TYPES = { '.html': 'text/html; charset=utf-8', '.js': 'application/javascript; charset=utf-8', '.css': 'text/css; charset=utf-8', '.json': 'application/json', '.png': 'image/png', '.jpg': 'image/jpeg', '.svg': 'image/svg+xml' };
 function serveFile(filePath, res) {
   const ext = path.extname(filePath).toLowerCase();
@@ -613,7 +584,7 @@ function serveFile(filePath, res) {
   fs.readFile(filePath, (err, data) => {
     if (err) { res.writeHead(500); res.end('Server error'); return; }
     let content = data.toString();
-    const baseTag = `<script>window.__BASE_URL='${BASE_URL}';</script>\n  <script type="importmap">{"imports":{"webtalk-sdk":"${BASE_URL}/webtalk/sdk.js"}}</script>`;
+    const baseTag = `<script>window.__BASE_URL='${BASE_URL}';</script>`;
     content = content.replace('<head>', '<head>\n  ' + baseTag);
     if (watch) {
       content += `\n<script>(function(){const ws=new WebSocket('ws://'+location.host+'${BASE_URL}/hot-reload');ws.onmessage=e=>{if(JSON.parse(e.data).type==='reload')location.reload()};})();</script>`;
@@ -640,7 +611,7 @@ function persistChunkWithRetry(sessionId, conversationId, sequence, blockType, b
 async function processMessageWithStreaming(conversationId, messageId, sessionId, content, agentId) {
   const startTime = Date.now();
-  activeExecutions.set(conversationId, true);
+  activeExecutions.set(conversationId, { pid: null, startTime, sessionId });
   queries.setIsStreaming(conversationId, true);
   queries.updateSession(sessionId, { status: 'active' });
@@ -756,7 +727,11 @@ async function processMessageWithStreaming(conversationId, messageId, sessionId,
       print: true,
       resumeSessionId,
       systemPrompt: SYSTEM_PROMPT,
-      onEvent
+      onEvent,
+      onPid: (pid) => {
+        const entry = activeExecutions.get(conversationId);
+        if (entry) entry.pid = pid;
+      }
     };
     const { outputs, sessionId: claudeSessionId } = await runClaudeWithStreaming(content, cwd, agentId || 'claude-code', config);
@@ -1030,6 +1005,66 @@ server.on('error', (err) => {
   }
 });
+function recoverStaleSessions() {
+  try {
+    const staleSessions = queries.getActiveSessions ? queries.getActiveSessions() : [];
+    let recoveredCount = 0;
+    for (const session of staleSessions) {
+      if (!activeExecutions.has(session.conversationId)) {
+        queries.updateSession(session.id, {
+          status: 'error',
+          error: 'Agent died unexpectedly (server restart)',
+          completed_at: Date.now()
+        });
+        queries.setIsStreaming(session.conversationId, false);
+        broadcastSync({
+          type: 'streaming_error',
+          sessionId: session.id,
+          conversationId: session.conversationId,
+          error: 'Agent died unexpectedly (server restart)',
+          recoverable: false,
+          timestamp: Date.now()
+        });
+        recoveredCount++;
+      }
+    }
+    if (recoveredCount > 0) {
+      console.log(`[RECOVERY] Recovered ${recoveredCount} stale active session(s)`);
+    }
+  } catch (err) {
+    console.error('[RECOVERY] Stale session recovery error:', err.message);
+  }
+}
+function performAgentHealthCheck() {
+  for (const [conversationId, entry] of activeExecutions) {
+    if (!entry || !entry.pid) continue;
+    try {
+      process.kill(entry.pid, 0);
+    } catch (err) {
+      debugLog(`[HEALTH] Agent PID ${entry.pid} for conv ${conversationId} is dead`);
+      activeExecutions.delete(conversationId);
+      queries.setIsStreaming(conversationId, false);
+      if (entry.sessionId) {
+        queries.updateSession(entry.sessionId, {
+          status: 'error',
+          error: 'Agent process died unexpectedly',
+          completed_at: Date.now()
+        });
+      }
+      broadcastSync({
+        type: 'streaming_error',
+        sessionId: entry.sessionId,
+        conversationId,
+        error: 'Agent process died unexpectedly',
+        recoverable: false,
+        timestamp: Date.now()
+      });
+      drainMessageQueue(conversationId);
+    }
+  }
+}
 function onServerReady() {
   console.log(`GMGUI running on http://localhost:${PORT}${BASE_URL}/`);
   console.log(`Agents: ${discoveredAgents.map(a => a.name).join(', ') || 'none'}`);
@@ -1041,12 +1076,18 @@ function onServerReady() {
     console.log(`Cleaned up ${deletedCount} empty conversation(s) on startup`);
   }
+  // Recover stale active sessions from previous run
+  recoverStaleSessions();
   // Run auto-import immediately
   performAutoImport();
   // Then run it every 30 seconds (constant automatic importing)
   setInterval(performAutoImport, 30000);
+  // Agent health check every 30 seconds
+  setInterval(performAgentHealthCheck, 30000);
 }
 function performAutoImport() {

package/static/js/client.js CHANGED

@@ -792,7 +792,13 @@ class AgentGUIClient {
               const inputStr = JSON.stringify(block.input, null, 2);
               inputHtml = `<details class="tool-input-details"><summary class="tool-input-summary">Input</summary><pre class="tool-input-pre">${this.escapeHtml(inputStr)}</pre></details>`;
             }
-            html += `<div class="streaming-block-tool-use"><div class="tool-use-header"><span class="tool-use-icon">&#9881;</span> <span class="tool-use-name">${this.escapeHtml(block.name || 'unknown')}</span></div>${inputHtml}</div>`;
+            const tn = block.name || 'unknown';
+            const foldable = tn.startsWith('mcp__') || tn === 'Edit';
+            if (foldable) {
+              html += `<details class="streaming-block-tool-use"><summary class="tool-use-header" style="cursor:pointer;user-select:none;list-style:none;"><span class="tool-use-icon">&#9881;</span> <span class="tool-use-name">${this.escapeHtml(tn)}</span></summary>${inputHtml}</details>`;
+            } else {
+              html += `<div class="streaming-block-tool-use"><div class="tool-use-header"><span class="tool-use-icon">&#9881;</span> <span class="tool-use-name">${this.escapeHtml(tn)}</span></div>${inputHtml}</div>`;
+            }
           } else if (block.type === 'tool_result') {
             const content = typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
             const smartHtml = typeof StreamingRenderer !== 'undefined' ? StreamingRenderer.renderSmartContentHTML(content, this.escapeHtml.bind(this)) : `<pre class="tool-result-pre">${this.escapeHtml(content.length > 2000 ? content.substring(0, 2000) + '\n... (truncated)' : content)}</pre>`;
@@ -1433,7 +1439,13 @@ class AgentGUIClient {
                 const inputStr = JSON.stringify(block.input, null, 2);
                 inputHtml = `<details class="tool-input-details"><summary class="tool-input-summary">Input</summary><pre class="tool-input-pre">${this.escapeHtml(inputStr)}</pre></details>`;
               }
-              contentHtml += `<div class="streaming-block-tool-use"><div class="tool-use-header"><span class="tool-use-icon">&#9881;</span> <span class="tool-use-name">${this.escapeHtml(block.name || 'unknown')}</span></div>${inputHtml}</div>`;
+              const tn2 = block.name || 'unknown';
+              const foldable2 = tn2.startsWith('mcp__') || tn2 === 'Edit';
+              if (foldable2) {
+                contentHtml += `<details class="streaming-block-tool-use"><summary class="tool-use-header" style="cursor:pointer;user-select:none;list-style:none;"><span class="tool-use-icon">&#9881;</span> <span class="tool-use-name">${this.escapeHtml(tn2)}</span></summary>${inputHtml}</details>`;
+              } else {
+                contentHtml += `<div class="streaming-block-tool-use"><div class="tool-use-header"><span class="tool-use-icon">&#9881;</span> <span class="tool-use-name">${this.escapeHtml(tn2)}</span></div>${inputHtml}</div>`;
+              }
             } else if (block.type === 'tool_result') {
               const content = typeof block.content === 'string' ? block.content : JSON.stringify(block.content);
               const smartHtml = typeof StreamingRenderer !== 'undefined' ? StreamingRenderer.renderSmartContentHTML(content, this.escapeHtml.bind(this)) : `<pre class="tool-result-pre">${this.escapeHtml(content.length > 2000 ? content.substring(0, 2000) + '\n... (truncated)' : content)}</pre>`;

package/static/js/streaming-renderer.js CHANGED

@@ -624,12 +624,31 @@ class StreamingRenderer {
    * Render tool use block with smart parameter display
    */
   renderBlockToolUse(block, context) {
-    const div = document.createElement('div');
-    div.className = 'block-tool-use';
     const toolName = block.name || 'unknown';
     const input = block.input || {};
+    const shouldFold = toolName.startsWith('mcp__') || toolName === 'Edit';
+    if (shouldFold) {
+      const details = document.createElement('details');
+      details.className = 'block-tool-use';
+      const summary = document.createElement('summary');
+      summary.className = 'tool-header';
+      summary.style.cssText = 'cursor:pointer;user-select:none;list-style:none;';
+      summary.innerHTML = `
+        <span class="tool-icon">${this.getToolIcon(toolName)}</span>
+        <span class="tool-name"><code>${this.escapeHtml(toolName)}</code></span>
+      `;
+      details.appendChild(summary);
+      if (Object.keys(input).length > 0) {
+        const paramsDiv = document.createElement('div');
+        paramsDiv.innerHTML = this.renderSmartParams(toolName, input);
+        details.appendChild(paramsDiv);
+      }
+      return details;
+    }
+    const div = document.createElement('div');
+    div.className = 'block-tool-use';
     div.innerHTML = `
       <div class="tool-header">
         <span class="tool-icon">${this.getToolIcon(toolName)}</span>
@@ -637,7 +656,6 @@ class StreamingRenderer {
       </div>
       ${Object.keys(input).length > 0 ? this.renderSmartParams(toolName, input) : ''}
     `;
     return div;
   }

package/static/js/voice.js CHANGED

@@ -1,167 +1,24 @@
 (function() {
-  const BASE = window.__BASE_URL || '';
-  let STT = null;
-  let TTS = null;
+  var BASE = window.__BASE_URL || '';
+  var isRecording = false;
+  var ttsEnabled = true;
+  var voiceActive = false;
+  var lastSpokenBlockIndex = -1;
+  var currentConversationId = null;
+  var speechQueue = [];
+  var isSpeaking = false;
+  var currentAudio = null;
+  var mediaStream = null;
+  var audioContext = null;
+  var scriptNode = null;
+  var recordedChunks = [];
+  var TARGET_SAMPLE_RATE = 16000;
-  async function loadSDK() {
-    try {
-      const mod = await import(BASE + '/webtalk/sdk.js');
-      STT = mod.STT;
-      TTS = mod.TTS;
-      return true;
-    } catch (e) {
-      console.warn('Webtalk SDK load failed:', e.message);
-      return false;
-    }
-  }
-  let stt = null;
-  let tts = null;
-  let isRecording = false;
-  let ttsEnabled = true;
-  let voiceActive = false;
-  let lastSpokenBlockIndex = -1;
-  let currentConversationId = null;
-  let sttReady = false;
-  let ttsReady = false;
-  let speechQueue = [];
-  let isSpeaking = false;
-  async function init() {
+  function init() {
     setupTTSToggle();
     setupUI();
     setupStreamingListener();
     setupAgentSelector();
-    var sdkLoaded = await loadSDK();
-    if (sdkLoaded) {
-      initSTT();
-      initTTS();
-    } else {
-      sttLoadPhase = 'failed';
-      updateMicState();
-    }
-  }
-  var sttLoadPhase = 'starting';
-  async function initSTT() {
-    try {
-      stt = new STT({
-        basePath: BASE + '/webtalk',
-        onTranscript: function(text) {
-          var el = document.getElementById('voiceTranscript');
-          if (el) {
-            el.textContent = text;
-            el.setAttribute('data-final', text);
-          }
-        },
-        onPartial: function(text) {
-          var el = document.getElementById('voiceTranscript');
-          if (el) {
-            var existing = el.getAttribute('data-final') || '';
-            el.textContent = existing + text;
-          }
-        },
-        onStatus: function(status) {
-          var micBtn = document.getElementById('voiceMicBtn');
-          if (!micBtn) return;
-          if (status === 'recording') {
-            micBtn.classList.add('recording');
-          } else {
-            micBtn.classList.remove('recording');
-          }
-        }
-      });
-      var origInit = stt.init.bind(stt);
-      var initPromise = new Promise(function(resolve, reject) {
-        origInit().then(resolve).catch(reject);
-        if (stt.worker) {
-          var origHandler = stt.worker.onmessage;
-          stt.worker.onmessage = function(e) {
-            var msg = e.data;
-            if (msg && msg.status) {
-              if (msg.status === 'progress' || msg.status === 'download') {
-                if (sttLoadPhase !== 'downloading') {
-                  sttLoadPhase = 'downloading';
-                  updateMicState();
-                }
-              } else if (msg.status === 'done' && msg.file && msg.file.endsWith('.onnx')) {
-                sttLoadPhase = 'compiling';
-                updateMicState();
-              }
-            }
-            if (origHandler) origHandler.call(stt.worker, e);
-          };
-        }
-      });
-      await initPromise;
-      sttReady = true;
-      updateMicState();
-    } catch (e) {
-      console.warn('STT init failed:', e.message);
-      sttLoadPhase = 'failed';
-      updateMicState();
-    }
-  }
-  function updateMicState() {
-    var micBtn = document.getElementById('voiceMicBtn');
-    if (!micBtn) return;
-    if (sttReady) {
-      micBtn.removeAttribute('disabled');
-      micBtn.title = 'Click to record';
-      micBtn.classList.remove('loading');
-    } else if (sttLoadPhase === 'failed') {
-      micBtn.setAttribute('disabled', 'true');
-      micBtn.title = 'Speech recognition failed to load';
-      micBtn.classList.remove('loading');
-    } else {
-      micBtn.setAttribute('disabled', 'true');
-      micBtn.classList.add('loading');
-      if (sttLoadPhase === 'downloading') {
-        micBtn.title = 'Downloading speech models...';
-      } else if (sttLoadPhase === 'compiling') {
-        micBtn.title = 'Compiling speech models (may take a minute)...';
-      } else {
-        micBtn.title = 'Loading speech recognition...';
-      }
-    }
-  }
-  async function initTTS(retries) {
-    var maxRetries = retries || 3;
-    for (var attempt = 0; attempt < maxRetries; attempt++) {
-      try {
-        tts = new TTS({
-          basePath: BASE + '/webtalk',
-          apiBasePath: BASE,
-          onStatus: function() {},
-          onAudioReady: function(url) {
-            var audio = new Audio(url);
-            audio.onended = function() {
-              isSpeaking = false;
-              processQueue();
-            };
-            audio.onerror = function() {
-              isSpeaking = false;
-              processQueue();
-            };
-            audio.play().catch(function() {
-              isSpeaking = false;
-              processQueue();
-            });
-          }
-        });
-        await tts.init();
-        ttsReady = true;
-        return;
-      } catch (e) {
-        console.warn('TTS init attempt ' + (attempt + 1) + '/' + maxRetries + ' failed:', e.message);
-        tts = null;
-        if (attempt < maxRetries - 1) {
-          await new Promise(function(r) { setTimeout(r, 3000 * (attempt + 1)); });
-        }
-      }
-    }
   }
   function setupAgentSelector() {
@@ -203,6 +60,8 @@
   function setupUI() {
     var micBtn = document.getElementById('voiceMicBtn');
     if (micBtn) {
+      micBtn.removeAttribute('disabled');
+      micBtn.title = 'Click to record';
       micBtn.addEventListener('click', function(e) {
         e.preventDefault();
         if (!isRecording) {
@@ -216,43 +75,104 @@
     if (sendBtn) {
       sendBtn.addEventListener('click', sendVoiceMessage);
     }
-    updateMicState();
+  }
+  function resampleBuffer(inputBuffer, fromRate, toRate) {
+    if (fromRate === toRate) return inputBuffer;
+    var ratio = fromRate / toRate;
+    var newLen = Math.round(inputBuffer.length / ratio);
+    var result = new Float32Array(newLen);
+    for (var i = 0; i < newLen; i++) {
+      var srcIdx = i * ratio;
+      var lo = Math.floor(srcIdx);
+      var hi = Math.min(lo + 1, inputBuffer.length - 1);
+      var frac = srcIdx - lo;
+      result[i] = inputBuffer[lo] * (1 - frac) + inputBuffer[hi] * frac;
+    }
+    return result;
   }
   async function startRecording() {
     if (isRecording) return;
     var el = document.getElementById('voiceTranscript');
-    if (!stt || !sttReady) {
-      if (el) el.textContent = 'Speech recognition still loading, please wait...';
-      return;
-    }
     if (el) {
       el.textContent = '';
       el.setAttribute('data-final', '');
     }
-    isRecording = true;
     try {
-      await stt.startRecording();
+      mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      audioContext = new (window.AudioContext || window.webkitAudioContext)();
+      var source = audioContext.createMediaStreamSource(mediaStream);
+      scriptNode = audioContext.createScriptProcessor(4096, 1, 1);
+      recordedChunks = [];
+      scriptNode.onaudioprocess = function(e) {
+        var data = e.inputBuffer.getChannelData(0);
+        recordedChunks.push(new Float32Array(data));
+      };
+      source.connect(scriptNode);
+      scriptNode.connect(audioContext.destination);
+      isRecording = true;
+      var micBtn = document.getElementById('voiceMicBtn');
+      if (micBtn) micBtn.classList.add('recording');
     } catch (e) {
       isRecording = false;
       if (el) el.textContent = 'Mic access denied or unavailable: ' + e.message;
-      console.warn('Recording start failed:', e.message);
     }
   }
   async function stopRecording() {
-    if (!stt || !isRecording) return;
+    if (!isRecording) return;
     isRecording = false;
+    var micBtn = document.getElementById('voiceMicBtn');
+    if (micBtn) micBtn.classList.remove('recording');
+    var el = document.getElementById('voiceTranscript');
+    if (scriptNode) { scriptNode.disconnect(); scriptNode = null; }
+    if (mediaStream) {
+      mediaStream.getTracks().forEach(function(t) { t.stop(); });
+      mediaStream = null;
+    }
+    var sourceSampleRate = audioContext ? audioContext.sampleRate : 48000;
+    if (audioContext) { audioContext.close().catch(function() {}); audioContext = null; }
+    if (recordedChunks.length === 0) return;
+    var totalLen = 0;
+    for (var i = 0; i < recordedChunks.length; i++) totalLen += recordedChunks[i].length;
+    var merged = new Float32Array(totalLen);
+    var offset = 0;
+    for (var j = 0; j < recordedChunks.length; j++) {
+      merged.set(recordedChunks[j], offset);
+      offset += recordedChunks[j].length;
+    }
+    recordedChunks = [];
+    var resampled = resampleBuffer(merged, sourceSampleRate, TARGET_SAMPLE_RATE);
+    if (el) el.textContent = 'Transcribing...';
     try {
-      await stt.stopRecording();
-    } catch (e) {}
+      var pcmBuffer = resampled.buffer;
+      var resp = await fetch(BASE + '/api/stt', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/octet-stream' },
+        body: pcmBuffer
+      });
+      var data = await resp.json();
+      if (data.text) {
+        if (el) {
+          el.textContent = data.text;
+          el.setAttribute('data-final', data.text);
+        }
+      } else if (data.error) {
+        if (el) el.textContent = 'Error: ' + data.error;
+      } else {
+        if (el) el.textContent = '';
+      }
+    } catch (e) {
+      if (el) el.textContent = 'Transcription failed: ' + e.message;
+    }
   }
   function sendVoiceMessage() {
     var el = document.getElementById('voiceTranscript');
     if (!el) return;
     var text = el.textContent.trim();
-    if (!text) return;
+    if (!text || text.startsWith('Transcribing') || text.startsWith('Error')) return;
     addVoiceBlock(text, true);
     el.textContent = '';
     el.setAttribute('data-final', '');
@@ -266,7 +186,7 @@
   }
   function speak(text) {
-    if (!ttsEnabled || !tts || !ttsReady) return;
+    if (!ttsEnabled) return;
     var clean = text.replace(/<[^>]*>/g, '').trim();
     if (!clean) return;
     speechQueue.push(clean);
@@ -277,7 +197,35 @@
     if (isSpeaking || speechQueue.length === 0) return;
     isSpeaking = true;
     var text = speechQueue.shift();
-    tts.generate(text).catch(function() {
+    fetch(BASE + '/api/tts', {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ text: text })
+    }).then(function(resp) {
+      if (!resp.ok) throw new Error('TTS failed');
+      return resp.blob();
+    }).then(function(blob) {
+      var url = URL.createObjectURL(blob);
+      currentAudio = new Audio(url);
+      currentAudio.onended = function() {
+        URL.revokeObjectURL(url);
+        currentAudio = null;
+        isSpeaking = false;
+        processQueue();
+      };
+      currentAudio.onerror = function() {
+        URL.revokeObjectURL(url);
+        currentAudio = null;
+        isSpeaking = false;
+        processQueue();
+      };
+      currentAudio.play().catch(function() {
+        URL.revokeObjectURL(url);
+        currentAudio = null;
+        isSpeaking = false;
+        processQueue();
+      });
+    }).catch(function() {
       isSpeaking = false;
       processQueue();
     });
@@ -286,7 +234,10 @@
   function stopSpeaking() {
     speechQueue = [];
     isSpeaking = false;
-    if (tts) tts.stop();
+    if (currentAudio) {
+      currentAudio.pause();
+      currentAudio = null;
+    }
   }
   function addVoiceBlock(text, isUser) {