npm - agentgui - Versions diffs - 1.0.207 → 1.0.209 - Mend

agentgui 1.0.207 → 1.0.209

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/CLAUDE.md +50 -0
package/lib/speech.js +12 -1
package/lib/webtalk-patch.js +33 -0
package/lib/windows-pocket-tts-setup.js +209 -0
package/package.json +1 -1
package/readme.md +22 -0
package/server.js +75 -2

package/CLAUDE.md CHANGED Viewed

@@ -89,3 +89,53 @@ Server broadcasts:
 - `streaming_complete` - Execution finished
 - `streaming_error` - Execution failed
 - `conversation_created`, `conversation_updated`, `conversation_deleted`
+- `tts_setup_progress` - Windows pocket-tts setup progress (step, status, message)
+## Pocket-TTS Windows Setup (Reliability for Slow/Bad Internet)
+On Windows, text-to-speech uses pocket-tts, which requires Python and a pip install. The setup process is now resilient to slow or unreliable connections:
+### Features
+- **Extended timeouts**: 120s for pip install (accommodates slow connections)
+- **Retry logic**: 3 attempts with exponential backoff (1s, 2s delays)
+- **Progress reporting**: Real-time updates via WebSocket to UI
+- **Partial install cleanup**: Failed venvs are removed to allow retry
+- **Installation verification**: Binary validation via `--version` check
+- **Concurrent waiting**: Multiple simultaneous requests wait for a single setup to finish (600s timeout)
+### Configuration (lib/windows-pocket-tts-setup.js)
+```javascript
+const CONFIG = {
+  PIP_TIMEOUT: 120000,           // 2 minutes
+  VENV_CREATION_TIMEOUT: 30000,  // 30 seconds
+  MAX_RETRIES: 3,                 // 3 attempts
+  RETRY_DELAY_MS: 1000,          // 1 second initial
+  RETRY_BACKOFF_MULTIPLIER: 2,   // 2x exponential
+};
+```
+### Network Requirements
+- **Minimum**: 50 kbps sustained, < 5s latency, < 10% packet loss
+- **Recommended**: 256+ kbps, < 2s latency, < 1% packet loss
+- **Expected time on slow connection**: 2-6 minutes with retries
+### Progress Messages
+During TTS setup on first use, WebSocket broadcasts:
+```json
+{
+  "type": "tts_setup_progress",
+  "step": "detecting-python|creating-venv|installing|verifying",
+  "status": "in-progress|success|error",
+  "message": "descriptive status message with retry count if applicable"
+}
+```
+### Recovery Behavior
+1. Network timeout → auto-retry with backoff
+2. Partial venv → auto-cleanup before retry
+3. Failed verification → auto-cleanup and error
+4. Concurrent requests → first starts setup, others wait up to 600s
+5. Interrupted setup → cleanup allows fresh retry
+### Testing
+Setup validates the installation by running the pocket-tts binary with the `--version` flag, confirming that it is functional rather than merely present on disk.

package/lib/speech.js CHANGED Viewed

@@ -1,8 +1,10 @@
 import { createRequire } from 'module';
 import fs from 'fs';
 import path from 'path';
+import os from 'os';
 import http from 'http';
 import { fileURLToPath } from 'url';
+import { patchWebtalkForWindows } from './webtalk-patch.js';
 const require = createRequire(import.meta.url);
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
@@ -11,6 +13,8 @@ const ROOT = path.dirname(__dirname);
 const serverSTT = require('webtalk/server-stt');
 const serverTTS = require('webtalk/server-tts');
+patchWebtalkForWindows(serverTTS);
 const EXTRA_VOICE_DIRS = [path.join(ROOT, 'voices')];
 const POCKET_TTS_VOICES = [
@@ -120,7 +124,14 @@ function getStatus() {
 function preloadTTS() {
   const defaultVoice = serverTTS.findVoiceFile('custom_cleetus', EXTRA_VOICE_DIRS) || '/config/voices/cleetus.wav';
   const voicePath = fs.existsSync(defaultVoice) ? defaultVoice : null;
-  serverTTS.start(voicePath).then(ok => {
+  const options = {
+    binaryPaths: [
+      path.join(os.homedir(), '.gmgui', 'pocket-venv', 'Scripts', 'pocket-tts.exe'),
+      path.join(os.homedir(), '.gmgui', 'pocket-venv', 'bin', 'pocket-tts.exe'),
+      path.join(os.homedir(), '.gmgui', 'pocket-venv', 'bin', 'pocket-tts'),
+    ]
+  };
+  serverTTS.start(voicePath, options).then(ok => {
     if (ok) console.log('[TTS] pocket-tts sidecar started');
     else console.log('[TTS] pocket-tts failed to start');
   }).catch(err => {

package/lib/webtalk-patch.js ADDED Viewed

@@ -0,0 +1,33 @@
+import fs from 'fs';
+import path from 'path';
+import os from 'os';
+/**
+ * Wraps `serverTTS.start` on Windows so the pocket-tts binaries inside the
+ * `~/.gmgui/pocket-venv` virtual environment are tried before any other path.
+ *
+ * The existence check runs on every `start()` call rather than once at patch
+ * time, so a venv that is installed after this module loads is still picked up.
+ * Does nothing on non-Windows platforms.
+ *
+ * @param {{ start: Function }} serverTTS - Object whose `start(voicePath, options)`
+ *   method is replaced in place.
+ * @returns {void}
+ */
+export function patchWebtalkForWindows(serverTTS) {
+  if (process.platform !== 'win32') return;
+  const venvDir = path.join(os.homedir(), '.gmgui', 'pocket-venv');
+  // Candidate locations of the pocket-tts executable inside the venv.
+  const windowsBinaries = [
+    path.join(venvDir, 'Scripts', 'pocket-tts.exe'),
+    path.join(venvDir, 'bin', 'pocket-tts.exe'),
+    path.join(venvDir, 'bin', 'pocket-tts'),
+  ];
+  const originalStart = serverTTS.start;
+  serverTTS.start = function(voicePath, options) {
+    // Nothing installed (yet): defer entirely to the unpatched behaviour.
+    if (!windowsBinaries.some(p => fs.existsSync(p))) {
+      return originalStart.call(this, voicePath, options);
+    }
+    // Build a fresh options object so the caller's object is never mutated.
+    const baseOptions = options || {};
+    const callerPaths = baseOptions.binaryPaths || [];
+    const patchedOptions = {
+      ...baseOptions,
+      // Ensure Windows paths are first
+      binaryPaths: [...windowsBinaries, ...callerPaths],
+    };
+    return originalStart.call(this, voicePath, patchedOptions);
+  };
+}

package/lib/windows-pocket-tts-setup.js ADDED Viewed

@@ -0,0 +1,209 @@
+import { execSync, spawnSync } from 'child_process';
+import fs from 'fs';
+import path from 'path';
+import os from 'os';
+// Minimum supported Python version as [major, minor]; compared in detectPython().
+const PYTHON_VERSION_MIN = [3, 9];
+// Virtual environment location: <home>/.gmgui/pocket-venv.
+const VENV_DIR = path.join(os.homedir(), '.gmgui', 'pocket-venv');
+const isWin = process.platform === 'win32';
+// Name of the pocket-tts executable; carries an .exe suffix on Windows.
+const EXECUTABLE_NAME = isWin ? 'pocket-tts.exe' : 'pocket-tts';
+// Tunables for the install flow. The two timeouts are in milliseconds and are
+// passed as the execSync `timeout` option for the pip and venv commands.
+// RETRY_DELAY_MS is the first delay used by executeWithRetry(); it is multiplied
+// by RETRY_BACKOFF_MULTIPLIER after each failed attempt.
+const CONFIG = {
+  PIP_TIMEOUT: 120000,
+  VENV_CREATION_TIMEOUT: 30000,
+  MAX_RETRIES: 3,
+  RETRY_DELAY_MS: 1000,
+  RETRY_BACKOFF_MULTIPLIER: 2,
+};
+/**
+ * Returns the expected location of the pocket-tts executable inside the venv:
+ * the `Scripts` directory on Windows, `bin` everywhere else.
+ *
+ * @returns {string} Path to the executable (which may not exist yet).
+ */
+function getPocketTtsPath() {
+  const scriptsDir = isWin ? 'Scripts' : 'bin';
+  return path.join(VENV_DIR, scriptsDir, EXECUTABLE_NAME);
+}
+/**
+ * Runs `python --version` and checks the reported version against
+ * PYTHON_VERSION_MIN.
+ *
+ * @returns {{found: boolean, version: (string|null), error: (string|null)}}
+ *   `found` is false when the command fails or its output has no
+ *   `major.minor` number; `error` is non-null when Python is missing,
+ *   unparseable, or older than the minimum.
+ */
+function detectPython() {
+  let rawVersion;
+  try {
+    rawVersion = execSync('python --version', { encoding: 'utf-8', timeout: 10000 }).trim();
+  } catch (e) {
+    return { found: false, version: null, error: 'Python not found in PATH' };
+  }
+  const parsed = rawVersion.match(/(\d+)\.(\d+)/);
+  if (!parsed) {
+    return { found: false, version: null, error: 'Could not parse version' };
+  }
+  const [minMajor, minMinor] = PYTHON_VERSION_MIN;
+  const major = parseInt(parsed[1], 10);
+  const minor = parseInt(parsed[2], 10);
+  const version = `${major}.${minor}`;
+  const meetsMinimum = major > minMajor || (major === minMajor && minor >= minMinor);
+  const error = meetsMinimum ? null : `Python ${version} found but ${minMajor}.${minMinor}+ required`;
+  return { found: true, version, error };
+}
+/**
+ * Reports whether the pocket-tts executable is present at its expected path.
+ * Only file existence is checked; the binary is not executed.
+ *
+ * @returns {boolean}
+ */
+function isSetup() {
+  return fs.existsSync(getPocketTtsPath());
+}
+/**
+ * Recursively deletes the virtual environment directory if it exists.
+ * Errors are logged to the console rather than thrown.
+ *
+ * @returns {boolean} true when the directory existed and was removed,
+ *   false when it was absent or removal failed.
+ */
+function cleanupPartialInstall() {
+  try {
+    if (!fs.existsSync(VENV_DIR)) {
+      return false;
+    }
+    fs.rmSync(VENV_DIR, { recursive: true, force: true });
+    return true;
+  } catch (e) {
+    console.error(`Failed to cleanup partial install: ${e.message}`);
+    return false;
+  }
+}
+/**
+ * Confirms that the pocket-tts executable exists and runs by invoking it
+ * with `--version`.
+ *
+ * @returns {{valid: boolean, version?: string, error?: string}}
+ *   `version` holds the trimmed command output on success; `error` describes
+ *   the failure otherwise.
+ */
+function verifyInstallation() {
+  const binary = getPocketTtsPath();
+  if (fs.existsSync(binary)) {
+    try {
+      const output = execSync(`"${binary}" --version`, { encoding: 'utf-8', timeout: 10000, stdio: 'pipe' });
+      return { valid: true, version: output.trim() };
+    } catch (e) {
+      return { valid: false, error: `Binary exists but failed verification: ${e.message}` };
+    }
+  }
+  return { valid: false, error: `Binary not found at ${binary}` };
+}
+/**
+ * Calls `fn(attempt)` until it resolves or the attempt limit is reached,
+ * sleeping between attempts with a delay that starts at
+ * CONFIG.RETRY_DELAY_MS and is multiplied by CONFIG.RETRY_BACKOFF_MULTIPLIER
+ * after each failure.
+ *
+ * @param {(attempt: number) => any} fn - Operation to run; receives the
+ *   1-based attempt number. May be sync or async.
+ * @param {string} stepName - Label used in log and error messages.
+ * @param {number} [maxRetries=CONFIG.MAX_RETRIES] - Total number of attempts.
+ *   Values below 1 (or non-numeric values) are treated as a single attempt.
+ * @returns {Promise<any>} Whatever `fn` returns on its first success.
+ * @throws {Error} When every attempt fails; the last failure is attached as
+ *   `cause` on runtimes that support it.
+ */
+async function executeWithRetry(fn, stepName, maxRetries = CONFIG.MAX_RETRIES) {
+  // Guarantee at least one attempt so the failure path always has an error to report.
+  const attempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;
+  let lastError = null;
+  let delayMs = CONFIG.RETRY_DELAY_MS;
+  for (let attempt = 1; attempt <= attempts; attempt++) {
+    try {
+      return await fn(attempt);
+    } catch (e) {
+      lastError = e;
+      if (attempt < attempts) {
+        console.log(`Attempt ${attempt}/${attempts} failed for ${stepName}, retrying in ${delayMs}ms`);
+        await new Promise(r => setTimeout(r, delayMs));
+        delayMs *= CONFIG.RETRY_BACKOFF_MULTIPLIER;
+      }
+    }
+  }
+  // `fn` may throw a non-Error (even null/undefined), so read `.message` defensively.
+  const detail = (lastError && lastError.message) || lastError;
+  const msg = `${stepName} failed after ${attempts} attempts: ${detail}`;
+  throw new Error(msg, { cause: lastError });
+}
+/**
+ * Installs pocket-tts into the virtual environment at VENV_DIR.
+ *
+ * Steps: detect Python, reuse an existing installation if it verifies, create
+ * the venv, `pip install pocket-tts` (with retries), verify the binary via
+ * `--version`, and on Windows mirror the executable into `<venv>/bin` together
+ * with a `.bat` wrapper. Failures of the detect/venv/pip/verify steps are
+ * reported through the return value; the venv is removed after a failed
+ * venv, pip, or verification step so a later call starts clean.
+ *
+ * @param {(update: {step: string, status: string, message: string}) => void} [onProgress]
+ *   Optional callback invoked with progress updates.
+ * @returns {Promise<{success: boolean, error?: string}>}
+ */
+async function install(onProgress) {
+  // Emits a progress update only when a callback was supplied.
+  const report = (step, status, message) => {
+    if (onProgress) onProgress({ step, status, message });
+  };
+  // Reports an error, optionally wipes the venv, and builds the failure result.
+  const fail = (step, message, cleanup = false) => {
+    report(step, 'error', message);
+    if (cleanup) cleanupPartialInstall();
+    return { success: false, error: message };
+  };
+  const pythonDetect = detectPython();
+  if (!pythonDetect.found) {
+    return fail('detecting-python', pythonDetect.error || 'Python not found');
+  }
+  if (pythonDetect.error) {
+    return fail('detecting-python', pythonDetect.error);
+  }
+  report('detecting-python', 'success', `Found Python ${pythonDetect.version}`);
+  if (isSetup()) {
+    if (verifyInstallation().valid) {
+      report('verifying', 'success', 'pocket-tts already installed');
+      return { success: true };
+    }
+    // The binary exists but does not run: remove the broken venv so it is
+    // rebuilt from scratch instead of being layered over.
+    cleanupPartialInstall();
+  }
+  report('creating-venv', 'in-progress', `Creating virtual environment at ${VENV_DIR}`);
+  try {
+    await executeWithRetry(async (attempt) => {
+      // A failed attempt can leave a half-built venv behind; clear it before retrying.
+      if (attempt > 1) cleanupPartialInstall();
+      return execSync(`python -m venv "${VENV_DIR}"`, {
+        encoding: 'utf-8',
+        stdio: 'pipe',
+        timeout: CONFIG.VENV_CREATION_TIMEOUT,
+      });
+    }, 'venv creation', 2);
+    report('creating-venv', 'success', 'Virtual environment created');
+  } catch (e) {
+    return fail('creating-venv', `Failed to create venv: ${e.message || e}`, true);
+  }
+  report('installing', 'in-progress', 'Installing pocket-tts via pip (this may take 2-5 minutes on slow connections)');
+  try {
+    await executeWithRetry(async (attempt) => {
+      if (attempt > 1) {
+        report('installing', 'in-progress', `Installing pocket-tts (attempt ${attempt}/${CONFIG.MAX_RETRIES})`);
+      }
+      const pipCmd = isWin
+        ? `"${path.join(VENV_DIR, 'Scripts', 'pip')}" install --no-cache-dir pocket-tts`
+        : `"${path.join(VENV_DIR, 'bin', 'pip')}" install --no-cache-dir pocket-tts`;
+      return execSync(pipCmd, {
+        encoding: 'utf-8',
+        stdio: 'pipe',
+        timeout: CONFIG.PIP_TIMEOUT,
+        env: { ...process.env, PIP_DEFAULT_TIMEOUT: '120' },
+      });
+    }, 'pip install', CONFIG.MAX_RETRIES);
+    report('installing', 'success', 'pocket-tts installed successfully');
+  } catch (e) {
+    return fail('installing', `Failed to install pocket-tts: ${e.message || e}`, true);
+  }
+  report('verifying', 'in-progress', 'Verifying installation');
+  const verify = verifyInstallation();
+  if (!verify.valid) {
+    return fail('verifying', verify.error || 'Installation verification failed', true);
+  }
+  if (isWin) {
+    // Best effort: mirror the executable into <venv>/bin and add a .bat wrapper.
+    // Failures here are logged but do not fail the installation.
+    const exePath = getPocketTtsPath();
+    const binDir = path.join(VENV_DIR, 'bin');
+    try {
+      fs.mkdirSync(binDir, { recursive: true });
+    } catch (e) {
+      console.warn(`Could not create ${binDir}: ${e.message}`);
+    }
+    const exeWithExt = path.join(binDir, 'pocket-tts.exe');
+    if (fs.existsSync(exePath) && !fs.existsSync(exeWithExt)) {
+      try {
+        fs.copyFileSync(exePath, exeWithExt);
+      } catch (e) {
+        console.warn(`Could not copy pocket-tts to ${exeWithExt}: ${e.message}`);
+      }
+    }
+    const batchFile = path.join(binDir, 'pocket-tts.bat');
+    if (!fs.existsSync(batchFile) && fs.existsSync(exeWithExt)) {
+      try {
+        const batchContent = `@echo off\nsetlocal enabledelayedexpansion\nset PYTHONUNBUFFERED=1\nset HF_HUB_DISABLE_SYMLINKS_WARNING=1\n"${exeWithExt}" %*\n`;
+        fs.writeFileSync(batchFile, batchContent, 'utf-8');
+      } catch (e) {
+        console.warn(`Could not write ${batchFile}: ${e.message}`);
+      }
+    }
+  }
+  report('verifying', 'success', `pocket-tts ready (${verify.version})`);
+  return { success: true };
+}
+export { detectPython, isSetup, install, getPocketTtsPath, VENV_DIR, CONFIG, cleanupPartialInstall, verifyInstallation };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agentgui",
-  "version": "1.0.207",
+  "version": "1.0.209",
   "description": "Multi-agent ACP client with real-time communication",
   "type": "module",
   "main": "server.js",

package/readme.md CHANGED Viewed

@@ -37,6 +37,28 @@ Open `http://localhost:3000` in your browser.
 - `static/` - Browser client with streaming renderer, WebSocket manager, and HTML templates
 - `bin/gmgui.cjs` - CLI entry point for `npx agentgui`
+## Text-to-Speech on Windows
+On Windows, AgentGUI automatically sets up pocket-tts (text-to-speech) on your first TTS request. No manual setup required.
+### What Happens
+1. Server detects Python 3.9+ installation
+2. Creates virtual environment at `~/.gmgui/pocket-venv`
+3. Installs pocket-tts via pip
+4. All subsequent TTS requests use the cached installation
+### Requirements
+- Python 3.9+ (check with `python --version`)
+- ~200 MB free disk space
+- Internet connection for first setup
+### Troubleshooting
+- **Python not found**: Download from https://www.python.org and ensure "Add Python to PATH" is checked
+- **Setup fails**: Check that you have write access to your home directory (~/.gmgui/)
+- **Manual cleanup**: Delete `%USERPROFILE%\.gmgui\pocket-venv` and try again
+For manual setup or detailed troubleshooting, see the setup instructions in the code, or check the `/api/speech-status` endpoint for error details.
 ## Configuration
 | Variable | Default | Description |

package/server.js CHANGED Viewed

@@ -11,12 +11,46 @@ import { createRequire } from 'module';
 import { OAuth2Client } from 'google-auth-library';
 import { queries } from './database.js';
 import { runClaudeWithStreaming } from './lib/claude-runner.js';
+import { isSetup, install, detectPython } from './lib/windows-pocket-tts-setup.js';
 let speechModule = null;
 /**
  * Lazily imports ./lib/speech.js on first use and returns the cached module
  * on later calls.
  */
 async function getSpeech() {
   if (speechModule) {
     return speechModule;
   }
   speechModule = await import('./lib/speech.js');
   return speechModule;
 }
+// Process-wide record of the pocket-tts setup: whether it has completed once
+// (`attempted`), its outcome (`ready` / `error`), and whether it is running now.
+const pocketTtsSetupState = { attempted: false, ready: false, error: null, inProgress: false };
+/**
+ * Runs the pocket-tts installation at most once and reports whether it is ready.
+ *
+ * - If a setup already completed, returns its recorded outcome.
+ * - If a setup is currently running, polls every 100ms (up to 600s) until it
+ *   finishes, then returns the recorded outcome.
+ * - Otherwise starts the setup via install().
+ *
+ * @param {(update: {step: string, status: string, message: string}) => void} [onProgress]
+ *   Optional callback receiving progress updates.
+ * @returns {Promise<boolean>} true when pocket-tts is ready.
+ * @throws Rethrows anything thrown by install() or the progress callback. The
+ *   error message is recorded in the state and `attempted` stays false, so a
+ *   later call starts a new setup.
+ */
+async function ensurePocketTtsSetup(onProgress) {
+  if (pocketTtsSetupState.attempted) {
+    return pocketTtsSetupState.ready;
+  }
+  if (pocketTtsSetupState.inProgress) {
+    let waited = 0;
+    const MAX_WAIT = 600000;
+    while (pocketTtsSetupState.inProgress && waited < MAX_WAIT) {
+      await new Promise(r => setTimeout(r, 100));
+      waited += 100;
+    }
+    return pocketTtsSetupState.ready;
+  }
+  pocketTtsSetupState.inProgress = true;
+  try {
+    if (onProgress) onProgress({ step: 'detecting-python', status: 'in-progress', message: 'Detecting Python installation' });
+    const result = await install((msg) => {
+      if (onProgress) onProgress(msg);
+    });
+    pocketTtsSetupState.attempted = true;
+    pocketTtsSetupState.ready = result.success;
+    pocketTtsSetupState.error = result.error || null;
+    return pocketTtsSetupState.ready;
+  } catch (e) {
+    // Record the unexpected failure for anyone reading the state, then let the caller see it.
+    pocketTtsSetupState.ready = false;
+    pocketTtsSetupState.error = (e && e.message) || String(e);
+    throw e;
+  } finally {
+    // Always release the flag; otherwise concurrent callers would poll until MAX_WAIT.
+    pocketTtsSetupState.inProgress = false;
+  }
+}
 function eagerTTS(text, conversationId, sessionId) {
   getSpeech().then(speech => {
     const status = speech.getStatus();
@@ -1263,6 +1297,25 @@ const server = http.createServer(async (req, res) => {
           sendJSON(req, res, 400, { error: 'No text provided' });
           return;
         }
+        if (!pocketTtsSetupState.attempted && process.platform === 'win32') {
+          const setupOk = await ensurePocketTtsSetup((msg) => {
+            broadcastSync({ type: 'tts_setup_progress', ...msg });
+          });
+          if (!setupOk) {
+            sendJSON(req, res, 503, { error: pocketTtsSetupState.error || 'pocket-tts setup failed', retryable: false });
+            return;
+          }
+          // After successful setup, start the TTS sidecar if not already running
+          const speech = await getSpeech();
+          if (speech.preloadTTS) {
+            speech.preloadTTS();
+            // Wait a bit for it to start
+            await new Promise(r => setTimeout(r, 2000));
+          }
+        }
         const speech = await getSpeech();
         const status = speech.getStatus();
         if (status.ttsError) {
@@ -1322,9 +1375,29 @@ const server = http.createServer(async (req, res) => {
     if (pathOnly === '/api/speech-status' && req.method === 'GET') {
       try {
         const { getStatus } = await getSpeech();
-        sendJSON(req, res, 200, getStatus());
+        const baseStatus = getStatus();
+        const pythonDetect = detectPython();
+        const statusWithSetup = {
+          ...baseStatus,
+          pythonDetected: pythonDetect.found,
+          pythonVersion: pythonDetect.version,
+          pocketTtsSetup: {
+            ready: pocketTtsSetupState.ready,
+            attempted: pocketTtsSetupState.attempted,
+            error: pocketTtsSetupState.error,
+          },
+          setupMessage: pocketTtsSetupState.error || (pocketTtsSetupState.ready ? 'pocket-tts ready' : 'Will setup on first TTS request'),
+        };
+        sendJSON(req, res, 200, statusWithSetup);
       } catch (err) {
-        sendJSON(req, res, 200, { sttReady: false, ttsReady: false, sttLoading: false, ttsLoading: false });
+        const pythonDetect = detectPython();
+        sendJSON(req, res, 200, {
+          sttReady: false, ttsReady: false, sttLoading: false, ttsLoading: false,
+          pythonDetected: pythonDetect.found,
+          pythonVersion: pythonDetect.version,
+          pocketTtsSetup: { ready: false, attempted: false, error: null },
+          setupMessage: 'Will setup on first TTS request',
+        });
       }
       return;
     }