npm - otherwise-cli - Versions diffs - 0.1.0 - Mend

otherwise-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

package/README.md +193 -0
package/bin/otherwise.js +5 -0
package/frontend/404.html +84 -0
package/frontend/assets/OpenDyslexic3-Bold-CDyRs55Y.ttf +0 -0
package/frontend/assets/OpenDyslexic3-Regular-CIBXa4WE.ttf +0 -0
package/frontend/assets/__vite-browser-external-BIHI7g3E.js +1 -0
package/frontend/assets/conversational-worker-CeKiciGk.js +2929 -0
package/frontend/assets/dictation-worker-D0aYfq8b.js +29 -0
package/frontend/assets/gemini-color-CgSQmmva.png +0 -0
package/frontend/assets/index-BLux5ps4.js +21 -0
package/frontend/assets/index-Blh8_TEM.js +5272 -0
package/frontend/assets/index-BpQ1PuKu.js +18 -0
package/frontend/assets/index-Df737c8w.css +1 -0
package/frontend/assets/index-xaYHL6wb.js +113 -0
package/frontend/assets/ort-wasm-simd-threaded.asyncify-BynIiDiv.wasm +0 -0
package/frontend/assets/ort-wasm-simd-threaded.jsep-B0T3yYHD.wasm +0 -0
package/frontend/assets/transformers-tULNc5V3.js +31 -0
package/frontend/assets/tts-worker-DPJWqT7N.js +2899 -0
package/frontend/assets/voice-mode-worker-GzvIE_uh.js +2927 -0
package/frontend/assets/worker-2d5ABSLU.js +31 -0
package/frontend/banner.png +0 -0
package/frontend/favicon.svg +3 -0
package/frontend/google55e5ec47ee14a5f8.html +1 -0
package/frontend/index.html +234 -0
package/frontend/manifest.json +17 -0
package/frontend/pdf.worker.min.mjs +21 -0
package/frontend/robots.txt +5 -0
package/frontend/sitemap.xml +27 -0
package/package.json +81 -0
package/src/agent/index.js +1066 -0
package/src/agent/location.js +51 -0
package/src/agent/prompt.js +548 -0
package/src/agent/tools.js +4372 -0
package/src/browser/detect.js +68 -0
package/src/browser/session.js +1109 -0
package/src/config.js +137 -0
package/src/email/client.js +503 -0
package/src/index.js +557 -0
package/src/inference/anthropic.js +113 -0
package/src/inference/google.js +373 -0
package/src/inference/index.js +81 -0
package/src/inference/ollama.js +383 -0
package/src/inference/openai.js +140 -0
package/src/inference/openrouter.js +378 -0
package/src/inference/xai.js +200 -0
package/src/logBridge.js +9 -0
package/src/models.js +146 -0
package/src/remote/client.js +225 -0
package/src/scheduler/cron.js +243 -0
package/src/server.js +3876 -0
package/src/storage/db.js +1135 -0
package/src/storage/supabase.js +364 -0
package/src/tunnel/cloudflare.js +241 -0
package/src/ui/components/App.jsx +687 -0
package/src/ui/components/BrowserSelect.jsx +111 -0
package/src/ui/components/FilePicker.jsx +472 -0
package/src/ui/components/Header.jsx +444 -0
package/src/ui/components/HelpPanel.jsx +173 -0
package/src/ui/components/HistoryPanel.jsx +158 -0
package/src/ui/components/MessageList.jsx +235 -0
package/src/ui/components/ModelSelector.jsx +304 -0
package/src/ui/components/PromptInput.jsx +515 -0
package/src/ui/components/StreamingResponse.jsx +134 -0
package/src/ui/components/ThinkingIndicator.jsx +365 -0
package/src/ui/components/ToolExecution.jsx +714 -0
package/src/ui/components/index.js +82 -0
package/src/ui/context/TerminalContext.jsx +150 -0
package/src/ui/context/index.js +13 -0
package/src/ui/hooks/index.js +16 -0
package/src/ui/hooks/useChatState.js +675 -0
package/src/ui/hooks/useCommands.js +280 -0
package/src/ui/hooks/useFileAttachments.js +216 -0
package/src/ui/hooks/useKeyboardShortcuts.js +173 -0
package/src/ui/hooks/useNotifications.js +185 -0
package/src/ui/hooks/useTerminalSize.js +151 -0
package/src/ui/hooks/useWebSocket.js +273 -0
package/src/ui/index.js +94 -0
package/src/ui/ink-runner.js +22 -0
package/src/ui/utils/formatters.js +424 -0
package/src/ui/utils/index.js +6 -0
package/src/ui/utils/markdown.js +166 -0

package/src/server.js ADDED Viewed

@@ -0,0 +1,3876 @@
+import Fastify from "fastify";
+import fastifyWebsocket from "@fastify/websocket";
+import fastifyStatic from "@fastify/static";
+import { fileURLToPath } from "url";
+import { dirname, join, resolve, relative, basename, extname } from "path";
+import {
+  existsSync,
+  readdirSync,
+  statSync,
+  readFileSync,
+  lstatSync,
+  writeFileSync,
+  mkdirSync,
+  unlinkSync,
+  rmdirSync,
+  renameSync,
+  rmSync,
+} from "fs";
+import { homedir } from "os";
+import { config, getPublicConfig } from "./config.js";
+import {
+  initDb,
+  getDb,
+  getChat,
+  createChat as dbCreateChat,
+  getLocalChatIdByCloudId,
+  setCloudChatId,
+  isValidChatId,
+  saveFileSnapshot,
+  getSnapshotsToRevert,
+  deleteSnapshots,
+  deleteAllChatSnapshots,
+  saveShellUndo,
+  getShellUndosToRevert,
+  deleteShellUndos,
+  deleteAllChatShellUndos,
+} from "./storage/db.js";
+import { runAgent } from "./agent/index.js";
+import { initializeLocationCache } from "./agent/location.js";
+import {
+  cleanResponseText,
+  buildRichContextContent,
+  cleanContentForDisplay,
+} from "./agent/prompt.js";
+import { initScheduler, stopScheduler } from "./scheduler/cron.js";
+import {
+  performWebSearch,
+  fetchUrlContent,
+  deepWebResearch,
+} from "./agent/tools.js";
+import { streamInference, isImageModel } from "./inference/index.js";
+import { logSink } from "./logBridge.js";
+// ES modules do not provide __filename/__dirname, so derive them from import.meta.url
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+// Fastify instance; remains null until startServer() creates it
+let fastify = null;
+// Silent mode flag - when true, serverLog() output is suppressed
+// This is used when Ink UI is running to prevent interference with terminal management
+let silentMode = false;
+/**
+ * Enable silent mode - after this call serverLog() prints nothing.
+ * Only the module-level flag is changed; direct console.error calls are unaffected.
+ */
+export function enableSilentMode() {
+  silentMode = true;
+}
+/**
+ * Disable silent mode - serverLog() writes to the console again.
+ */
+export function disableSilentMode() {
+  silentMode = false;
+}
+/**
+ * Server logging helper - respects silent mode
+ */
+function serverLog(...args) {
+  if (!silentMode) {
+    console.log(...args);
+  }
+}
+/**
+ * Shared WebSocket message handler factory for local and remote (backend) connections.
+ * Set when /ws is registered; null before that.
+ */
+let wsHandlerFactory = null;
+/**
+ * When set (remote mode), local CLI messages are also forwarded to the backend
+ * so otherwise.ai frontend stays in sync.
+ */
+let remoteRelaySend = null;
+/**
+ * True when this server was started with a remote pairing token (otherwise connect).
+ * Frontend uses this to connect to backend WS only in that case.
+ */
+let remoteLinked = false;
+export async function startServer(port = 3000, options = {}) {
+  // Initialize database
+  await initDb();
+  // Prefetch user location for agent context (e.g. "what is my location") — same as frontend
+  // The race against a 2500 ms timer keeps a slow lookup from delaying startup; failures are ignored on purpose
+  await Promise.race([
+    initializeLocationCache(),
+    new Promise((r) => setTimeout(r, 2500)),
+  ]).catch(() => {});
+  // Initialize scheduler with a plain config object (same shape as agent config)
+  // The spread takes a shallow snapshot of config.store at startup
+  const schedulerConfig = { ...config.store };
+  initScheduler(schedulerConfig);
+  // Fastify's own logger is disabled; this file logs through serverLog()/console.error instead
+  fastify = Fastify({
+    logger: false,
+    // Increase body limit for large RAG document uploads with embeddings
+    // Default is 1MB, but documents with many chunks + embeddings can be much larger
+    bodyLimit: 100 * 1024 * 1024, // 100MB
+  });
+  // Register WebSocket plugin
+  await fastify.register(fastifyWebsocket);
+  // Serve static frontend files
+  // The frontend directory is a sibling of this file's directory and only counts as built when index.html exists
+  const frontendPath = join(__dirname, "..", "frontend");
+  const hasFrontend =
+    existsSync(frontendPath) && existsSync(join(frontendPath, "index.html"));
+  if (hasFrontend) {
+    await fastify.register(fastifyStatic, {
+      root: frontendPath,
+      prefix: "/",
+    });
+  } else {
+    serverLog("[Server] Frontend not built. Run: npm run build:frontend");
+  }
+  // ============================================
+  // API Routes
+  // ============================================
+  // Health check (remoteLinked: true when started with otherwise connect)
+  fastify.get("/api/health", async () => {
+    return { status: "ok", version: "0.1.0", remoteLinked };
+  });
+  // Serve generated images (avoids sending huge base64 over WebSocket)
+  fastify.get("/api/generated-images/:filename", async (request, reply) => {
+    const filename = request.params.filename;
+    if (!filename || filename.includes("..") || /[\/\\]/.test(filename)) {
+      return reply.status(400).send({ error: "Invalid filename" });
+    }
+    const imagesDir = join(process.cwd(), "generated_images");
+    const filePath = join(imagesDir, filename);
+    if (!existsSync(filePath)) {
+      return reply.status(404).send({ error: "Not found" });
+    }
+    const ext = extname(filename).toLowerCase();
+    const mimeTypes = {
+      ".png": "image/png",
+      ".jpg": "image/jpeg",
+      ".jpeg": "image/jpeg",
+      ".gif": "image/gif",
+      ".webp": "image/webp",
+    };
+    const mime = mimeTypes[ext] || "application/octet-stream";
+    return reply.type(mime).send(readFileSync(filePath));
+  });
+  // Expose the redacted configuration produced by getPublicConfig()
+  fastify.get("/api/config", async () => getPublicConfig());
+  // Detect available browsers for automation (used by browser selection modal)
+  fastify.get("/api/browsers/detect", async () => {
+    const { getAvailableBrowsers } = await import("./browser/detect.js");
+    return { browsers: getAvailableBrowsers() };
+  });
+  // Update config
+  fastify.put("/api/config", async (request) => {
+    const updates = request.body;
+    // Clear model on CLI when frontend sends model: null (e.g. "Clear Selected Model" in Settings)
+    if (
+      Object.prototype.hasOwnProperty.call(updates, "model") &&
+      (updates.model === null || updates.model === "")
+    ) {
+      config.set("model", "claude-sonnet-4-20250514");
+      delete updates.model;
+    }
+    // Only allow updating certain fields from frontend
+    const allowedFields = [
+      "model",
+      "temperature",
+      "ollamaUrl",
+      "browserChannel",
+    ];
+    for (const [key, value] of Object.entries(updates)) {
+      if (allowedFields.includes(key)) {
+        config.set(key, value);
+      }
+    }
+    // Handle API keys separately (they come as actual values)
+    if (updates.apiKeys) {
+      for (const [provider, key] of Object.entries(updates.apiKeys)) {
+        if (key && key !== true) {
+          // Trim whitespace to prevent auth failures from copy-paste artifacts
+          config.set(
+            `apiKeys.${provider}`,
+            typeof key === "string" ? key.trim() : key,
+          );
+        }
+      }
+    }
+    // Handle MyMX webhook secret
+    if (updates.mymx?.secret) {
+      config.set("mymx.secret", updates.mymx.secret);
+    }
+    // Handle Resend config (for sending emails)
+    if (updates.resend?.apiKey) {
+      config.set("resend.apiKey", updates.resend.apiKey);
+    }
+    if (updates.resend?.from) {
+      config.set("resend.from", updates.resend.from);
+    }
+    // Browser automation: channel ('chrome' | 'msedge' | 'chromium') or null to prompt user
+    if (Object.prototype.hasOwnProperty.call(updates, "browserChannel")) {
+      const v = updates.browserChannel;
+      config.set("browserChannel", v === "" || v === undefined ? null : v);
+    }
+    return { success: true };
+  });
+  // ============================================
+  // Model Selection API
+  // ============================================
+  // Get current model
+  fastify.get("/api/model", async () => {
+    return {
+      model: config.get("model") || "claude-sonnet-4-20250514",
+    };
+  });
+  // Set current model
+  fastify.put("/api/model", async (request) => {
+    const { model } = request.body;
+    if (model) {
+      config.set("model", model);
+      return { success: true, model };
+    }
+    return { success: false, error: "No model specified" };
+  });
+  // Get OpenRouter models (dynamically fetched from OpenRouter API)
+  fastify.get("/api/models/openrouter", async () => {
+    const apiKey = config.get("apiKeys.openrouter");
+    if (!apiKey) {
+      return {
+        success: false,
+        error: "OpenRouter API key not configured",
+        models: [],
+      };
+    }
+    try {
+      const { fetchOpenRouterModels } =
+        await import("./inference/openrouter.js");
+      const models = await fetchOpenRouterModels(apiKey);
+      serverLog("[Server] Fetched", models.length, "OpenRouter models");
+      return { success: true, models };
+    } catch (error) {
+      serverLog("[Server] Error fetching OpenRouter models:", error.message);
+      return { success: false, error: error.message, models: [] };
+    }
+  });
+  // Get all available models (static + dynamic OpenRouter)
+  fastify.get("/api/models", async () => {
+    const { MODEL_DATA } = await import("./models.js");
+    // Get configured API keys to determine which providers are available
+    const apiKeys = config.get("apiKeys") || {};
+    const result = {
+      localModels: MODEL_DATA.localModels,
+      apiModels: {
+        anthropic: apiKeys.anthropic ? MODEL_DATA.apiModels.anthropic : [],
+        openai: apiKeys.openai ? MODEL_DATA.apiModels.openai : [],
+        google: apiKeys.google ? MODEL_DATA.apiModels.google : [],
+        xai: apiKeys.xai ? MODEL_DATA.apiModels.xai : [],
+        openrouter: [],
+      },
+    };
+    // Fetch OpenRouter models if API key is configured
+    if (apiKeys.openrouter) {
+      try {
+        const { fetchOpenRouterModels } =
+          await import("./inference/openrouter.js");
+        const orModels = await fetchOpenRouterModels(apiKeys.openrouter);
+        result.apiModels.openrouter = orModels;
+        serverLog(
+          "[Server] /api/models includes",
+          orModels.length,
+          "OpenRouter models",
+        );
+      } catch (error) {
+        serverLog(
+          "[Server] Error fetching OpenRouter models for /api/models:",
+          error.message,
+        );
+      }
+    }
+    return result;
+  });
+  // ============================================
+  // Generation Data (for frontend analytics - optional)
+  // ============================================
+  fastify.post("/api/generation-data", async (request) => {
+    // This endpoint receives generation analytics from the frontend
+    // We can store this for analytics or just acknowledge it
+    serverLog("[Server] Generation data received:", request.body?.chatId);
+    return { success: true };
+  });
+  // ============================================
+  // Chat API Routes
+  // ============================================
+  // List all chats
+  fastify.get("/api/chats", async () => {
+    try {
+      const db = getDb();
+      serverLog("[Server] GET /api/chats - Database object exists:", !!db);
+      const chats = db
+        .prepare(
+          `
+        SELECT id, title, created_at, updated_at,
+          (SELECT COUNT(*) FROM messages WHERE chat_id = chats.id) as message_count
+        FROM chats
+        ORDER BY updated_at DESC
+      `,
+        )
+        .all();
+      serverLog("[Server] GET /api/chats returning", chats.length, "chats");
+      if (chats.length > 0) {
+        serverLog("[Server] First chat:", JSON.stringify(chats[0]));
+      } else {
+        serverLog("[Server] No chats found in database!");
+        // Debug: Check if table exists
+        const tables = db
+          .prepare("SELECT name FROM sqlite_master WHERE type='table'")
+          .all();
+        serverLog(
+          "[Server] Tables in database:",
+          tables.map((t) => t.name).join(", "),
+        );
+      }
+      // Transform to match frontend expected format
+      return chats.map((chat) => ({
+        ...chat,
+        // Add timestamp field for frontend compatibility
+        timestamp: chat.updated_at || chat.created_at,
+        // Indicate if chat has messages (for "Generating..." display)
+        hasMessages: chat.message_count > 0,
+      }));
+    } catch (error) {
+      console.error("[Server] Error in GET /api/chats:", error);
+      return [];
+    }
+  });
+  // Get single chat with messages
+  fastify.get("/api/chats/:id", async (request) => {
+    const { id } = request.params;
+    const db = getDb();
+    const chat = db.prepare("SELECT * FROM chats WHERE id = ?").get(id);
+    if (!chat) {
+      return { error: "Chat not found" };
+    }
+    const messages = db
+      .prepare(
+        `
+      SELECT id, role, content, metadata, created_at
+      FROM messages
+      WHERE chat_id = ?
+      ORDER BY created_at ASC
+    `,
+      )
+      .all(id);
+    // Parse metadata JSON and spread into message object
+    // For assistant messages, strip tool_call/tool_result XML from display content
+    // The actual tool calls are in metadata.toolCalls for proper UI rendering
+    const parsedMessages = messages.map((m) => {
+      const metadata = m.metadata ? JSON.parse(m.metadata) : {};
+      // Clean content for display (strip tool XML, keep prose)
+      const displayContent =
+        m.role === "assistant" && m.content
+          ? cleanContentForDisplay(m.content)
+          : m.content;
+      return {
+        ...m,
+        content: displayContent, // Clean version for frontend display
+        fullContent: m.content, // Full version with tool context (for AI history)
+        ...metadata, // Spread metadata fields (tps, numTokens, model, toolCalls, etc.) into message
+        metadata, // Keep original metadata object too
+      };
+    });
+    return { ...chat, messages: parsedMessages };
+  });
+  // Create new chat (id is UUID for local/cloud parity)
+  fastify.post("/api/chats", async (request) => {
+    const { title } = request.body || {};
+    const id = dbCreateChat(title || null, null);
+    return { id, title: title || null };
+  });
+  // Update chat (title and/or messages). Id is UUID string.
+  // Body: { title?, messages? }. Responds 400 for an invalid id, 500 with the
+  // error message when a database call throws, otherwise { success: true }.
+  fastify.put("/api/chats/:id", async (request, reply) => {
+    const id = String(request.params.id ?? "").trim();
+    if (!id || !isValidChatId(id)) {
+      return reply.code(400).send({ error: "Invalid chat id (UUID required)" });
+    }
+    const { title, messages } = request.body || {};
+    const db = getDb();
+    try {
+      // Ensure chat exists (cloud/synced chats may not exist in CLI DB yet)
+      const existingChat = db
+        .prepare("SELECT id FROM chats WHERE id = ?")
+        .get(id);
+      if (!existingChat) {
+        db.prepare(
+          "INSERT INTO chats (id, title, updated_at) VALUES (?, ?, CURRENT_TIMESTAMP)",
+        ).run(id, title ?? null);
+        serverLog("[Server] Created chat", id, "for sync (cloud/synced)");
+      }
+      // Update title if provided
+      // (an explicit null clears the title; only an absent key leaves it alone)
+      if (title !== undefined) {
+        db.prepare(
+          `
+          UPDATE chats SET title = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?
+        `,
+        ).run(title, id);
+      }
+      // If messages array provided, sync them to database
+      // PROTECTION: Don't overwrite if we already have MORE messages (prevents data loss from stale frontend state)
+      if (messages && Array.isArray(messages)) {
+        const existingCount =
+          db
+            .prepare("SELECT COUNT(*) as count FROM messages WHERE chat_id = ?")
+            .get(id)?.count || 0;
+        if (messages.length < existingCount) {
+          serverLog(
+            "[Server] SKIPPING message sync - would lose data (incoming:",
+            messages.length,
+            "existing:",
+            existingCount,
+            ")",
+          );
+          // Still bump the timestamp so the chat sorts as recently touched
+          db.prepare(
+            "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
+          ).run(id);
+        } else {
+          // Full replace: remove every stored message, then re-insert the incoming list in order
+          // NOTE(review): the delete and the inserts are separate statements with no visible
+          // transaction, so a failing insert would leave the chat partially written — confirm
+          db.prepare("DELETE FROM messages WHERE chat_id = ?").run(id);
+          const insertStmt = db.prepare(`
+            INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
+          `);
+          for (const msg of messages) {
+            // Entries without a role or with null/undefined content are dropped silently
+            if (msg.role && msg.content !== undefined && msg.content !== null) {
+              // Display/analytics fields travel in the metadata JSON column
+              const metadata = {
+                images: msg.images,
+                files: msg.files,
+                model: msg.model,
+                tps: msg.tps,
+                numTokens: msg.numTokens,
+                _searchData: msg._searchData,
+                _ragSources: msg._ragSources,
+                toolCalls: msg.toolCalls,
+              };
+              // Prefer fullContent when the client sent it, else fall back to content
+              const contentToStore = msg.fullContent ?? msg.content;
+              // Non-string content is stored as its JSON serialisation
+              const contentStr =
+                typeof contentToStore === "string"
+                  ? contentToStore
+                  : JSON.stringify(contentToStore);
+              insertStmt.run(
+                id,
+                msg.role,
+                contentStr,
+                JSON.stringify(metadata),
+              );
+            }
+          }
+          db.prepare(
+            "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
+          ).run(id);
+          serverLog(
+            "[Server] Updated chat",
+            id,
+            "with",
+            messages.length,
+            "messages",
+          );
+        }
+      }
+      return { success: true };
+    } catch (err) {
+      serverLog("[Server] PUT /api/chats/" + id, "error:", err.message);
+      return reply
+        .code(500)
+        .send({ error: err.message || "Failed to update chat" });
+    }
+  });
+  // Delete chat (id is UUID string)
+  fastify.delete("/api/chats/:id", async (request, reply) => {
+    const id = String(request.params.id ?? "").trim();
+    if (!id || !isValidChatId(id)) {
+      return reply.code(400).send({ error: "Invalid chat id (UUID required)" });
+    }
+    serverLog("[Server] DELETE /api/chats/" + id);
+    const db = getDb();
+    // Delete file snapshots and shell undo log for this chat (cleanup for undo feature)
+    deleteAllChatSnapshots(id);
+    deleteAllChatShellUndos(id);
+    serverLog(
+      "[Server] Deleted file snapshots and shell undo log for chat",
+      id,
+    );
+    // Delete messages first (foreign key)
+    const msgResult = db
+      .prepare("DELETE FROM messages WHERE chat_id = ?")
+      .run(id);
+    const chatResult = db.prepare("DELETE FROM chats WHERE id = ?").run(id);
+    serverLog(
+      "[Server] Deleted chat",
+      id,
+      "- messages:",
+      msgResult.changes,
+      "chat:",
+      chatResult.changes,
+    );
+    return { success: true };
+  });
+  // Add message to chat (id is UUID string)
+  fastify.post("/api/chats/:id/messages", async (request, reply) => {
+    const id = String(request.params.id ?? "").trim();
+    if (!id || !isValidChatId(id)) {
+      return reply.code(400).send({ error: "Invalid chat id (UUID required)" });
+    }
+    const { role, content, metadata } = request.body;
+    const db = getDb();
+    const result = db
+      .prepare(
+        `
+      INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
+    `,
+      )
+      .run(id, role, content, metadata ? JSON.stringify(metadata) : null);
+    // Update chat timestamp
+    db.prepare(
+      "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
+    ).run(id);
+    return { id: result.lastInsertRowid };
+  });
+  // ============================================
+  // Web Search API
+  // ============================================
+  // Simple web search
+  fastify.post("/api/search", async (request) => {
+    const { query, numResults = 5 } = request.body;
+    if (!query) {
+      return { success: false, error: "Query is required" };
+    }
+    serverLog("[Server] Web search:", query);
+    try {
+      const results = await performWebSearch(query, numResults);
+      return {
+        success: true,
+        query,
+        results: results.map((r) => ({
+          title: r.title,
+          url: r.url,
+          snippet: r.snippet,
+          source: new URL(r.url).hostname,
+          engines: r.engines || [],
+        })),
+        count: results.length,
+      };
+    } catch (err) {
+      console.error("[Server] Search error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Fetch URL content
+  fastify.post("/api/fetch-url", async (request) => {
+    const { url } = request.body;
+    if (!url) {
+      return { success: false, error: "URL is required" };
+    }
+    serverLog("[Server] Fetching URL:", url);
+    try {
+      const content = await fetchUrlContent(url);
+      return { success: true, url, content };
+    } catch (err) {
+      console.error("[Server] Fetch error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Deep web research (search + fetch multiple pages)
+  fastify.post("/api/research", async (request) => {
+    const { query, numPages = 5 } = request.body;
+    if (!query) {
+      return { success: false, error: "Query is required" };
+    }
+    serverLog("[Server] Deep research:", query, "pages:", numPages);
+    try {
+      const results = await deepWebResearch(query, numPages);
+      // Format for LLM context
+      if (results.success && results.pages.length > 0) {
+        const formattedContext =
+          `WEB RESEARCH RESULTS for "${query}":\n\n` +
+          results.pages
+            .map(
+              (page, i) =>
+                `[Source ${i + 1}: ${page.title}]\nURL: ${page.url}\n${page.content}`,
+            )
+            .join("\n\n---\n\n") +
+          "\n\n---\nUse the above research to answer the user's question. Cite sources when appropriate.";
+        return {
+          success: true,
+          query,
+          pages: results.pages.map((p) => ({
+            title: p.title,
+            url: p.url,
+            excerpt: p.excerpt,
+            content: p.content,
+            source: new URL(p.url).hostname,
+          })),
+          count: results.pages.length,
+          formatted_context: formattedContext,
+        };
+      }
+      return { success: false, error: "No results found" };
+    } catch (err) {
+      console.error("[Server] Research error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // ============================================
+  // Shell Command Autocomplete API
+  // ============================================
+  // Fast autocomplete for shell commands
+  fastify.post("/api/autocomplete", async (request) => {
+    const { partial, context = "shell" } = request.body;
+    if (!partial || partial.length < 2) {
+      return { success: false, error: "Input too short" };
+    }
+    serverLog("[Server] Autocomplete request:", partial.substring(0, 50));
+    try {
+      // Use a fast model for low latency
+      // Priority: gpt-4o-mini > claude-3-haiku > gemini-flash > configured model
+      let autocompleteModel = config.get("model") || "claude-sonnet-4-20250514";
+      // Check for fast models
+      const apiKeys = config.get("apiKeys") || {};
+      if (apiKeys.openai) {
+        autocompleteModel = "gpt-4o-mini";
+      } else if (apiKeys.anthropic) {
+        autocompleteModel = "claude-3-5-haiku-20241022";
+      } else if (apiKeys.google) {
+        autocompleteModel = "gemini-2.0-flash";
+      }
+      // System prompt optimized for shell command completion
+      const systemPrompt =
+        context === "shell"
+          ? `You are a shell command autocomplete assistant. Complete the partial command the user has typed.
+Rules:
+- Output ONLY the completion text (what comes after what the user typed)
+- Do NOT repeat what the user already typed
+- Do NOT include any explanation or commentary
+- Keep completions short and practical
+- If multiple options exist, pick the most common/likely one
+- If the input doesn't look like a shell command, return empty string
+Examples:
+User: "git ch" -> "eckout "
+User: "npm i" -> "nstall "
+User: "docker run -" -> "-it "
+User: "ls -l" -> "a "
+User: "cd ~/" -> "Documents"
+User: "kubectl get p" -> "ods "`
+          : `You are an autocomplete assistant. Complete the user's partial input.
+Output ONLY the completion text (what comes after what the user typed).
+Do NOT repeat what the user already typed or add explanations.`;
+      // Import inference module dynamically
+      const { streamInference, hasRequiredApiKey } =
+        await import("./inference/index.js");
+      if (!hasRequiredApiKey(autocompleteModel, config.store)) {
+        return {
+          success: false,
+          error: "No API key configured for autocomplete",
+        };
+      }
+      // Build messages for completion
+      const messages = [{ role: "user", content: partial }];
+      // Collect response (non-streaming for simplicity and speed)
+      let completion = "";
+      for await (const chunk of streamInference(
+        autocompleteModel,
+        messages,
+        systemPrompt,
+        {
+          ...config.store,
+          maxTokens: 50, // Keep it short for speed
+          temperature: 0, // Deterministic completions
+        },
+      )) {
+        if (chunk.type === "text") {
+          completion += chunk.content;
+        }
+      }
+      // Clean up the completion
+      completion = completion.trim();
+      // Ensure the completion doesn't repeat the input
+      if (completion.toLowerCase().startsWith(partial.toLowerCase())) {
+        completion = completion.substring(partial.length);
+      }
+      serverLog("[Server] Autocomplete result:", completion.substring(0, 30));
+      return {
+        success: true,
+        suggestion: completion,
+        model: autocompleteModel,
+      };
+    } catch (err) {
+      console.error("[Server] Autocomplete error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // ============================================
+  // File System API (Cursor-like @ mentions)
+  // ============================================
+  // File extension to type mapping for icons
+  // Keys are lower-case and include the leading dot; values are the type labels
+  // returned to the client. Anything not listed is reported as "file" by getFileType.
+  const FILE_TYPE_MAP = {
+    // Code files
+    ".js": "javascript",
+    ".jsx": "javascript",
+    ".ts": "typescript",
+    ".tsx": "typescript",
+    ".py": "python",
+    ".rb": "ruby",
+    ".go": "go",
+    ".rs": "rust",
+    ".java": "java",
+    ".c": "c",
+    ".cpp": "cpp",
+    ".h": "c",
+    ".hpp": "cpp",
+    ".cs": "csharp",
+    ".php": "php",
+    ".swift": "swift",
+    ".kt": "kotlin",
+    ".scala": "scala",
+    ".r": "r",
+    ".sql": "sql",
+    ".sh": "shell",
+    ".bash": "shell",
+    ".zsh": "shell",
+    ".fish": "shell",
+    // Web files
+    ".html": "html",
+    ".htm": "html",
+    ".css": "css",
+    ".scss": "scss",
+    ".sass": "sass",
+    ".less": "less",
+    ".vue": "vue",
+    ".svelte": "svelte",
+    // Config files
+    ".json": "json",
+    ".yaml": "yaml",
+    ".yml": "yaml",
+    ".toml": "toml",
+    ".xml": "xml",
+    ".ini": "config",
+    ".env": "config",
+    ".gitignore": "git",
+    // Docs
+    ".md": "markdown",
+    ".mdx": "markdown",
+    ".txt": "text",
+    ".rst": "text",
+    // Data
+    ".csv": "data",
+    ".tsv": "data",
+    // Images
+    ".png": "image",
+    ".jpg": "image",
+    ".jpeg": "image",
+    ".gif": "image",
+    ".svg": "image",
+    ".webp": "image",
+    ".ico": "image",
+  };
+  // Directories to ignore in listing
+  // Matched by exact entry name in shouldIgnore. Despite the name, the list also
+  // holds two file names (".env.local", ".DS_Store").
+  const IGNORED_DIRS = [
+    "node_modules",
+    ".git",
+    "__pycache__",
+    ".next",
+    ".nuxt",
+    "dist",
+    "build",
+    ".cache",
+    "coverage",
+    ".vscode",
+    ".idea",
+    "venv",
+    ".venv",
+    "env",
+    ".env.local",
+    ".DS_Store",
+  ];
+  // Get file type from extension
+  const getFileType = (filename) => {
+    const ext = extname(filename).toLowerCase();
+    return FILE_TYPE_MAP[ext] || "file";
+  };
+  // Check if path should be ignored
+  const shouldIgnore = (name) => {
+    return IGNORED_DIRS.includes(name) || name.startsWith(".");
+  };
+  // Get the workspace root (where the CLI is running from)
+  const getWorkspaceRoot = () => {
+    return process.cwd();
+  };
+  // List files and directories for @ mention autocomplete
+  fastify.get("/api/files", async (request) => {
+    const { path: requestPath = "", query = "" } = request.query;
+    const workspaceRoot = getWorkspaceRoot();
+    try {
+      // Resolve the path relative to workspace root
+      let targetPath = workspaceRoot;
+      if (requestPath) {
+        // Handle ~ expansion
+        if (requestPath.startsWith("~")) {
+          targetPath = join(homedir(), requestPath.slice(1));
+        } else if (requestPath.startsWith("/")) {
+          targetPath = requestPath;
+        } else {
+          targetPath = resolve(workspaceRoot, requestPath);
+        }
+      }
+      // Security: ensure we're not going outside allowed paths
+      // For now, allow workspace and home directory
+      const normalizedTarget = resolve(targetPath);
+      const normalizedWorkspace = resolve(workspaceRoot);
+      const normalizedHome = resolve(homedir());
+      if (
+        !normalizedTarget.startsWith(normalizedWorkspace) &&
+        !normalizedTarget.startsWith(normalizedHome)
+      ) {
+        return {
+          success: false,
+          error: "Access denied: path outside allowed directories",
+        };
+      }
+      if (!existsSync(targetPath)) {
+        return { success: false, error: "Path not found" };
+      }
+      const stat = statSync(targetPath);
+      // If it's a file, return file info
+      if (!stat.isDirectory()) {
+        return {
+          success: true,
+          type: "file",
+          file: {
+            name: basename(targetPath),
+            path: relative(workspaceRoot, targetPath) || targetPath,
+            absolutePath: targetPath,
+            type: getFileType(targetPath),
+            size: stat.size,
+            modified: stat.mtime,
+          },
+        };
+      }
+      // List directory contents
+      const entries = readdirSync(targetPath);
+      const items = [];
+      for (const name of entries) {
+        // Skip ignored directories/files unless specifically queried
+        if (!query && shouldIgnore(name)) continue;
+        // Filter by query if provided
+        if (query && !name.toLowerCase().includes(query.toLowerCase()))
+          continue;
+        const fullPath = join(targetPath, name);
+        try {
+          const entryStat = lstatSync(fullPath);
+          const isDirectory = entryStat.isDirectory();
+          const isSymlink = entryStat.isSymbolicLink();
+          // Get relative path from workspace root
+          const relativePath = relative(workspaceRoot, fullPath);
+          items.push({
+            name,
+            path: relativePath || fullPath,
+            absolutePath: fullPath,
+            type: isDirectory ? "directory" : getFileType(name),
+            isDirectory,
+            isSymlink,
+            size: isDirectory ? null : entryStat.size,
+            modified: entryStat.mtime,
+          });
+        } catch (err) {
+          // Skip files we can't access
+          console.warn(`[Server] Could not stat ${fullPath}:`, err.message);
+        }
+      }
+      // Sort: directories first, then alphabetically
+      items.sort((a, b) => {
+        if (a.isDirectory && !b.isDirectory) return -1;
+        if (!a.isDirectory && b.isDirectory) return 1;
+        return a.name.localeCompare(b.name);
+      });
+      return {
+        success: true,
+        type: "directory",
+        path: relative(workspaceRoot, targetPath) || ".",
+        absolutePath: targetPath,
+        workspaceRoot,
+        items,
+      };
+    } catch (err) {
+      console.error("[Server] File list error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Search files by pattern (glob-like)
+  fastify.get("/api/files/search", async (request) => {
+    const { query = "", maxResults = 50 } = request.query;
+    const workspaceRoot = getWorkspaceRoot();
+    if (!query) {
+      return { success: false, error: "Query is required" };
+    }
+    serverLog("[Server] File search:", query);
+    try {
+      const results = [];
+      const queryLower = query.toLowerCase();
+      // Recursive file search with depth limit
+      const searchDir = (dir, depth = 0) => {
+        if (depth > 5 || results.length >= maxResults) return;
+        try {
+          const entries = readdirSync(dir);
+          for (const name of entries) {
+            if (results.length >= maxResults) break;
+            if (shouldIgnore(name)) continue;
+            const fullPath = join(dir, name);
+            try {
+              const stat = lstatSync(fullPath);
+              const relativePath = relative(workspaceRoot, fullPath);
+              // Check if name matches query
+              if (
+                name.toLowerCase().includes(queryLower) ||
+                relativePath.toLowerCase().includes(queryLower)
+              ) {
+                results.push({
+                  name,
+                  path: relativePath,
+                  absolutePath: fullPath,
+                  type: stat.isDirectory() ? "directory" : getFileType(name),
+                  isDirectory: stat.isDirectory(),
+                  size: stat.isDirectory() ? null : stat.size,
+                  modified: stat.mtime,
+                });
+              }
+              // Recurse into directories
+              if (stat.isDirectory()) {
+                searchDir(fullPath, depth + 1);
+              }
+            } catch (err) {
+              // Skip inaccessible files
+            }
+          }
+        } catch (err) {
+          // Skip inaccessible directories
+        }
+      };
+      searchDir(workspaceRoot);
+      // Sort by path (shorter paths first for relevance)
+      results.sort((a, b) => {
+        // Exact filename matches first
+        const aExact = a.name.toLowerCase() === queryLower;
+        const bExact = b.name.toLowerCase() === queryLower;
+        if (aExact && !bExact) return -1;
+        if (!aExact && bExact) return 1;
+        // Then by path length (shorter = more relevant)
+        return a.path.length - b.path.length;
+      });
+      return {
+        success: true,
+        query,
+        workspaceRoot,
+        results: results.slice(0, maxResults),
+        total: results.length,
+      };
+    } catch (err) {
+      console.error("[Server] File search error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Read file content (for including in chat context)
+  fastify.get("/api/files/read", async (request) => {
+    const { path: filePath } = request.query;
+    const workspaceRoot = getWorkspaceRoot();
+    if (!filePath) {
+      return { success: false, error: "Path is required" };
+    }
+    try {
+      // Resolve the path
+      let targetPath = filePath;
+      if (filePath.startsWith("~")) {
+        targetPath = join(homedir(), filePath.slice(1));
+      } else if (!filePath.startsWith("/")) {
+        targetPath = resolve(workspaceRoot, filePath);
+      }
+      // Security check
+      const normalizedTarget = resolve(targetPath);
+      const normalizedWorkspace = resolve(workspaceRoot);
+      const normalizedHome = resolve(homedir());
+      if (
+        !normalizedTarget.startsWith(normalizedWorkspace) &&
+        !normalizedTarget.startsWith(normalizedHome)
+      ) {
+        return {
+          success: false,
+          error: "Access denied: path outside allowed directories",
+        };
+      }
+      if (!existsSync(targetPath)) {
+        return { success: false, error: "File not found" };
+      }
+      const stat = statSync(targetPath);
+      if (stat.isDirectory()) {
+        return { success: false, error: "Cannot read directory content" };
+      }
+      // Check file size (limit to 1MB for safety)
+      if (stat.size > 1024 * 1024) {
+        return {
+          success: false,
+          error: "File too large (max 1MB)",
+          size: stat.size,
+        };
+      }
+      // Check if it's a binary file (basic heuristic)
+      const binaryExtensions = [
+        ".png",
+        ".jpg",
+        ".jpeg",
+        ".gif",
+        ".ico",
+        ".pdf",
+        ".zip",
+        ".tar",
+        ".gz",
+        ".exe",
+        ".bin",
+        ".wasm",
+      ];
+      const ext = extname(targetPath).toLowerCase();
+      if (binaryExtensions.includes(ext)) {
+        return {
+          success: false,
+          error: "Cannot read binary file",
+          type: getFileType(targetPath),
+        };
+      }
+      const content = readFileSync(targetPath, "utf-8");
+      return {
+        success: true,
+        path: relative(workspaceRoot, targetPath) || targetPath,
+        absolutePath: targetPath,
+        name: basename(targetPath),
+        type: getFileType(targetPath),
+        size: stat.size,
+        lineCount: content.split("\n").length,
+        content,
+      };
+    } catch (err) {
+      console.error("[Server] File read error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Get folder tree (recursive file listing for @ mentions)
+  // Set includeContent=false to only get the file tree structure (for folder @mentions)
+  fastify.get("/api/files/tree", async (request) => {
+    const {
+      path: folderPath,
+      maxDepth = 5,
+      maxFiles = 100,
+      includeContent = "false",
+    } = request.query;
+    const shouldIncludeContent = includeContent === "true";
+    const workspaceRoot = getWorkspaceRoot();
+    if (!folderPath) {
+      return { success: false, error: "Path is required" };
+    }
+    try {
+      // Resolve the path
+      let targetPath = folderPath;
+      if (folderPath.startsWith("~")) {
+        targetPath = join(homedir(), folderPath.slice(1));
+      } else if (!folderPath.startsWith("/")) {
+        targetPath = resolve(workspaceRoot, folderPath);
+      }
+      // Security check
+      const normalizedTarget = resolve(targetPath);
+      const normalizedWorkspace = resolve(workspaceRoot);
+      const normalizedHome = resolve(homedir());
+      if (
+        !normalizedTarget.startsWith(normalizedWorkspace) &&
+        !normalizedTarget.startsWith(normalizedHome)
+      ) {
+        return {
+          success: false,
+          error: "Access denied: path outside allowed directories",
+        };
+      }
+      if (!existsSync(targetPath)) {
+        return { success: false, error: "Folder not found" };
+      }
+      const stat = statSync(targetPath);
+      if (!stat.isDirectory()) {
+        return { success: false, error: "Path is not a directory" };
+      }
+      // Binary file extensions to skip content reading
+      const binaryExtensions = [
+        ".png",
+        ".jpg",
+        ".jpeg",
+        ".gif",
+        ".ico",
+        ".pdf",
+        ".zip",
+        ".tar",
+        ".gz",
+        ".exe",
+        ".bin",
+        ".wasm",
+        ".mp3",
+        ".mp4",
+        ".wav",
+        ".avi",
+        ".mov",
+      ];
+      // Recursively build file tree
+      const files = [];
+      const directories = []; // Track subdirectories for tree display
+      let fileCount = 0;
+      const buildTree = (dir, depth = 0, parentRelative = "") => {
+        if (depth > maxDepth || fileCount >= maxFiles) return;
+        try {
+          const entries = readdirSync(dir).sort((a, b) => {
+            // Sort directories first, then files
+            const aPath = join(dir, a);
+            const bPath = join(dir, b);
+            try {
+              const aIsDir = lstatSync(aPath).isDirectory();
+              const bIsDir = lstatSync(bPath).isDirectory();
+              if (aIsDir && !bIsDir) return -1;
+              if (!aIsDir && bIsDir) return 1;
+            } catch (e) {}
+            return a.localeCompare(b);
+          });
+          for (const name of entries) {
+            if (fileCount >= maxFiles) break;
+            if (shouldIgnore(name)) continue;
+            const fullPath = join(dir, name);
+            try {
+              const entryStat = lstatSync(fullPath);
+              const relativePath = relative(workspaceRoot, fullPath);
+              if (entryStat.isDirectory()) {
+                // Track directory for tree display
+                directories.push({
+                  name,
+                  path: relativePath,
+                  depth,
+                });
+                // Recurse into subdirectory
+                buildTree(fullPath, depth + 1, relativePath);
+              } else {
+                // It's a file
+                const ext = extname(fullPath).toLowerCase();
+                const isBinary = binaryExtensions.includes(ext);
+                const isLarge = entryStat.size > 512 * 1024; // 512KB limit per file
+                const fileEntry = {
+                  name,
+                  path: relativePath,
+                  type: getFileType(name),
+                  size: entryStat.size,
+                  lineCount: 0,
+                  depth,
+                  content: null,
+                };
+                // Only read content if explicitly requested
+                if (shouldIncludeContent && !isBinary && !isLarge) {
+                  try {
+                    const content = readFileSync(fullPath, "utf-8");
+                    fileEntry.content = content;
+                    fileEntry.lineCount = content.split("\n").length;
+                  } catch (readErr) {
+                    fileEntry.error = "Could not read file";
+                  }
+                } else if (!shouldIncludeContent && !isBinary && !isLarge) {
+                  // Just count lines without storing content
+                  try {
+                    const content = readFileSync(fullPath, "utf-8");
+                    fileEntry.lineCount = content.split("\n").length;
+                  } catch (readErr) {
+                    // Ignore
+                  }
+                } else if (isBinary) {
+                  fileEntry.isBinary = true;
+                } else if (isLarge) {
+                  fileEntry.isLarge = true;
+                }
+                files.push(fileEntry);
+                fileCount++;
+              }
+            } catch (err) {
+              // Skip inaccessible files
+            }
+          }
+        } catch (err) {
+          // Skip inaccessible directories
+        }
+      };
+      buildTree(targetPath);
+      // Calculate total stats
+      const totalSize = files.reduce((sum, f) => sum + (f.size || 0), 0);
+      const totalLines = files.reduce((sum, f) => sum + (f.lineCount || 0), 0);
+      const folderRelPath = relative(workspaceRoot, targetPath) || folderPath;
+      // Build a tree display string for folder context
+      // Groups files by their parent directory for a nice tree visualization
+      const buildTreeDisplay = () => {
+        const lines = [];
+        const basePath = folderRelPath;
+        // Group files by their parent directory
+        const filesByDir = new Map();
+        filesByDir.set(basePath, []); // Root folder
+        for (const file of files) {
+          const parentDir = dirname(file.path);
+          if (!filesByDir.has(parentDir)) {
+            filesByDir.set(parentDir, []);
+          }
+          filesByDir.get(parentDir).push(file);
+        }
+        // Add directories that have no files but are parents
+        for (const dir of directories) {
+          if (!filesByDir.has(dir.path)) {
+            filesByDir.set(dir.path, []);
+          }
+        }
+        // Sort directories by path
+        const sortedDirs = Array.from(filesByDir.keys()).sort();
+        for (const dirPath of sortedDirs) {
+          const filesInDir = filesByDir.get(dirPath);
+          const depth =
+            dirPath === basePath
+              ? 0
+              : dirPath.split("/").length - basePath.split("/").length;
+          const indent = "  ".repeat(depth);
+          // Show directory name (except for root)
+          if (dirPath !== basePath) {
+            const dirName = basename(dirPath);
+            lines.push(`${indent}📁 ${dirName}/`);
+          }
+          // Show files in this directory
+          for (const file of filesInDir.sort((a, b) =>
+            a.name.localeCompare(b.name),
+          )) {
+            const fileIndent =
+              dirPath === basePath ? "" : "  ".repeat(depth + 1);
+            const lineInfo =
+              file.lineCount > 0 ? ` (${file.lineCount} lines)` : "";
+            const sizeInfo =
+              file.size > 0 ? ` [${(file.size / 1024).toFixed(1)}KB]` : "";
+            const icon = file.isBinary ? "📦" : "📄";
+            lines.push(
+              `${fileIndent}${icon} ${file.name}${lineInfo}${sizeInfo}`,
+            );
+          }
+        }
+        return lines.join("\n");
+      };
+      return {
+        success: true,
+        folderPath: folderRelPath,
+        folderName: basename(targetPath),
+        fileCount: files.length,
+        directoryCount: directories.length,
+        totalSize,
+        totalLines,
+        truncated: fileCount >= maxFiles,
+        files,
+        directories,
+        treeDisplay: buildTreeDisplay(), // Pre-formatted tree for display
+        workspaceRoot,
+      };
+    } catch (err) {
+      console.error("[Server] Folder tree error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // ============================================
+  // WebSocket for Chat Streaming
+  // ============================================
+  // Every socket accepted on /ws is added to this set; the broadcast helpers
+  // iterate it to fan messages out to the connected clients.
+  const connectedClients = new Set();
+  // Last chat id / model announced through "select_chat" / "select_model".
+  // Replayed to each newly connected client so it starts from the same state
+  // (e.g. an Ink client that connects after the frontend made its selection).
+  let lastSyncedChatId = null;
+  let lastSyncedModel = null;
+  // Broadcast CLI logs to all connected frontends (when not silent)
+  function broadcastLog(level, args) {
+    if (connectedClients.size === 0) return;
+    const parts = Array.isArray(args) ? args : [args];
+    const message = parts
+      .map((a) => (typeof a === "object" ? JSON.stringify(a) : String(a)))
+      .join(" ");
+    const payload = { time: Date.now(), level, message };
+    const msgStr = JSON.stringify({ type: "log", payload });
+    for (const client of connectedClients) {
+      if (client.readyState === 1) client.send(msgStr);
+    }
+  }
+  logSink.write = (level, args) => {
+    broadcastLog(level, args);
+  };
+  // Broadcast to ALL local clients (used when handling frontend-originated messages so CLI Ink sees the stream)
+  const broadcastToAllLocalClients = (message) => {
+    const messageStr =
+      typeof message === "string" ? message : JSON.stringify(message);
+    for (const client of connectedClients) {
+      if (client.readyState === 1) client.send(messageStr);
+    }
+  };
+  // Helper function to broadcast to all clients except sender
+  // Defined here so it's available inside the WebSocket handler
+  const broadcastToOthers = (senderSocket, message) => {
+    const messageStr =
+      typeof message === "string" ? message : JSON.stringify(message);
+    // Only log non-text events to avoid spam (text events are very frequent)
+    if (
+      message.type &&
+      !["text", "thinking", "tool_streaming"].includes(message.type)
+    ) {
+      console.log(
+        `[WebSocket] Broadcasting ${message.type} to ${connectedClients.size - 1} other clients`,
+      );
+    }
+    for (const client of connectedClients) {
+      if (client !== senderSocket && client.readyState === 1) {
+        // 1 = OPEN
+        client.send(messageStr);
+      }
+    }
+  };
+  fastify.register(async function (fastify) {
+    fastify.get("/ws", { websocket: true }, (socket, req) => {
+      serverLog(
+        "WebSocket client connected, total clients:",
+        connectedClients.size + 1,
+      );
+      // Add to connected clients for broadcasting
+      connectedClients.add(socket);
+      // Send last synced state to this client so Ink reflects remote frontend on connect
+      if (socket.readyState === 1) {
+        if (lastSyncedChatId != null) {
+          socket.send(
+            JSON.stringify({ type: "chat_selected", chatId: lastSyncedChatId }),
+          );
+        }
+        if (lastSyncedModel) {
+          socket.send(
+            JSON.stringify({ type: "model_changed", model: lastSyncedModel }),
+          );
+        }
+      }
+      // Connection abstraction: same interface for local socket or remote (backend) relay.
+      // When in remote mode (otherwise connect), also forward to backend so frontend at otherwise.ai sees CLI activity.
+      //
+      // IMPORTANT: send+broadcast both relay, so using BOTH causes the relay to
+      // receive each event TWICE → remote frontends create duplicate messages.
+      // Use sendLocal/broadcastLocal for web-originated events where the
+      // originating frontend already persists to Supabase.
+      const conn = {
+        send: (msg) => {
+          if (socket.readyState === 1) socket.send(JSON.stringify(msg));
+          if (remoteRelaySend) {
+            try {
+              remoteRelaySend(msg);
+            } catch (e) {
+              serverLog("[WebSocket] Remote relay send error:", e);
+            }
+          }
+        },
+        broadcast: (msg) => {
+          broadcastToOthers(socket, msg);
+          if (remoteRelaySend) {
+            try {
+              remoteRelaySend(msg);
+            } catch (e) {
+              serverLog("[WebSocket] Remote relay broadcast error:", e);
+            }
+          }
+        },
+        sendLocal: (msg) => {
+          if (socket.readyState === 1) socket.send(JSON.stringify(msg));
+        },
+        broadcastLocal: (msg) => {
+          broadcastToOthers(socket, msg);
+        },
+      };
+      // CRITICAL FIX: Track generation state PER-CHAT for concurrent support
+      // Using Maps instead of single variables allows multiple chats to generate simultaneously
+      const activeGenerations = new Map(); // chatId -> { abortController, isGenerating }
+      // Factory set here so connectToBackend after listen() can use it before any local client connects.
+      function createWsMessageHandler(conn, activeGenerations) {
+        return async (rawMessage) => {
+          try {
+            const message =
+              typeof rawMessage === "string"
+                ? JSON.parse(rawMessage)
+                : JSON.parse(rawMessage.toString());
+            // Remote CLI: respond to config request (backend relay → frontend needs api keys + models from CLI)
+            if (message.type === "get_cli_config") {
+              try {
+                const publicConfig = getPublicConfig();
+                let ollamaModels = [];
+                try {
+                  const { listOllamaModels } =
+                    await import("./inference/ollama.js");
+                  const raw = await listOllamaModels(config);
+                  ollamaModels = raw.map((m) => {
+                    const name = (m.name || "").toLowerCase();
+                    const type = [];
+                    if (
+                      /deepseek-r1|deepseek-v3|qwq|qwen3|gpt-oss|thinking|reason/.test(
+                        name,
+                      )
+                    )
+                      type.push("reasoning");
+                    if (/llava|vision|bakllava|moondream/.test(name))
+                      type.push("image-input");
+                    if (
+                      /codellama|codegemma|starcoder|deepseek-coder/.test(name)
+                    )
+                      type.push("code");
+                    return {
+                      id: m.id,
+                      name:
+                        m.name?.replace(/:latest$/, "").split(":")[0] ||
+                        m.id.replace(/^ollama:/, ""),
+                      provider: "Ollama",
+                      type: type.length ? type : [""],
+                      size: m.size,
+                    };
+                  });
+                } catch (e) {
+                  serverLog(
+                    "[WebSocket] Could not list Ollama models for get_cli_config:",
+                    e?.message,
+                  );
+                }
+                let openRouterModels = [];
+                if (publicConfig.apiKeys?.openrouter) {
+                  try {
+                    const { fetchOpenRouterModels } =
+                      await import("./inference/openrouter.js");
+                    const key = config.get("apiKeys.openrouter");
+                    if (key)
+                      openRouterModels = await fetchOpenRouterModels(key);
+                  } catch (e) {
+                    serverLog(
+                      "[WebSocket] Could not fetch OpenRouter models for get_cli_config:",
+                      e?.message,
+                    );
+                  }
+                }
+                conn.send({
+                  type: "cli_config",
+                  apiKeys: publicConfig.apiKeys || {},
+                  ollamaModels,
+                  openRouterModels,
+                  browserChannel: publicConfig.browserChannel ?? null,
+                  model: config.get("model") || "claude-sonnet-4-20250514",
+                });
+              } catch (e) {
+                serverLog("[WebSocket] get_cli_config error:", e?.message);
+              }
+              return;
+            }
+            // Handle chat selection sync (broadcast to other clients)
+            if (message.type === "select_chat") {
+              const { chatId } = message;
+              serverLog("[WebSocket] Chat selected:", chatId);
+              lastSyncedChatId = chatId;
+              // Broadcast to all OTHER connected clients
+              conn.broadcast({
+                type: "chat_selected",
+                chatId: chatId,
+              });
+              return;
+            }
+            // Handle model change sync (broadcast to other clients)
+            if (message.type === "select_model") {
+              const { model } = message;
+              serverLog("[WebSocket] Model selected:", model);
+              lastSyncedModel = model;
+              // Save to config
+              config.set("model", model);
+              // Broadcast to all OTHER connected clients
+              conn.broadcast({
+                type: "model_changed",
+                model: model,
+              });
+              return;
+            }
+            if (message.type === "stop") {
+              // Handle stop request - stop ALL active generations or specific chat
+              const targetChatId = message.chatId;
+              serverLog(
+                "[WebSocket] Stop requested",
+                targetChatId ? `for chat ${targetChatId}` : "for all chats",
+              );
+              if (targetChatId && activeGenerations.has(targetChatId)) {
+                // Stop specific chat
+                const gen = activeGenerations.get(targetChatId);
+                gen.abortController?.abort();
+                activeGenerations.delete(targetChatId);
+                const stoppedMsg = { type: "stopped", chatId: targetChatId };
+                conn.send(stoppedMsg);
+                conn.broadcast(stoppedMsg);
+              } else {
+                // Stop all active generations
+                for (const [chatId, gen] of activeGenerations) {
+                  gen.abortController?.abort();
+                  const stoppedMsg = { type: "stopped", chatId };
+                  conn.send(stoppedMsg);
+                  conn.broadcast(stoppedMsg);
+                }
+                activeGenerations.clear();
+              }
+              return;
+            }
+            if (message.type === "chat") {
+              const payload = message.payload || message;
+              const {
+                chatId,
+                content,
+                images,
+                webSearch,
+                systemMessage,
+                model,
+                accessToken,
+                frontendApiKeys,
+                userMsgId: frontendUserMsgId,
+                assistantMsgId: frontendAssistantMsgId,
+                history: frontendHistory,
+              } = payload;
+              // Persist accessToken on connection so regen and other handlers can write to Supabase only when user is logged in
+              if (accessToken) conn.lastAccessToken = accessToken;
+              // Get or create chat; resolve cloud UUID from frontend to local id when CLI writes to Supabase
+              const db = getDb();
+              let actualChatId = null;
+              let cloudIdForClient = null; // ID we send to frontend (cloud UUID when synced, else local id)
+              // Create abort controller for this specific chat's generation
+              const abortController = new AbortController();
+              // Browser frontends set frontendUserMsgId; Ink terminal does not.
+              const senderIsBrowser = !!frontendUserMsgId;
+              if (chatId == null) {
+                actualChatId = dbCreateChat(null, null);
+                serverLog(
+                  "[WebSocket] Created new chat (local SQLite) ID:",
+                  actualChatId,
+                );
+                cloudIdForClient = actualChatId;
+                conn.sendLocal({ type: "chat_created", chatId: cloudIdForClient });
+                broadcastToAllLocalClients({
+                  type: "chat_created",
+                  chatId: cloudIdForClient,
+                });
+              } else if (typeof chatId === "string") {
+                actualChatId = getLocalChatIdByCloudId(chatId);
+                if (actualChatId == null) {
+                  actualChatId = dbCreateChat(null, chatId);
+                  serverLog(
+                    "[WebSocket] Adopted cloud chat (local):",
+                    actualChatId,
+                  );
+                }
+                cloudIdForClient = actualChatId;
+                broadcastToAllLocalClients({
+                  type: "chat_created",
+                  chatId: cloudIdForClient,
+                });
+              } else {
+                actualChatId = String(chatId);
+                cloudIdForClient = actualChatId;
+                broadcastToAllLocalClients({
+                  type: "chat_created",
+                  chatId: cloudIdForClient,
+                });
+              }
+              // Always broadcast user_message to local clients so the Ink UI
+              // reflects messages sent from any frontend (local browser or remote).
+              broadcastToAllLocalClients({
+                type: "user_message",
+                chatId: cloudIdForClient,
+                content: content,
+                images: images,
+                userMsgId: frontendUserMsgId || undefined,
+                assistantMsgId: frontendAssistantMsgId || undefined,
+              });
+              // Web search is now handled by the LLM via the web_search tool
+              // This gives the LLM full control with proper context (user location, current time)
+              let searchData = null;
+              // CRITICAL FIX: Check if this exact user message already exists (prevents duplicates on reconnection)
+              const existingUserMsg = db
+                .prepare(
+                  `
+              SELECT id FROM messages
+              WHERE chat_id = ? AND role = 'user' AND content = ?
+              ORDER BY created_at DESC LIMIT 1
+            `,
+                )
+                .get(actualChatId, content);
+              if (existingUserMsg) {
+                serverLog(
+                  "[WebSocket] User message already exists (id:",
+                  existingUserMsg.id,
+                  ") - skipping duplicate insert",
+                );
+              } else {
+                // Save user message (with original content, not enhanced)
+                db.prepare(
+                  `
+                INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
+              `,
+                ).run(
+                  actualChatId,
+                  "user",
+                  content,
+                  images ? JSON.stringify({ images }) : null,
+                );
+                // NOTE: Frontend is the single Supabase writer — CLI only persists to local SQLite.
+              }
+              // Use frontend-provided branch-ordered history when available.
+              // This ensures the LLM sees only messages from the current branch,
+              // not interleaved messages from all branches (which SQLite ORDER BY created_at gives).
+              let history;
+              if (Array.isArray(frontendHistory) && frontendHistory.length > 0) {
+                history = frontendHistory
+                  .filter((m) => m.role && m.content)
+                  .map((msg) => {
+                    if (
+                      msg.role === "assistant" &&
+                      msg.content?.includes("<tool_call>")
+                    ) {
+                      return { ...msg, content: cleanResponseText(msg.content) };
+                    }
+                    return msg;
+                  });
+              } else {
+                const rawHistory = db
+                  .prepare(
+                    `SELECT role, content FROM messages WHERE chat_id = ? ORDER BY created_at ASC`,
+                  )
+                  .all(actualChatId);
+                history = rawHistory.map((msg) => {
+                  if (
+                    msg.role === "assistant" &&
+                    msg.content &&
+                    msg.content.includes("<tool_call>")
+                  ) {
+                    console.warn(
+                      "[WebSocket] ⚠️ Found tool_call XML in history, cleaning...",
+                    );
+                    return {
+                      ...msg,
+                      content: cleanResponseText(msg.content),
+                    };
+                  }
+                  return msg;
+                });
+              }
+              // CRITICAL: Register this chat's generation BEFORE starting
+              // This allows multiple chats to generate concurrently
+              activeGenerations.set(actualChatId, {
+                abortController,
+                isGenerating: true,
+              });
+              serverLog(
+                "[WebSocket] Starting generation for chat:",
+                actualChatId,
+                "Active generations:",
+                activeGenerations.size,
+              );
+              // Run agent and stream response
+              let fullResponse = "";
+              let thinkingContent = ""; // Track thinking/reasoning tokens for reasoning models
+              // Track generation stats for tps calculation
+              const generationStartTime = Date.now();
+              let tokenCount = 0; // Fallback: chunk counting (inaccurate)
+              let realUsageStats = null; // Real usage from API (accurate)
+              // Track tool calls for persistence (so they show on reload)
+              const toolCalls = [];
+              // Calculate message index for this assistant response (for file snapshots)
+              // The assistant message will be at this index once generated
+              const messageCount =
+                db
+                  .prepare(
+                    "SELECT COUNT(*) as count FROM messages WHERE chat_id = ?",
+                  )
+                  .get(actualChatId)?.count || 0;
+              const assistantMessageIndex = messageCount; // 0-indexed: if there's 1 message (user), assistant is index 1
+              // Create snapshot function for undo on regeneration
+              // This captures file state BEFORE write_file/edit_file modifies it
+              const snapshotFn = (snapshot, toolCallId = null) => {
+                serverLog(
+                  "[WebSocket] Capturing file snapshot for undo:",
+                  snapshot.path,
+                  "existed:",
+                  snapshot.existed,
+                );
+                saveFileSnapshot(
+                  actualChatId,
+                  assistantMessageIndex,
+                  toolCallId,
+                  snapshot.path,
+                  snapshot.content,
+                  snapshot.existed,
+                  snapshot.createdDir ?? null,
+                );
+              };
+              const shellUndoFn = (entry, toolCallId = null) => {
+                serverLog(
+                  "[WebSocket] Capturing shell undo:",
+                  entry.op,
+                  entry.path || entry.path_dest,
+                );
+                saveShellUndo(
+                  actualChatId,
+                  assistantMessageIndex,
+                  toolCallId,
+                  entry,
+                );
+              };
+              // Agent options - LLM handles all web searches via tools
+              const agentOptions = { snapshotFn, shellUndoFn };
+              // Pass custom system message and model through config if provided
+              const agentConfig = { ...config.store };
+              if (systemMessage) {
+                agentConfig.customSystemMessage = systemMessage;
+              }
+              // Use model from frontend if provided, otherwise fall back to config default
+              if (model) {
+                agentConfig.model = model;
+              }
+              // Pass user's access token so tools (e.g. search_memory) can search Supabase
+              if (conn.lastAccessToken) {
+                agentConfig.accessToken = conn.lastAccessToken;
+              }
+              // Merge frontend API keys: CLI keys take priority, frontend keys fill gaps
+              if (frontendApiKeys && typeof frontendApiKeys === "object") {
+                const mergedKeys = { ...(agentConfig.apiKeys || {}) };
+                for (const [provider, key] of Object.entries(frontendApiKeys)) {
+                  if (key && typeof key === "string" && !mergedKeys[provider]) {
+                    mergedKeys[provider] = key.trim();
+                    serverLog(
+                      "[WebSocket] Using frontend API key for provider:",
+                      provider,
+                    );
+                  }
+                }
+                agentConfig.apiKeys = mergedKeys;
+              }
+              // ============================================
+              // RAG Document Detection: Find @document mentions
+              // The agent will use the rag_search tool to search
+              // ============================================
+              let matchedRagDocuments = [];
+              // Detect @mentions in the content (pattern: @DocumentName or @"Document Name")
+              const ragMentionPattern = /@(\w+[\w\s-]*|\".+?\")/g;
+              const potentialMentions = content.match(ragMentionPattern);
+              if (potentialMentions && potentialMentions.length > 0) {
+                serverLog(
+                  "[WebSocket] Potential RAG mentions found:",
+                  potentialMentions,
+                );
+                // Get all RAG documents to match against mentions
+                const allRagDocs = getAllRagDocuments();
+                if (allRagDocs.length > 0) {
+                  // Find matching documents (case-insensitive)
+                  for (const mention of potentialMentions) {
+                    // Remove @ and quotes from mention
+                    const mentionName = mention
+                      .substring(1)
+                      .replace(/^"|"$/g, "")
+                      .toLowerCase();
+                    const matchingDoc = allRagDocs.find(
+                      (doc) =>
+                        doc.name.toLowerCase() === mentionName ||
+                        doc.name.toLowerCase().includes(mentionName) ||
+                        mentionName.includes(doc.name.toLowerCase()),
+                    );
+                    if (
+                      matchingDoc &&
+                      !matchedRagDocuments.some((d) => d.id === matchingDoc.id)
+                    ) {
+                      matchedRagDocuments.push(matchingDoc);
+                    }
+                  }
+                  if (matchedRagDocuments.length > 0) {
+                    serverLog(
+                      "[WebSocket] Matched RAG documents:",
+                      matchedRagDocuments.map((d) => d.name),
+                    );
+                    broadcastToAllLocalClients({
+                      type: "rag_documents_detected",
+                      chatId: actualChatId,
+                      documents: matchedRagDocuments.map((d) => ({
+                        id: d.id,
+                        name: d.name,
+                      })),
+                    });
+                  }
+                }
+              }
+              // Pass matched RAG documents to agent options
+              // The agent will use rag_search tool to search them
+              agentOptions.ragDocuments = matchedRagDocuments;
+              try {
+                // Check if this is an image generation model - they need special handling
+                // Image models should bypass the agent and go directly to inference
+                const effectiveModel = agentConfig.model || config.store.model;
+                const isImageGenModel = isImageModel(effectiveModel);
+                serverLog(
+                  "[WebSocket] Model:",
+                  effectiveModel,
+                  "isImageModel:",
+                  isImageGenModel,
+                );
+                if (matchedRagDocuments.length > 0) {
+                  serverLog(
+                    "[WebSocket] RAG documents passed to agent:",
+                    matchedRagDocuments.map((d) => d.name),
+                  );
+                }
+                // Choose the appropriate generator based on model type
+                // Include images in the user message for vision-capable models (GPT-5, etc.) and image gen
+                const userMessageWithImages = images?.length
+                  ? { role: "user", content, images }
+                  : { role: "user", content };
+                const generator = isImageGenModel
+                  ? streamInference(
+                      effectiveModel,
+                      [userMessageWithImages],
+                      "",
+                      agentConfig,
+                    )
+                  : runAgent(content, history.slice(0, -1), agentConfig, {
+                      ...agentOptions,
+                      images: images || [],
+                    });
+                for await (const chunk of generator) {
+                  // Check if THIS chat's generation was aborted (not other chats)
+                  const thisGen = activeGenerations.get(actualChatId);
+                  if (
+                    !thisGen ||
+                    !thisGen.isGenerating ||
+                    thisGen.abortController?.signal.aborted
+                  ) {
+                    serverLog(
+                      "[WebSocket] Generation aborted for chat:",
+                      actualChatId,
+                    );
+                    break;
+                  }
+                  // CRITICAL: Include chatId in ALL messages so client can route correctly
+                  // This prevents tokens from one chat leaking into another
+                  let msgWithChatId = { ...chunk, chatId: actualChatId };
+                  // For image chunks: do NOT send huge base64 over WebSocket (can exceed message limits).
+                  // Save to disk and send a small image_url so the frontend can load the image via GET.
+                  if (chunk.type === "image") {
+                    console.log(
+                      "[WebSocket] 🖼️ Received image chunk, content length:",
+                      chunk.content?.length,
+                    );
+                    const mimeType = chunk.mimeType || "image/png";
+                    let imageUrl = null;
+                    try {
+                      const imagesDir = join(process.cwd(), "generated_images");
+                      if (!existsSync(imagesDir))
+                        mkdirSync(imagesDir, { recursive: true });
+                      const filename = `gemini_${Date.now()}.png`;
+                      const filePath = join(imagesDir, filename);
+                      writeFileSync(
+                        filePath,
+                        Buffer.from(chunk.content, "base64"),
+                      );
+                      imageUrl = `/api/generated-images/${filename}`;
+                      console.log(
+                        "[WebSocket] 🖼️ Image saved, sending image_url:",
+                        imageUrl,
+                      );
+                    } catch (err) {
+                      console.error("[WebSocket] Failed to save image:", err);
+                    }
+                    if (imageUrl) {
+                      msgWithChatId = {
+                        type: "image_url",
+                        content: imageUrl,
+                        chatId: actualChatId,
+                      };
+                    } else {
+                      continue;
+                    }
+                  }
+                  // Send to ALL local clients (sender + Ink terminal) without relaying.
+                  // The originating frontend persists to Supabase; relaying would cause
+                  // Azure-connected frontends to create duplicate assistant messages.
+                  broadcastToAllLocalClients(msgWithChatId);
+                  if (chunk.type === "usage") {
+                    // Real usage stats from API - use these instead of chunk counting
+                    realUsageStats = {
+                      inputTokens: chunk.inputTokens || 0,
+                      outputTokens: chunk.outputTokens || 0,
+                      totalTokens: chunk.totalTokens || 0,
+                      thinkingTokens: chunk.thinkingTokens || 0,
+                    };
+                    serverLog(
+                      "[WebSocket] Real usage stats received:",
+                      realUsageStats,
+                    );
+                  } else if (chunk.type === "thinking") {
+                    // Track thinking/reasoning tokens from reasoning models (Gemini, Grok)
+                    thinkingContent += chunk.content;
+                    tokenCount++; // Fallback chunk counting
+                    serverLog(
+                      "[WebSocket] Thinking content now:",
+                      thinkingContent.length,
+                      "chars",
+                    );
+                  } else if (chunk.type === "text") {
+                    fullResponse += chunk.content;
+                    tokenCount++; // Fallback chunk counting
+                    // DEBUG: Log when fullResponse grows significantly
+                    if (
+                      fullResponse.length % 100 === 0 ||
+                      chunk.content.includes("<tool")
+                    ) {
+                      serverLog(
+                        "[WebSocket] fullResponse now:",
+                        fullResponse.length,
+                        "chars, last chunk:",
+                        chunk.content.substring(0, 50),
+                      );
+                    }
+                  } else if (
+                    chunk.type === "image" ||
+                    chunk.type === "image_url"
+                  ) {
+                    // Image: for 'image' we already saved and forwarded as image_url above. Build fullResponse from URL.
+                    const imageContent =
+                      chunk.type === "image_url"
+                        ? chunk.content
+                        : msgWithChatId.content;
+                    if (imageContent) {
+                      const imageMarkdown = `\n\n![Generated Image](${imageContent})\n\n`;
+                      fullResponse += imageMarkdown;
+                      tokenCount += chunk.type === "image_url" ? 10 : 100;
+                      serverLog(
+                        "[WebSocket] Image (URL) added to response:",
+                        imageContent.substring(0, 60),
+                      );
+                    }
+                  } else if (chunk.type === "tool_start") {
+                    // Track tool call start for persistence
+                    // Calculate content position, adjusting to nearest word boundary to avoid splitting words
+                    const cleanedSoFar = cleanResponseText(fullResponse);
+                    let contentPosition = cleanedSoFar.length;
+                    // Find the nearest word boundary (previous space or newline)
+                    // This prevents tool displays from appearing in the middle of words
+                    if (contentPosition > 0 && cleanedSoFar.length > 0) {
+                      const lastChar = cleanedSoFar[cleanedSoFar.length - 1];
+                      if (lastChar && !/\s/.test(lastChar)) {
+                        // We're in the middle of a word - find the last whitespace
+                        const lastSpaceIdx = Math.max(
+                          cleanedSoFar.lastIndexOf(" "),
+                          cleanedSoFar.lastIndexOf("\n"),
+                          cleanedSoFar.lastIndexOf("\t"),
+                        );
+                        if (lastSpaceIdx > 0) {
+                          contentPosition = lastSpaceIdx + 1; // Position after the space
+                        }
+                      }
+                    }
+                    toolCalls.push({
+                      id: chunk.callId,
+                      tool: chunk.name,
+                      params: chunk.args || {},
+                      status: "running",
+                      startTime: Date.now(),
+                      contentPosition,
+                    });
+                  } else if (
+                    chunk.type === "tool_result" ||
+                    chunk.type === "tool_error"
+                  ) {
+                    // Update tool call with result
+                    const toolIndex = chunk.callId
+                      ? toolCalls.findIndex((t) => t.id === chunk.callId)
+                      : toolCalls.findIndex(
+                          (t) =>
+                            t.tool === chunk.name && t.status === "running",
+                        );
+                    if (toolIndex !== -1) {
+                      toolCalls[toolIndex] = {
+                        ...toolCalls[toolIndex],
+                        status:
+                          chunk.type === "tool_error" ? "error" : "complete",
+                        result: chunk.result,
+                        error: chunk.error,
+                        endTime: Date.now(),
+                      };
+                      // Capture search data from web_search tool for persistence
+                      if (
+                        chunk.name === "web_search" &&
+                        chunk.result?.results
+                      ) {
+                        searchData = {
+                          success: true,
+                          results: chunk.result.results,
+                          count: chunk.result.results?.length || 0,
+                        };
+                      }
+                    }
+                  }
+                }
+                // Only save if THIS chat was not aborted and we have content
+                const thisGenFinal = activeGenerations.get(actualChatId);
+                if (thisGenFinal?.isGenerating && fullResponse.length > 0) {
+                  // DEBUG: Check if large content will be stripped
+                  const willStripLarge = fullResponse.length > 200;
+                  // Clean the response (remove tool_call XML blocks)
+                  const cleanedResponse = cleanResponseText(
+                    fullResponse,
+                    willStripLarge,
+                  );
+                  // DEBUG: Log if large content is being stripped
+                  if (
+                    fullResponse.length > 200 &&
+                    cleanedResponse.length < fullResponse.length * 0.3
+                  ) {
+                    console.warn(
+                      "[WebSocket] ⚠️ Large content being stripped!",
+                    );
+                    console.warn(
+                      "[WebSocket] fullResponse length:",
+                      fullResponse.length,
+                      "cleaned:",
+                      cleanedResponse.length,
+                    );
+                    console.warn(
+                      "[WebSocket] fullResponse preview:",
+                      fullResponse.substring(0, 500),
+                    );
+                    console.warn(
+                      "[WebSocket] fullResponse end:",
+                      fullResponse.substring(
+                        Math.max(0, fullResponse.length - 500),
+                      ),
+                    );
+                  }
+                  // Calculate final generation stats
+                  // Use real API usage stats when available, fall back to character-based estimation
+                  // Token estimation: ~4 characters per token for English (more accurate than chunk counting)
+                  const elapsedSeconds =
+                    (Date.now() - generationStartTime) / 1000;
+                  const estimatedOutputTokens = Math.ceil(
+                    (fullResponse.length + thinkingContent.length) / 4,
+                  );
+                  // Include reasoning/thinking tokens in output count when API reports them separately (e.g. Gemini)
+                  const realOutput = realUsageStats?.outputTokens ?? 0;
+                  const realThinking = realUsageStats?.thinkingTokens ?? 0;
+                  const finalOutputTokens =
+                    realUsageStats != null
+                      ? realOutput + realThinking
+                      : estimatedOutputTokens;
+                  const finalInputTokens = realUsageStats?.inputTokens || 0;
+                  const tps =
+                    elapsedSeconds > 0
+                      ? Math.round((finalOutputTokens / elapsedSeconds) * 100) /
+                        100
+                      : 0;
+                  serverLog(
+                    "[WebSocket] Final stats - real:",
+                    !!realUsageStats,
+                    "outputTokens:",
+                    finalOutputTokens,
+                    "estimated:",
+                    estimatedOutputTokens,
+                    "tps:",
+                    tps,
+                  );
+                  // Build metadata with model, stats, search results, tool calls, and thinking content
+                  const metadata = {
+                    model: agentConfig.model, // Use the actual model used for this generation
+                    tps,
+                    numTokens: finalOutputTokens,
+                    inputTokens: finalInputTokens, // Include input tokens for reference
+                    _searchData: searchData, // Include search results for display later
+                    toolCalls: toolCalls.length > 0 ? toolCalls : undefined, // Include tool calls for display on reload
+                    thinkingContent:
+                      thinkingContent.length > 0 ? thinkingContent : undefined, // Include thinking/reasoning content
+                  };
+                  // CRITICAL FIX: Check if chat still exists before saving
+                  // User may have deleted the chat while generation was running
+                  const chatExists = db
+                    .prepare("SELECT id FROM chats WHERE id = ?")
+                    .get(actualChatId);
+                  if (!chatExists) {
+                    console.warn(
+                      "[WebSocket] Chat",
+                      actualChatId,
+                      "was deleted during generation - skipping save",
+                    );
+                    const deletedDoneMsg = {
+                      type: "done",
+                      chatId: actualChatId,
+                      tps,
+                      numTokens: finalOutputTokens,
+                      deleted: true, // Signal that chat was deleted
+                    };
+                    broadcastToAllLocalClients(deletedDoneMsg);
+                  } else {
+                    // CRITICAL FIX: Prevent duplicate assistant messages
+                    // Find the last user message and delete any assistant messages after it
+                    // This handles cases where generation ran twice (reconnection, race conditions)
+                    //
+                    // IMPORTANT: Use message `id` (auto-increment) instead of `created_at` for ordering!
+                    // SQLite's CURRENT_TIMESTAMP has second-level precision, so if two messages are
+                    // inserted within the same second, `created_at > ?` may not catch duplicates.
+                    // Using `id > ?` guarantees we catch ALL assistant messages inserted after the user message.
+                    const lastUserMsg = db
+                      .prepare(
+                        `
+                    SELECT id FROM messages
+                    WHERE chat_id = ? AND role = 'user'
+                    ORDER BY id DESC LIMIT 1
+                  `,
+                      )
+                      .get(actualChatId);
+                    if (lastUserMsg) {
+                      const deleteResult = db
+                        .prepare(
+                          `
+                      DELETE FROM messages
+                      WHERE chat_id = ? AND role = 'assistant' AND id > ?
+                    `,
+                        )
+                        .run(actualChatId, lastUserMsg.id);
+                      if (deleteResult.changes > 0) {
+                        serverLog(
+                          "[WebSocket] Deleted",
+                          deleteResult.changes,
+                          "duplicate assistant message(s) before saving new response",
+                        );
+                      }
+                    }
+                    // Build rich context content with inline tool calls and results
+                    // Uses native XML format: <tool_call>...</tool_call><tool_result>...</tool_result>
+                    // This allows the AI to "remember" what tools it used and what they returned
+                    const richContent = buildRichContextContent(
+                      cleanedResponse,
+                      toolCalls,
+                    );
+                    // Save assistant message with rich content and metadata
+                    db.prepare(
+                      `
+                    INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
+                  `,
+                    ).run(
+                      actualChatId,
+                      "assistant",
+                      richContent,
+                      JSON.stringify(metadata),
+                    );
+                    // NOTE: Frontend is the single Supabase writer — CLI only persists to local SQLite.
+                    // Title is set by frontend via backend /api/generate-chat-title at end of message.
+                    // Update chat timestamp
+                    db.prepare(
+                      "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
+                    ).run(actualChatId);
+                    serverLog(
+                      "[WebSocket] Sending done for chat:",
+                      actualChatId,
+                      "tps:",
+                      tps,
+                      "tokens:",
+                      finalOutputTokens,
+                    );
+                    const doneChatId = actualChatId;
+                    // Include complete message so frontend doesn't need to reconstruct
+                    const doneMessage = {
+                      type: "done",
+                      chatId: doneChatId,
+                      tps,
+                      numTokens: finalOutputTokens,
+                      message: {
+                        role: "assistant",
+                        content: cleanedResponse, // Clean version for display
+                        fullContent: richContent, // Rich version with tool history for AI context
+                        toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+                        _searchData: searchData,
+                        thinkingContent:
+                          thinkingContent.length > 0
+                            ? thinkingContent
+                            : undefined,
+                        tps,
+                        numTokens: finalOutputTokens,
+                        inputTokens: finalInputTokens, // Include input tokens
+                        model: agentConfig.model, // Use the actual model used for this generation
+                      },
+                    };
+                    broadcastToAllLocalClients(doneMessage);
+                  }
+                } else if (thisGenFinal?.isGenerating) {
+                  // Generation completed but with no text content (e.g., only tool calls, or max iterations reached)
+                  // Still need to send done event so frontend knows generation is finished
+                  const elapsedSeconds =
+                    (Date.now() - generationStartTime) / 1000;
+                  const finalOutputTokens =
+                    (realUsageStats?.outputTokens ?? 0) +
+                    (realUsageStats?.thinkingTokens ?? 0);
+                  const tps =
+                    elapsedSeconds > 0
+                      ? Math.round((finalOutputTokens / elapsedSeconds) * 100) /
+                        100
+                      : 0;
+                  serverLog(
+                    "[WebSocket] Sending done for chat (no content):",
+                    actualChatId,
+                  );
+                  const emptyDoneChatId = actualChatId;
+                  const emptyDoneMessage = {
+                    type: "done",
+                    chatId: emptyDoneChatId,
+                    tps,
+                    numTokens: finalOutputTokens,
+                    message: {
+                      role: "assistant",
+                      content: "",
+                      toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+                      _searchData: searchData,
+                      thinkingContent:
+                        thinkingContent.length > 0
+                          ? thinkingContent
+                          : undefined,
+                      tps,
+                      numTokens: finalOutputTokens,
+                      model: agentConfig.model,
+                    },
+                  };
+                  broadcastToAllLocalClients(emptyDoneMessage);
+                }
+                // Clean up THIS chat's generation state (doesn't affect other chats)
+                activeGenerations.delete(actualChatId);
+                serverLog(
+                  "[WebSocket] Generation complete for chat:",
+                  actualChatId,
+                  "Remaining active:",
+                  activeGenerations.size,
+                );
+              } catch (err) {
+                if (err.name === "AbortError") {
+                  serverLog(
+                    "[WebSocket] Generation aborted by user for chat:",
+                    actualChatId,
+                  );
+                } else {
+                  console.error("Agent error for chat:", actualChatId, err);
+                  const errorMsg = {
+                    type: "error",
+                    message: err.message,
+                    chatId: actualChatId,
+                  };
+                  broadcastToAllLocalClients(errorMsg);
+                }
+                // Clean up THIS chat's generation state
+                activeGenerations.delete(actualChatId);
+              }
+            } else if (message.type === "regenerate") {
+              // Handle regeneration request (retry/edit)
+              const {
+                chatId,
+                message: userMessage,
+                systemMessage: regenSystemMessage,
+                model: regenModel,
+                frontendApiKeys: regenFrontendApiKeys,
+                revert: payloadRevert,
+              } = message.payload || {};
+              // Default true for backward compat; frontend sends revert: false when user chose "Continue without reverting"
+              const shouldRevert = payloadRevert !== false;
+              serverLog(
+                "[WebSocket] Regeneration request for chat:",
+                chatId,
+                "revert:",
+                shouldRevert,
+              );
+              if (!chatId) {
+                conn.send({ type: "error", message: "No chatId provided" });
+                return;
+              }
+              // Concurrency: reject duplicate regenerate for the same chat (Cursor-like reliability)
+              if (activeGenerations.has(chatId)) {
+                conn.send({
+                  type: "error",
+                  message:
+                    "A generation is already in progress for this chat. Wait for it to finish or stop it first.",
+                });
+                return;
+              }
+              // Register a per-chat abort controller; the duplicate-request check above and the streaming loop below both read this entry.
+              const regenAbortController = new AbortController();
+              activeGenerations.set(chatId, {
+                abortController: regenAbortController,
+                isGenerating: true,
+              }); // NOTE(review): registered before the try block further down — confirm a throw in the revert/DB steps in between cannot leave this entry behind and block later requests for this chat.
+              const db = getDb();
+              // ============================================
+              // STEP 1: Calculate assistant message index BEFORE any changes
+              // This is the index of the assistant message being regenerated
+              // ============================================
+              const allMessagesForIndex = db
+                .prepare(
+                  `
+              SELECT id, role FROM messages WHERE chat_id = ? ORDER BY id ASC
+            `,
+                )
+                .all(chatId);
+              // Scan backwards for the last assistant row; its position in the id-ordered list is used as the index being regenerated.
+              // If no assistant message exists, the index stays at the current message count (where a new one would be created).
+              let regenMessageIndex = allMessagesForIndex.length; // default when the loop finds no assistant row
+              for (let i = allMessagesForIndex.length - 1; i >= 0; i--) {
+                if (allMessagesForIndex[i].role === "assistant") {
+                  regenMessageIndex = i; // NOTE(review): chosen even if this row precedes the last user message — confirm that is the intended revert point
+                  break;
+                }
+              }
+              serverLog(
+                "[WebSocket] Regeneration at message index:",
+                regenMessageIndex,
+              );
+              // ============================================
+              // STEP 2: Revert files BEFORE starting new generation (only when user chose to revert)
+              // Undo any file changes from the message being regenerated
+              // ============================================
+              const snapshotsToRevert = shouldRevert
+                ? getSnapshotsToRevert(chatId, regenMessageIndex)
+                : [];
+              const revertedFiles = [];
+              const revertErrors = []; // Collect errors so we still revert as much as possible and inform user
+              if (snapshotsToRevert.length > 0) {
+                serverLog(
+                  "[WebSocket] Reverting",
+                  snapshotsToRevert.length,
+                  "file snapshot(s) before regeneration",
+                );
+                const safeRoot = resolve(process.cwd());
+                for (const snapshot of snapshotsToRevert) {
+                  try {
+                    if (snapshot.file_existed) {
+                      // File existed before - restore original content
+                      writeFileSync(
+                        snapshot.file_path,
+                        snapshot.original_content,
+                        "utf-8",
+                      );
+                      serverLog(
+                        "[WebSocket] Restored file:",
+                        snapshot.file_path,
+                      );
+                    } else {
+                      // File was created by tool - delete it
+                      if (existsSync(snapshot.file_path)) {
+                        unlinkSync(snapshot.file_path);
+                        serverLog(
+                          "[WebSocket] Deleted created file:",
+                          snapshot.file_path,
+                        );
+                      }
+                      // If write_file created the parent dir, remove it when empty (Strategy 2)
+                      const createdDir = snapshot.created_dir || null;
+                      if (createdDir && existsSync(createdDir)) {
+                        try {
+                          if (readdirSync(createdDir).length === 0) {
+                            rmdirSync(createdDir);
+                            serverLog(
+                              "[WebSocket] Removed empty created dir:",
+                              createdDir,
+                            );
+                            // Remove empty parents up to safe root
+                            let parent = dirname(createdDir);
+                            while (
+                              parent &&
+                              parent !== safeRoot &&
+                              parent !== dirname(parent)
+                            ) {
+                              if (
+                                existsSync(parent) &&
+                                readdirSync(parent).length === 0
+                              ) {
+                                rmdirSync(parent);
+                                serverLog(
+                                  "[WebSocket] Removed empty parent dir:",
+                                  parent,
+                                );
+                                parent = dirname(parent);
+                              } else {
+                                break;
+                              }
+                            }
+                          }
+                        } catch (dirErr) {
+                          console.warn(
+                            "[WebSocket] Failed to remove created dir:",
+                            createdDir,
+                            dirErr.message,
+                          );
+                          revertErrors.push({
+                            path: createdDir,
+                            message: dirErr.message,
+                            op: "rmdir",
+                          });
+                        }
+                      }
+                    }
+                    revertedFiles.push(snapshot.file_path);
+                  } catch (revertErr) {
+                    console.warn(
+                      "[WebSocket] Failed to revert file:",
+                      snapshot.file_path,
+                      revertErr.message,
+                    );
+                    revertErrors.push({
+                      path: snapshot.file_path,
+                      message: revertErr.message,
+                      op: "file",
+                    });
+                  }
+                }
+                // Delete the reverted snapshots from database (even if some reverts failed)
+                deleteSnapshots(chatId, regenMessageIndex);
+                // Notify client of reverted files
+                if (revertedFiles.length > 0) {
+                  const revertMsg = {
+                    type: "files_reverted",
+                    chatId,
+                    files: revertedFiles,
+                    count: revertedFiles.length,
+                  };
+                  conn.send(revertMsg);
+                  conn.broadcast(revertMsg);
+                }
+              }
+              // Revert shell commands (Strategy 1: undo mkdir, touch, cp, mv) — only when user chose to revert
+              const shellUndosToRevert = shouldRevert
+                ? getShellUndosToRevert(chatId, regenMessageIndex)
+                : [];
+              const revertedShellCount = [];
+              if (shellUndosToRevert.length > 0) {
+                serverLog(
+                  "[WebSocket] Reverting",
+                  shellUndosToRevert.length,
+                  "shell undo(s) before regeneration",
+                );
+                for (const entry of shellUndosToRevert) {
+                  try {
+                    if (entry.op === "mkdir" && entry.path) {
+                      if (existsSync(entry.path)) {
+                        try {
+                          if (readdirSync(entry.path).length === 0) {
+                            rmdirSync(entry.path);
+                            serverLog(
+                              "[WebSocket] Reverted mkdir:",
+                              entry.path,
+                            );
+                            revertedShellCount.push("mkdir");
+                          }
+                        } catch (e) {
+                          console.warn(
+                            "[WebSocket] Failed to rmdir:",
+                            entry.path,
+                            e.message,
+                          );
+                          revertErrors.push({
+                            op: entry.op,
+                            path: entry.path,
+                            message: e.message,
+                          });
+                        }
+                      }
+                    } else if (entry.op === "touch" && entry.path) {
+                      if (existsSync(entry.path)) {
+                        try {
+                          const st = statSync(entry.path);
+                          if (st.isFile()) {
+                            unlinkSync(entry.path);
+                            serverLog(
+                              "[WebSocket] Reverted touch:",
+                              entry.path,
+                            );
+                            revertedShellCount.push("touch");
+                          }
+                        } catch (e) {
+                          console.warn(
+                            "[WebSocket] Failed to unlink touch:",
+                            entry.path,
+                            e.message,
+                          );
+                          revertErrors.push({
+                            op: entry.op,
+                            path: entry.path,
+                            message: e.message,
+                          });
+                        }
+                      }
+                    } else if (entry.op === "cp" && entry.path_dest) {
+                      if (existsSync(entry.path_dest)) {
+                        try {
+                          const st = statSync(entry.path_dest);
+                          if (st.isDirectory()) {
+                            rmSync(entry.path_dest, { recursive: true });
+                          } else {
+                            unlinkSync(entry.path_dest);
+                          }
+                          serverLog(
+                            "[WebSocket] Reverted cp (removed dest):",
+                            entry.path_dest,
+                          );
+                          revertedShellCount.push("cp");
+                        } catch (e) {
+                          console.warn(
+                            "[WebSocket] Failed to unlink cp dest:",
+                            entry.path_dest,
+                            e.message,
+                          );
+                          revertErrors.push({
+                            op: entry.op,
+                            path: entry.path_dest,
+                            message: e.message,
+                          });
+                        }
+                      }
+                    } else if (
+                      entry.op === "mv" &&
+                      entry.path_src &&
+                      entry.path_dest
+                    ) {
+                      if (existsSync(entry.path_dest)) {
+                        try {
+                          renameSync(entry.path_dest, entry.path_src);
+                          serverLog(
+                            "[WebSocket] Reverted mv:",
+                            entry.path_dest,
+                            "->",
+                            entry.path_src,
+                          );
+                          revertedShellCount.push("mv");
+                        } catch (e) {
+                          console.warn(
+                            "[WebSocket] Failed to revert mv:",
+                            e.message,
+                          );
+                          revertErrors.push({
+                            op: entry.op,
+                            path: entry.path_dest,
+                            message: e.message,
+                          });
+                        }
+                      }
+                    }
+                  } catch (err) {
+                    console.warn(
+                      "[WebSocket] Failed to revert shell op:",
+                      entry.op,
+                      err.message,
+                    );
+                    revertErrors.push({
+                      op: entry.op,
+                      path: entry.path || entry.path_dest,
+                      message: err.message,
+                    });
+                  }
+                }
+                deleteShellUndos(chatId, regenMessageIndex);
+                if (revertedShellCount.length > 0) {
+                  const shellRevertMsg = {
+                    type: "shell_reverted",
+                    chatId,
+                    count: revertedShellCount.length,
+                  };
+                  conn.send(shellRevertMsg);
+                  conn.broadcast(shellRevertMsg);
+                }
+              }
+              // Notify client of any revert errors (so user can see e.g. "1 file failed: permission denied")
+              if (revertErrors.length > 0) {
+                const errMsg = {
+                  type: "revert_errors",
+                  chatId,
+                  errors: revertErrors,
+                  count: revertErrors.length,
+                };
+                conn.send(errMsg);
+                conn.broadcast(errMsg);
+              }
+              // ============================================
+              // STEP 3: Handle message cleanup and updates
+              // ============================================
+              const lastUserMsg = db
+                .prepare(
+                  `
+              SELECT id, content, metadata FROM messages
+              WHERE chat_id = ? AND role = 'user'
+              ORDER BY id DESC
+              LIMIT 1
+            `,
+                )
+                .get(chatId);
+              if (lastUserMsg) {
+                // Check if the user message content was edited (compare with what frontend sent)
+                // If userMessage differs from stored content, update the database
+                if (userMessage && userMessage !== lastUserMsg.content) {
+                  db.prepare(
+                    `
+                  UPDATE messages SET content = ? WHERE id = ?
+                `,
+                  ).run(userMessage, lastUserMsg.id);
+                  serverLog(
+                    "[WebSocket] Updated user message content for edit (id:",
+                    lastUserMsg.id,
+                    ")",
+                  );
+                }
+                // Delete all assistant messages that came after the last user message
+                // Using id > ? instead of created_at > ? for reliable ordering
+                const deleteResult = db
+                  .prepare(
+                    `
+                DELETE FROM messages
+                WHERE chat_id = ? AND role = 'assistant' AND id > ?
+              `,
+                  )
+                  .run(chatId, lastUserMsg.id);
+                if (deleteResult.changes > 0) {
+                  serverLog(
+                    "[WebSocket] Deleted",
+                    deleteResult.changes,
+                    "old assistant message(s) before regeneration",
+                  );
+                }
+              }
+              // Get chat history (EXCLUDING the last user message since runAgent appends it separately)
+              // This prevents the user message from appearing twice in the conversation
+              const allMessages = db
+                .prepare(
+                  `
+              SELECT role, content FROM messages WHERE chat_id = ? ORDER BY created_at ASC
+            `,
+                )
+                .all(chatId);
+              // Remove the last user message from history (runAgent will add it back via userMessage param)
+              const history =
+                allMessages.length > 0 &&
+                allMessages[allMessages.length - 1].role === "user"
+                  ? allMessages.slice(0, -1)
+                  : allMessages;
+              // ============================================
+              // STEP 4: Set up snapshot function for new generation
+              // ============================================
+              const newMessageCount =
+                db
+                  .prepare(
+                    "SELECT COUNT(*) as count FROM messages WHERE chat_id = ?",
+                  )
+                  .get(chatId)?.count || 0;
+              const newAssistantMessageIndex = newMessageCount; // Where the new assistant message will be
+              const regenSnapshotFn = (snapshot) => {
+                serverLog(
+                  "[WebSocket] Regen: Capturing file snapshot for undo:",
+                  snapshot.path,
+                  "existed:",
+                  snapshot.existed,
+                );
+                saveFileSnapshot(
+                  chatId,
+                  newAssistantMessageIndex,
+                  null,
+                  snapshot.path,
+                  snapshot.content,
+                  snapshot.existed,
+                  snapshot.createdDir ?? null,
+                );
+              };
+              // Run agent and stream response
+              let fullResponse = "";
+              let regenThinkingContent = ""; // Track thinking/reasoning tokens
+              // Track generation stats
+              const regenStartTime = Date.now();
+              let regenTokenCount = 0; // Fallback: chunk counting
+              let regenUsageStats = null; // Real usage from API
+              // Track tool calls for persistence
+              const regenToolCalls = [];
+              // Track search data from web_search tool
+              let regenSearchData = null;
+              // Pass custom system message and model through config if provided
+              const regenConfig = { ...config.store };
+              if (regenSystemMessage) {
+                regenConfig.customSystemMessage = regenSystemMessage;
+              }
+              // Use model from frontend if provided, otherwise fall back to config default
+              if (regenModel) {
+                regenConfig.model = regenModel;
+              }
+              // Pass user's access token so tools (e.g. search_memory) can search Supabase
+              if (conn.lastAccessToken) {
+                regenConfig.accessToken = conn.lastAccessToken;
+              }
+              // Merge frontend API keys: CLI keys take priority, frontend keys fill gaps
+              if (
+                regenFrontendApiKeys &&
+                typeof regenFrontendApiKeys === "object"
+              ) {
+                const mergedKeys = { ...(regenConfig.apiKeys || {}) };
+                for (const [provider, key] of Object.entries(
+                  regenFrontendApiKeys,
+                )) {
+                  if (key && typeof key === "string" && !mergedKeys[provider]) {
+                    mergedKeys[provider] = key.trim();
+                    serverLog(
+                      "[WebSocket] Regen: Using frontend API key for provider:",
+                      provider,
+                    );
+                  }
+                }
+                regenConfig.apiKeys = mergedKeys;
+              }
+              try {
+                // Check if this is an image generation model for regeneration
+                const effectiveRegenModel =
+                  regenConfig.model || config.store.model;
+                const isImageRegenModel = isImageModel(effectiveRegenModel);
+                serverLog(
+                  "[WebSocket] Regen Model:",
+                  effectiveRegenModel,
+                  "isImageModel:",
+                  isImageRegenModel,
+                );
+                // Include images from the last user message for vision (e.g. regen after attaching image)
+                const regenImages = (() => {
+                  try {
+                    const meta = lastUserMsg?.metadata
+                      ? JSON.parse(lastUserMsg.metadata)
+                      : {};
+                    return meta.images || [];
+                  } catch {
+                    return [];
+                  }
+                })();
+                const regenUserMessageWithImages = regenImages?.length
+                  ? { role: "user", content: userMessage, images: regenImages }
+                  : { role: "user", content: userMessage };
+                const regenGenerator = isImageRegenModel
+                  ? streamInference(
+                      effectiveRegenModel,
+                      [regenUserMessageWithImages],
+                      "",
+                      regenConfig,
+                    )
+                  : runAgent(userMessage, history, regenConfig, {
+                      snapshotFn: regenSnapshotFn,
+                      images: regenImages,
+                    });
+                for await (const chunk of regenGenerator) {
+                  // Check if THIS chat's regeneration was aborted
+                  const thisGen = activeGenerations.get(chatId);
+                  if (
+                    !thisGen ||
+                    !thisGen.isGenerating ||
+                    thisGen.abortController?.signal.aborted
+                  ) {
+                    serverLog(
+                      "[WebSocket] Regeneration aborted for chat:",
+                      chatId,
+                    );
+                    break;
+                  }
+                  let regenMsg = { ...chunk, chatId };
+                  if (chunk.type === "image") {
+                    let imageUrl = null;
+                    try {
+                      const imagesDir = join(process.cwd(), "generated_images");
+                      if (!existsSync(imagesDir))
+                        mkdirSync(imagesDir, { recursive: true });
+                      const filename = `gemini_regen_${Date.now()}.png`;
+                      const filePath = join(imagesDir, filename);
+                      writeFileSync(
+                        filePath,
+                        Buffer.from(chunk.content, "base64"),
+                      );
+                      imageUrl = `/api/generated-images/${filename}`;
+                    } catch (err) {
+                      console.error(
+                        "[WebSocket] Failed to save image during regen:",
+                        err,
+                      );
+                    }
+                    regenMsg = imageUrl
+                      ? { type: "image_url", content: imageUrl, chatId }
+                      : regenMsg;
+                  }
+                  conn.send(regenMsg);
+                  if (chunk.type === "usage") {
+                    // Real usage stats from API
+                    regenUsageStats = {
+                      inputTokens: chunk.inputTokens || 0,
+                      outputTokens: chunk.outputTokens || 0,
+                      totalTokens: chunk.totalTokens || 0,
+                      thinkingTokens: chunk.thinkingTokens || 0,
+                    };
+                    serverLog(
+                      "[WebSocket] Regen real usage stats:",
+                      regenUsageStats,
+                    );
+                  } else if (chunk.type === "thinking") {
+                    // Track thinking/reasoning tokens
+                    regenThinkingContent += chunk.content;
+                    regenTokenCount++;
+                  } else if (chunk.type === "text") {
+                    fullResponse += chunk.content;
+                    regenTokenCount++;
+                  } else if (
+                    chunk.type === "image" ||
+                    chunk.type === "image_url"
+                  ) {
+                    const imageContent =
+                      chunk.type === "image_url"
+                        ? chunk.content
+                        : regenMsg.content;
+                    if (imageContent) {
+                      fullResponse += `\n\n![Generated Image](${imageContent})\n\n`;
+                      regenTokenCount += 100;
+                    }
+                  } else if (chunk.type === "tool_start") {
+                    // Track tool call start for persistence
+                    // Calculate content position, adjusting to nearest word boundary to avoid splitting words
+                    const regenCleanedSoFar = cleanResponseText(fullResponse);
+                    let regenContentPosition = regenCleanedSoFar.length;
+                    // Find the nearest word boundary (previous space or newline)
+                    if (
+                      regenContentPosition > 0 &&
+                      regenCleanedSoFar.length > 0
+                    ) {
+                      const lastChar =
+                        regenCleanedSoFar[regenCleanedSoFar.length - 1];
+                      if (lastChar && !/\s/.test(lastChar)) {
+                        const lastSpaceIdx = Math.max(
+                          regenCleanedSoFar.lastIndexOf(" "),
+                          regenCleanedSoFar.lastIndexOf("\n"),
+                          regenCleanedSoFar.lastIndexOf("\t"),
+                        );
+                        if (lastSpaceIdx > 0) {
+                          regenContentPosition = lastSpaceIdx + 1;
+                        }
+                      }
+                    }
+                    regenToolCalls.push({
+                      id: chunk.callId,
+                      tool: chunk.name,
+                      params: chunk.args || {},
+                      status: "running",
+                      startTime: Date.now(),
+                      contentPosition: regenContentPosition,
+                    });
+                  } else if (
+                    chunk.type === "tool_result" ||
+                    chunk.type === "tool_error"
+                  ) {
+                    // Update tool call with result
+                    const toolIndex = chunk.callId
+                      ? regenToolCalls.findIndex((t) => t.id === chunk.callId)
+                      : regenToolCalls.findIndex(
+                          (t) =>
+                            t.tool === chunk.name && t.status === "running",
+                        );
+                    if (toolIndex !== -1) {
+                      regenToolCalls[toolIndex] = {
+                        ...regenToolCalls[toolIndex],
+                        status:
+                          chunk.type === "tool_error" ? "error" : "complete",
+                        result: chunk.result,
+                        error: chunk.error,
+                        endTime: Date.now(),
+                      };
+                      // Capture search data from web_search tool for persistence
+                      if (
+                        chunk.name === "web_search" &&
+                        chunk.result?.results
+                      ) {
+                        regenSearchData = {
+                          success: true,
+                          results: chunk.result.results,
+                          count: chunk.result.results?.length || 0,
+                        };
+                      }
+                    }
+                  }
+                }
+                // Only save if THIS chat was not aborted and we have content
+                const thisGenFinal = activeGenerations.get(chatId);
+                if (thisGenFinal?.isGenerating && fullResponse.length > 0) {
+                  // Clean the response
+                  const cleanedResponse = cleanResponseText(fullResponse);
+                  // Calculate generation stats - use real API stats when available
+                  // Token estimation: ~4 characters per token for English (more accurate than chunk counting)
+                  const regenElapsed = (Date.now() - regenStartTime) / 1000;
+                  const regenEstimatedTokens = Math.ceil(
+                    (fullResponse.length + regenThinkingContent.length) / 4,
+                  );
+                  const regenFinalOutputTokens =
+                    regenUsageStats != null
+                      ? (regenUsageStats.outputTokens ?? 0) +
+                        (regenUsageStats.thinkingTokens ?? 0)
+                      : regenEstimatedTokens;
+                  const regenFinalInputTokens =
+                    regenUsageStats?.inputTokens || 0;
+                  const regenTps =
+                    regenElapsed > 0
+                      ? Math.round(
+                          (regenFinalOutputTokens / regenElapsed) * 100,
+                        ) / 100
+                      : 0;
+                  serverLog(
+                    "[WebSocket] Regen final stats - real:",
+                    !!regenUsageStats,
+                    "outputTokens:",
+                    regenFinalOutputTokens,
+                    "estimated:",
+                    regenEstimatedTokens,
+                    "tps:",
+                    regenTps,
+                  );
+                  // Check if chat still exists before saving
+                  const chatExists = db
+                    .prepare("SELECT id FROM chats WHERE id = ?")
+                    .get(chatId);
+                  if (!chatExists) {
+                    console.warn(
+                      "[WebSocket] Chat",
+                      chatId,
+                      "was deleted during regeneration - skipping save",
+                    );
+                    const regenDeletedChatId = chatId;
+                    const regenDeletedMsg = {
+                      type: "done",
+                      chatId: regenDeletedChatId,
+                      tps: regenTps,
+                      numTokens: regenFinalOutputTokens,
+                      deleted: true,
+                    };
+                    conn.send(regenDeletedMsg);
+                    conn.broadcast(regenDeletedMsg);
+                  } else {
+                    // Build rich context content with inline tool calls and results
+                    const regenRichContent = buildRichContextContent(
+                      cleanedResponse,
+                      regenToolCalls,
+                    );
+                    // Save assistant message with rich content and metadata
+                    db.prepare(
+                      `
+                    INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
+                  `,
+                    ).run(
+                      chatId,
+                      "assistant",
+                      regenRichContent,
+                      JSON.stringify({
+                        model: regenConfig.model, // Use the model from config for this regeneration
+                        tps: regenTps,
+                        numTokens: regenFinalOutputTokens,
+                        inputTokens: regenFinalInputTokens,
+                        _searchData: regenSearchData,
+                        toolCalls:
+                          regenToolCalls.length > 0
+                            ? regenToolCalls
+                            : undefined,
+                        thinkingContent:
+                          regenThinkingContent.length > 0
+                            ? regenThinkingContent
+                            : undefined,
+                      }),
+                    );
+                    const regenCloudId = chatId;
+                    // NOTE: Frontend is the single Supabase writer — CLI only persists to local SQLite.
+                    // Title is set by frontend via backend /api/generate-chat-title at end of message.
+                    // Update chat timestamp
+                    db.prepare(
+                      "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
+                    ).run(chatId);
+                    const regenDoneChatId = regenCloudId ?? chatId;
+                    // Include complete message so frontend doesn't need to reconstruct
+                    const regenDoneMsg = {
+                      type: "done",
+                      chatId: regenDoneChatId,
+                      tps: regenTps,
+                      numTokens: regenFinalOutputTokens,
+                      message: {
+                        role: "assistant",
+                        content: cleanedResponse, // Clean version for display
+                        fullContent: regenRichContent, // Rich version with tool history for AI context
+                        toolCalls:
+                          regenToolCalls.length > 0
+                            ? regenToolCalls
+                            : undefined,
+                        _searchData: regenSearchData,
+                        thinkingContent:
+                          regenThinkingContent.length > 0
+                            ? regenThinkingContent
+                            : undefined,
+                        tps: regenTps,
+                        numTokens: regenFinalOutputTokens,
+                        inputTokens: regenFinalInputTokens,
+                        model: regenConfig.model, // Use the model from config for this regeneration
+                      },
+                    };
+                    conn.send(regenDoneMsg);
+                    conn.broadcast(regenDoneMsg);
+                  }
+                } else if (thisGenFinal?.isGenerating) {
+                  // Regeneration completed but with no text content (e.g., only tool calls, or max iterations reached)
+                  // Still need to send done event so frontend knows generation is finished
+                  const regenElapsed = (Date.now() - regenStartTime) / 1000;
+                  const regenFinalOutputTokens =
+                    (regenUsageStats?.outputTokens ?? 0) +
+                    (regenUsageStats?.thinkingTokens ?? 0);
+                  const regenTps =
+                    regenElapsed > 0
+                      ? Math.round(
+                          (regenFinalOutputTokens / regenElapsed) * 100,
+                        ) / 100
+                      : 0;
+                  serverLog(
+                    "[WebSocket] Sending done for regen (no content):",
+                    chatId,
+                  );
+                  const emptyRegenDoneChatId = chatId;
+                  const emptyRegenDoneMsg = {
+                    type: "done",
+                    chatId: emptyRegenDoneChatId,
+                    tps: regenTps,
+                    numTokens: regenFinalOutputTokens,
+                    message: {
+                      role: "assistant",
+                      content: "",
+                      toolCalls:
+                        regenToolCalls.length > 0 ? regenToolCalls : undefined,
+                      _searchData: regenSearchData,
+                      thinkingContent:
+                        regenThinkingContent.length > 0
+                          ? regenThinkingContent
+                          : undefined,
+                      tps: regenTps,
+                      numTokens: regenFinalOutputTokens,
+                      model: regenConfig.model,
+                    },
+                  };
+                  conn.send(emptyRegenDoneMsg);
+                  conn.broadcast(emptyRegenDoneMsg);
+                }
+                // Clean up THIS chat's generation state
+                activeGenerations.delete(chatId);
+              } catch (err) {
+                if (err.name === "AbortError") {
+                  serverLog(
+                    "[WebSocket] Regeneration aborted by user for chat:",
+                    chatId,
+                  );
+                } else {
+                  console.error("Regeneration error for chat:", chatId, err);
+                  const errorMsg = {
+                    type: "error",
+                    message: err.message,
+                    chatId,
+                  };
+                  conn.send(errorMsg);
+                  conn.broadcast(errorMsg);
+                }
+                activeGenerations.delete(chatId);
+              }
+            }
+          } catch (err) {
+            console.error("WebSocket message error:", err);
+            conn.send({ type: "error", message: err.message });
+          }
+        };
+      }
+      wsHandlerFactory = createWsMessageHandler;
+      const handleMessage = wsHandlerFactory(conn, activeGenerations);
+      socket.on("message", (raw) => handleMessage(raw));
+      socket.on("close", () => {
+        serverLog(
+          "WebSocket client disconnected, remaining clients:",
+          connectedClients.size - 1,
+        );
+        connectedClients.delete(socket);
+      });
+    });
+  });
+  // ============================================
+  // Email Webhook (MyMX)
+  // ============================================
+  // MyMX webhook endpoint with raw body capture for signature verification
+  fastify.post(
+    "/api/email/webhook",
+    {
+      // Use preParsing hook to capture raw body before JSON parsing
+      preParsing: async (request, reply, payload) => {
+        const chunks = [];
+        for await (const chunk of payload) {
+          chunks.push(chunk);
+        }
+        const rawBody = Buffer.concat(chunks).toString("utf-8");
+        request.rawBody = rawBody;
+        // Return a new readable stream with the same content for Fastify to parse
+        const { Readable } = await import("stream");
+        return Readable.from([rawBody]);
+      },
+    },
+    async (request, reply) => {
+      const { handleEmailWebhook } = await import("./email/client.js");
+      return handleEmailWebhook(request, reply, config.store);
+    },
+  );
+  // ============================================
+  // Memory/Embeddings API
+  // ============================================
+  // GET /api/memory: placeholder for memory visualization data; always answers
+  // with empty point and cluster lists until the feature is implemented.
+  fastify.get("/api/memory", async () => ({ points: [], clusters: [] }));
+  // ============================================
+  // RAG (Retrieval Augmented Generation) API
+  // ============================================
+  // Import RAG database functions
+  const {
+    createRagDocument,
+    getAllRagDocuments,
+    getRagDocument,
+    updateRagDocument: updateRagDoc,
+    deleteRagDocument: deleteRagDoc,
+    addRagChunks,
+    getRagChunksByDocument,
+    getRagChunksByDocuments,
+  } = await import("./storage/db.js");
+  // List all RAG documents
+  fastify.get("/api/rag/documents", async () => {
+    try {
+      const documents = getAllRagDocuments();
+      return { success: true, documents };
+    } catch (err) {
+      console.error("[Server] RAG list error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Get single RAG document
+  fastify.get("/api/rag/documents/:id", async (request) => {
+    const { id } = request.params;
+    try {
+      const document = getRagDocument(parseInt(id, 10));
+      if (!document) {
+        return { success: false, error: "Document not found" };
+      }
+      return { success: true, document };
+    } catch (err) {
+      console.error("[Server] RAG get error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Get chunks for a RAG document (for memory visualization when docs are on server)
+  fastify.get("/api/rag/documents/:id/chunks", async (request) => {
+    const { id } = request.params;
+    try {
+      const docId = parseInt(id, 10);
+      const document = getRagDocument(docId);
+      if (!document) {
+        return { success: false, error: "Document not found" };
+      }
+      const chunks = getRagChunksByDocument(docId);
+      return { success: true, chunks };
+    } catch (err) {
+      console.error("[Server] RAG chunks error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Create RAG document with chunks (accepts pre-processed data from frontend)
+  fastify.post("/api/rag/documents", async (request) => {
+    const { name, chunks, summary, fileCount, files } = request.body;
+    if (!name || !chunks || !Array.isArray(chunks) || chunks.length === 0) {
+      return { success: false, error: "Name and chunks are required" };
+    }
+    serverLog(
+      "[Server] Creating RAG document:",
+      name,
+      "with",
+      chunks.length,
+      "chunks",
+    );
+    try {
+      // Create the document
+      const docId = createRagDocument({
+        name,
+        chunkCount: chunks.length,
+        fileCount: fileCount || 1,
+        files: files || null,
+        summary: summary || null,
+        uploadDate: new Date().toISOString(),
+      });
+      // Add chunks (embeddings should already be included)
+      addRagChunks(docId, chunks);
+      serverLog("[Server] RAG document created with ID:", docId);
+      return {
+        success: true,
+        document: {
+          id: docId,
+          name,
+          chunkCount: chunks.length,
+          fileCount: fileCount || 1,
+          uploadDate: new Date().toISOString(),
+          summary,
+        },
+      };
+    } catch (err) {
+      console.error("[Server] RAG create error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Update RAG document (rename)
+  fastify.put("/api/rag/documents/:id", async (request) => {
+    const { id } = request.params;
+    const { name } = request.body;
+    try {
+      updateRagDoc(parseInt(id, 10), { name });
+      return { success: true };
+    } catch (err) {
+      console.error("[Server] RAG update error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Delete RAG document
+  fastify.delete("/api/rag/documents/:id", async (request) => {
+    const { id } = request.params;
+    serverLog("[Server] Deleting RAG document:", id);
+    try {
+      deleteRagDoc(parseInt(id, 10));
+      return { success: true };
+    } catch (err) {
+      console.error("[Server] RAG delete error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Generate embeddings for text chunks using OpenAI API
+  fastify.post("/api/rag/embed", async (request) => {
+    const { texts } = request.body;
+    if (!texts || !Array.isArray(texts) || texts.length === 0) {
+      return { success: false, error: "Texts array is required" };
+    }
+    const apiKey = config.get("apiKeys.openai");
+    if (!apiKey) {
+      return {
+        success: false,
+        error: "OpenAI API key not configured on server",
+      };
+    }
+    serverLog("[Server] Generating embeddings for", texts.length, "texts");
+    try {
+      // Call OpenAI embeddings API
+      const response = await fetch("https://api.openai.com/v1/embeddings", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          Authorization: `Bearer ${apiKey}`,
+        },
+        body: JSON.stringify({
+          model: "text-embedding-3-small",
+          input: texts,
+        }),
+      });
+      if (!response.ok) {
+        const errorData = await response.json().catch(() => ({}));
+        throw new Error(
+          errorData.error?.message || `OpenAI API error: ${response.status}`,
+        );
+      }
+      const data = await response.json();
+      const embeddings = data.data.map((item) => item.embedding);
+      return { success: true, embeddings };
+    } catch (err) {
+      console.error("[Server] Embedding error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // RAG search - find relevant chunks for a query
+  fastify.post("/api/rag/search", async (request) => {
+    const { query, docIds, topK = 10, minSimilarity = 0.4 } = request.body;
+    if (!query) {
+      return { success: false, error: "Query is required" };
+    }
+    if (!docIds || !Array.isArray(docIds) || docIds.length === 0) {
+      return { success: false, error: "Document IDs are required" };
+    }
+    const apiKey = config.get("apiKeys.openai");
+    if (!apiKey) {
+      return {
+        success: false,
+        error: "OpenAI API key not configured on server",
+      };
+    }
+    serverLog(
+      "[Server] RAG search:",
+      query.substring(0, 50),
+      "... in",
+      docIds.length,
+      "documents",
+    );
+    try {
+      // 1. Generate embedding for the query
+      const embedResponse = await fetch(
+        "https://api.openai.com/v1/embeddings",
+        {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Authorization: `Bearer ${apiKey}`,
+          },
+          body: JSON.stringify({
+            model: "text-embedding-3-small",
+            input: query,
+          }),
+        },
+      );
+      if (!embedResponse.ok) {
+        const errorData = await embedResponse.json().catch(() => ({}));
+        throw new Error(
+          errorData.error?.message || "Failed to generate query embedding",
+        );
+      }
+      const embedData = await embedResponse.json();
+      const queryEmbedding = embedData.data[0].embedding;
+      // 2. Get chunks from specified documents
+      const chunks = getRagChunksByDocuments(
+        docIds.map((id) => parseInt(id, 10)),
+      );
+      if (chunks.length === 0) {
+        return { success: true, results: [], query };
+      }
+      // 3. Calculate cosine similarity for each chunk
+      const cosineSimilarity = (vec1, vec2) => {
+        if (!vec1 || !vec2 || vec1.length !== vec2.length) return 0;
+        let dotProduct = 0,
+          norm1 = 0,
+          norm2 = 0;
+        for (let i = 0; i < vec1.length; i++) {
+          dotProduct += vec1[i] * vec2[i];
+          norm1 += vec1[i] * vec1[i];
+          norm2 += vec2[i] * vec2[i];
+        }
+        norm1 = Math.sqrt(norm1);
+        norm2 = Math.sqrt(norm2);
+        if (norm1 === 0 || norm2 === 0) return 0;
+        return dotProduct / (norm1 * norm2);
+      };
+      // 4. Score and rank chunks
+      const results = chunks
+        .map((chunk) => ({
+          ...chunk,
+          similarity: cosineSimilarity(queryEmbedding, chunk.embedding),
+        }))
+        .filter((chunk) => chunk.similarity >= minSimilarity)
+        .sort((a, b) => b.similarity - a.similarity)
+        .slice(0, topK)
+        .map(({ embedding, ...rest }) => rest); // Remove embedding from response
+      serverLog("[Server] RAG search found", results.length, "relevant chunks");
+      return { success: true, results, query };
+    } catch (err) {
+      console.error("[Server] RAG search error:", err);
+      return { success: false, error: err.message };
+    }
+  });
+  // Explicit /login route - serve SPA so login page loads (GitHub, Google, email)
+  fastify.get("/login", async (request, reply) => {
+    if (hasFrontend) {
+      return reply.sendFile("index.html");
+    }
+    return reply
+      .status(404)
+      .send({ error: "Frontend not built. Run: npm run build:frontend" });
+  });
+  // Fallback to index.html for SPA routing
+  fastify.setNotFoundHandler(async (request, reply) => {
+    if (request.url.startsWith("/api/")) {
+      return reply.status(404).send({ error: "Not found" });
+    }
+    // Serve index.html for client-side routing (only if frontend is built)
+    if (hasFrontend) {
+      return reply.sendFile("index.html");
+    }
+    // No frontend built - show helpful message
+    return reply.status(200).type("text/html").send(`
+      <!DOCTYPE html>
+      <html>
+      <head>
+        <title>Otherwise AI</title>
+        <style>
+          body { font-family: system-ui, sans-serif; max-width: 600px; margin: 100px auto; padding: 20px; }
+          h1 { color: #333; }
+          code { background: #f0f0f0; padding: 2px 8px; border-radius: 4px; }
+          pre { background: #f0f0f0; padding: 16px; border-radius: 8px; overflow-x: auto; }
+        </style>
+      </head>
+      <body>
+        <h1>Otherwise AI Server Running</h1>
+        <p>The server is running, but the frontend hasn't been built yet.</p>
+        <p>To build the frontend, run:</p>
+        <pre>cd /Users/thomasstahura/Desktop/Ultimasite/cli
+npm run build:frontend</pre>
+        <p>Then restart the server.</p>
+        <hr>
+        <p><strong>API Status:</strong> <a href="/api/health">/api/health</a></p>
+        <p><strong>Config:</strong> <code>otherwise config</code></p>
+      </body>
+      </html>
+    `);
+  });
+  // Start server on the configured port; host "0.0.0.0" listens on all IPv4 interfaces, not only localhost
+  await fastify.listen({ port, host: "0.0.0.0" });
+  // Optional: connect to backend for remote access (otherwise.ai -> backend -> this CLI).
+  // Connect immediately so Azure sees [WS] CLI connected; message handler uses wsHandlerFactory when set (after first local /ws client).
+  const remoteToken =
+    options.remotePairingToken ?? config.get("remote.pairingToken");
+  if (remoteToken) {
+    remoteLinked = true;
+    const { connectToBackend, getBackendWsUrl } =
+      await import("./remote/client.js");
+    const backendUrl = config.get("remote.backendUrl") || getBackendWsUrl();
+    const remoteActiveGenerations = new Map();
+    connectToBackend(
+      remoteToken,
+      (raw, sendReply) => {
+        if (!wsHandlerFactory) return;
+        // send = to backend only; broadcast = to local Ink only. Handler calls both per chunk,
+        // so backend and Ink each get the stream once (avoid duplicate rendering in CLI).
+        // sendLocal/broadcastLocal added so the handler doesn't crash (TypeError) for the
+        // chat handler's local-only events; these are no-ops since the remote browser
+        // (on Azure) should handle its own Supabase persistence.
+        const remoteConn = {
+          send: (msg) => sendReply(msg),
+          broadcast: (msg) => broadcastToAllLocalClients(msg),
+          sendLocal: () => {},
+          broadcastLocal: () => {},
+        };
+        const handler = wsHandlerFactory(remoteConn, remoteActiveGenerations);
+        return handler(typeof raw === "string" ? raw : raw.toString());
+      },
+      {
+        backendUrl,
+        onConnect: async (reply) => {
+          remoteRelaySend = reply;
+          serverLog(
+            "[Server] Remote relay active: CLI messages will sync to otherwise.ai",
+          );
+          try {
+            const publicConfig = getPublicConfig();
+            const { listOllamaModels } = await import("./inference/ollama.js");
+            const raw = await listOllamaModels(config);
+            const ollamaModels = raw.map((m) => {
+              const name = (m.name || "").toLowerCase();
+              const type = [];
+              if (
+                /deepseek-r1|deepseek-v3|qwq|qwen3|gpt-oss|thinking|reason/.test(
+                  name,
+                )
+              )
+                type.push("reasoning");
+              if (/llava|vision|bakllava|moondream/.test(name))
+                type.push("image-input");
+              if (/codellama|codegemma|starcoder|deepseek-coder/.test(name))
+                type.push("code");
+              return {
+                id: m.id,
+                name:
+                  m.name?.replace(/:latest$/, "").split(":")[0] ||
+                  m.id.replace(/^ollama:/, ""),
+                provider: "Ollama",
+                type: type.length ? type : [""],
+                size: m.size,
+              };
+            });
+            if (ollamaModels.length)
+              reply({ type: "ollama_models", models: ollamaModels });
+            let openRouterModels = [];
+            if (publicConfig.apiKeys?.openrouter) {
+              try {
+                const { fetchOpenRouterModels } =
+                  await import("./inference/openrouter.js");
+                const key = config.get("apiKeys.openrouter");
+                if (key) openRouterModels = await fetchOpenRouterModels(key);
+              } catch (e) {
+                serverLog(
+                  "[Server] Could not fetch OpenRouter models on connect:",
+                  e?.message,
+                );
+              }
+            }
+            reply({
+              type: "cli_config",
+              apiKeys: publicConfig.apiKeys || {},
+              ollamaModels,
+              openRouterModels,
+              browserChannel: publicConfig.browserChannel ?? null,
+              model: config.get("model") || "claude-sonnet-4-20250514",
+            });
+          } catch (e) {
+            serverLog("[Server] Could not build remote config:", e?.message);
+          }
+        },
+        onDisconnect: () => {
+          remoteRelaySend = null;
+          serverLog("[Server] Remote relay inactive");
+        },
+        onInvalidToken: () => {
+          config.set("remote.pairingToken", null);
+          serverLog(
+            "[Server] Remote token cleared (expired or invalid). Run otherwise connect to link again.",
+          );
+        },
+      },
+    );
+    serverLog("[Server] Remote backend connection started (otherwise.ai)");
+  }
+  return fastify;
+}
+export async function stopServer() {
+  stopScheduler();
+  if (fastify) {
+    await fastify.close();
+    fastify = null;
+  }
+}