otherwise-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +193 -0
  2. package/bin/otherwise.js +5 -0
  3. package/frontend/404.html +84 -0
  4. package/frontend/assets/OpenDyslexic3-Bold-CDyRs55Y.ttf +0 -0
  5. package/frontend/assets/OpenDyslexic3-Regular-CIBXa4WE.ttf +0 -0
  6. package/frontend/assets/__vite-browser-external-BIHI7g3E.js +1 -0
  7. package/frontend/assets/conversational-worker-CeKiciGk.js +2929 -0
  8. package/frontend/assets/dictation-worker-D0aYfq8b.js +29 -0
  9. package/frontend/assets/gemini-color-CgSQmmva.png +0 -0
  10. package/frontend/assets/index-BLux5ps4.js +21 -0
  11. package/frontend/assets/index-Blh8_TEM.js +5272 -0
  12. package/frontend/assets/index-BpQ1PuKu.js +18 -0
  13. package/frontend/assets/index-Df737c8w.css +1 -0
  14. package/frontend/assets/index-xaYHL6wb.js +113 -0
  15. package/frontend/assets/ort-wasm-simd-threaded.asyncify-BynIiDiv.wasm +0 -0
  16. package/frontend/assets/ort-wasm-simd-threaded.jsep-B0T3yYHD.wasm +0 -0
  17. package/frontend/assets/transformers-tULNc5V3.js +31 -0
  18. package/frontend/assets/tts-worker-DPJWqT7N.js +2899 -0
  19. package/frontend/assets/voice-mode-worker-GzvIE_uh.js +2927 -0
  20. package/frontend/assets/worker-2d5ABSLU.js +31 -0
  21. package/frontend/banner.png +0 -0
  22. package/frontend/favicon.svg +3 -0
  23. package/frontend/google55e5ec47ee14a5f8.html +1 -0
  24. package/frontend/index.html +234 -0
  25. package/frontend/manifest.json +17 -0
  26. package/frontend/pdf.worker.min.mjs +21 -0
  27. package/frontend/robots.txt +5 -0
  28. package/frontend/sitemap.xml +27 -0
  29. package/package.json +81 -0
  30. package/src/agent/index.js +1066 -0
  31. package/src/agent/location.js +51 -0
  32. package/src/agent/prompt.js +548 -0
  33. package/src/agent/tools.js +4372 -0
  34. package/src/browser/detect.js +68 -0
  35. package/src/browser/session.js +1109 -0
  36. package/src/config.js +137 -0
  37. package/src/email/client.js +503 -0
  38. package/src/index.js +557 -0
  39. package/src/inference/anthropic.js +113 -0
  40. package/src/inference/google.js +373 -0
  41. package/src/inference/index.js +81 -0
  42. package/src/inference/ollama.js +383 -0
  43. package/src/inference/openai.js +140 -0
  44. package/src/inference/openrouter.js +378 -0
  45. package/src/inference/xai.js +200 -0
  46. package/src/logBridge.js +9 -0
  47. package/src/models.js +146 -0
  48. package/src/remote/client.js +225 -0
  49. package/src/scheduler/cron.js +243 -0
  50. package/src/server.js +3876 -0
  51. package/src/storage/db.js +1135 -0
  52. package/src/storage/supabase.js +364 -0
  53. package/src/tunnel/cloudflare.js +241 -0
  54. package/src/ui/components/App.jsx +687 -0
  55. package/src/ui/components/BrowserSelect.jsx +111 -0
  56. package/src/ui/components/FilePicker.jsx +472 -0
  57. package/src/ui/components/Header.jsx +444 -0
  58. package/src/ui/components/HelpPanel.jsx +173 -0
  59. package/src/ui/components/HistoryPanel.jsx +158 -0
  60. package/src/ui/components/MessageList.jsx +235 -0
  61. package/src/ui/components/ModelSelector.jsx +304 -0
  62. package/src/ui/components/PromptInput.jsx +515 -0
  63. package/src/ui/components/StreamingResponse.jsx +134 -0
  64. package/src/ui/components/ThinkingIndicator.jsx +365 -0
  65. package/src/ui/components/ToolExecution.jsx +714 -0
  66. package/src/ui/components/index.js +82 -0
  67. package/src/ui/context/TerminalContext.jsx +150 -0
  68. package/src/ui/context/index.js +13 -0
  69. package/src/ui/hooks/index.js +16 -0
  70. package/src/ui/hooks/useChatState.js +675 -0
  71. package/src/ui/hooks/useCommands.js +280 -0
  72. package/src/ui/hooks/useFileAttachments.js +216 -0
  73. package/src/ui/hooks/useKeyboardShortcuts.js +173 -0
  74. package/src/ui/hooks/useNotifications.js +185 -0
  75. package/src/ui/hooks/useTerminalSize.js +151 -0
  76. package/src/ui/hooks/useWebSocket.js +273 -0
  77. package/src/ui/index.js +94 -0
  78. package/src/ui/ink-runner.js +22 -0
  79. package/src/ui/utils/formatters.js +424 -0
  80. package/src/ui/utils/index.js +6 -0
  81. package/src/ui/utils/markdown.js +166 -0
package/src/server.js ADDED
@@ -0,0 +1,3876 @@
1
+ import Fastify from "fastify";
2
+ import fastifyWebsocket from "@fastify/websocket";
3
+ import fastifyStatic from "@fastify/static";
4
+ import { fileURLToPath } from "url";
5
+ import { dirname, join, resolve, relative, basename, extname } from "path";
6
+ import {
7
+ existsSync,
8
+ readdirSync,
9
+ statSync,
10
+ readFileSync,
11
+ lstatSync,
12
+ writeFileSync,
13
+ mkdirSync,
14
+ unlinkSync,
15
+ rmdirSync,
16
+ renameSync,
17
+ rmSync,
18
+ } from "fs";
19
+ import { homedir } from "os";
20
+ import { config, getPublicConfig } from "./config.js";
21
+ import {
22
+ initDb,
23
+ getDb,
24
+ getChat,
25
+ createChat as dbCreateChat,
26
+ getLocalChatIdByCloudId,
27
+ setCloudChatId,
28
+ isValidChatId,
29
+ saveFileSnapshot,
30
+ getSnapshotsToRevert,
31
+ deleteSnapshots,
32
+ deleteAllChatSnapshots,
33
+ saveShellUndo,
34
+ getShellUndosToRevert,
35
+ deleteShellUndos,
36
+ deleteAllChatShellUndos,
37
+ } from "./storage/db.js";
38
+ import { runAgent } from "./agent/index.js";
39
+ import { initializeLocationCache } from "./agent/location.js";
40
+ import {
41
+ cleanResponseText,
42
+ buildRichContextContent,
43
+ cleanContentForDisplay,
44
+ } from "./agent/prompt.js";
45
+ import { initScheduler, stopScheduler } from "./scheduler/cron.js";
46
+ import {
47
+ performWebSearch,
48
+ fetchUrlContent,
49
+ deepWebResearch,
50
+ } from "./agent/tools.js";
51
+ import { streamInference, isImageModel } from "./inference/index.js";
52
+ import { logSink } from "./logBridge.js";
53
+
54
+ const __filename = fileURLToPath(import.meta.url);
55
+ const __dirname = dirname(__filename);
56
+
57
+ let fastify = null;
58
+
59
// Silent mode flag. While it is true, serverLog() prints nothing; this is used
// when the Ink UI is running so server output does not interfere with its
// terminal management.
let silentMode = false;

/**
 * Turn silent mode on: serverLog() output is suppressed until it is disabled.
 */
export function enableSilentMode() {
  silentMode = true;
}

/**
 * Turn silent mode off: serverLog() writes to the console again.
 */
export function disableSilentMode() {
  silentMode = false;
}

/**
 * Server logging helper that honours silent mode.
 * @param {...unknown} args - Values forwarded unchanged to console.log.
 */
function serverLog(...args) {
  if (silentMode) {
    return;
  }
  console.log(...args);
}
85
+
86
+ /** Shared WebSocket message handler factory for local and remote (backend) connections. Set when /ws is registered. */
87
+ let wsHandlerFactory = null;
88
+
89
+ /** When set (remote mode), local CLI messages are also forwarded to the backend so otherwise.ai frontend stays in sync. */
90
+ let remoteRelaySend = null;
91
+
92
+ /** True when this server was started with a remote pairing token (otherwise connect). Frontend uses this to connect to backend WS only in that case. */
93
+ let remoteLinked = false;
94
+
95
+ export async function startServer(port = 3000, options = {}) {
96
+ // Initialize database
97
+ await initDb();
98
+
99
+ // Prefetch user location for agent context (e.g. "what is my location") — same as frontend
100
+ await Promise.race([
101
+ initializeLocationCache(),
102
+ new Promise((r) => setTimeout(r, 2500)),
103
+ ]).catch(() => {});
104
+
105
+ // Initialize scheduler with a plain config object (same shape as agent config)
106
+ const schedulerConfig = { ...config.store };
107
+ initScheduler(schedulerConfig);
108
+
109
+ fastify = Fastify({
110
+ logger: false,
111
+ // Increase body limit for large RAG document uploads with embeddings
112
+ // Default is 1MB, but documents with many chunks + embeddings can be much larger
113
+ bodyLimit: 100 * 1024 * 1024, // 100MB
114
+ });
115
+
116
+ // Register WebSocket plugin
117
+ await fastify.register(fastifyWebsocket);
118
+
119
+ // Serve static frontend files
120
+ const frontendPath = join(__dirname, "..", "frontend");
121
+ const hasFrontend =
122
+ existsSync(frontendPath) && existsSync(join(frontendPath, "index.html"));
123
+
124
+ if (hasFrontend) {
125
+ await fastify.register(fastifyStatic, {
126
+ root: frontendPath,
127
+ prefix: "/",
128
+ });
129
+ } else {
130
+ serverLog("[Server] Frontend not built. Run: npm run build:frontend");
131
+ }
132
+
133
+ // ============================================
134
+ // API Routes
135
+ // ============================================
136
+
137
// Health check. The payload carries the module-level `remoteLinked` flag
// (true when the server was started with `otherwise connect`).
fastify.get("/api/health", async () => ({
  status: "ok",
  version: "0.1.0",
  remoteLinked,
}));
141
+
142
// Serve generated images over HTTP (avoids sending huge base64 over WebSocket).
// Responses:
//   400 - filename is empty, contains "..", or contains a path separator
//   404 - the name does not resolve to a regular file in <cwd>/generated_images
//   200 - file bytes, typed by extension (application/octet-stream if unknown)
fastify.get("/api/generated-images/:filename", async (request, reply) => {
  const { filename } = request.params;
  if (!filename || filename.includes("..") || /[\/\\]/.test(filename)) {
    return reply.status(400).send({ error: "Invalid filename" });
  }

  const filePath = join(process.cwd(), "generated_images", filename);

  // An existence check alone also accepts directories, and readFileSync() on a
  // directory throws EISDIR (surfacing as a 500). Require a regular file.
  // throwIfNoEntry:false makes statSync return undefined for a missing path.
  const stats = statSync(filePath, { throwIfNoEntry: false });
  if (!stats?.isFile()) {
    return reply.status(404).send({ error: "Not found" });
  }

  const mimeTypes = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
  };
  const mime =
    mimeTypes[extname(filename).toLowerCase()] || "application/octet-stream";
  return reply.type(mime).send(readFileSync(filePath));
});
164
+
165
// Get config. Returns whatever getPublicConfig() exposes (the redacted view).
fastify.get("/api/config", async () => getPublicConfig());
169
+
170
// Detect available browsers for automation (used by the browser selection modal).
// The detection module is loaded lazily on each request.
fastify.get("/api/browsers/detect", async () => {
  const { getAvailableBrowsers: detectBrowsers } = await import(
    "./browser/detect.js"
  );
  const browsers = detectBrowsers();
  return { browsers };
});
175
+
176
// Update config from the frontend.
// Body: a JSON object. Recognised keys: model, temperature, ollamaUrl,
// browserChannel, apiKeys{provider: key}, mymx.secret, resend.apiKey,
// resend.from. Any other key is ignored.
// Returns { success: true }, or { success: false, error } when the body is not
// a JSON object.
fastify.put("/api/config", async (request) => {
  const body = request.body;
  // A missing or non-object body would make the hasOwnProperty checks below
  // throw a TypeError (HTTP 500); reject it explicitly instead.
  if (body === null || typeof body !== "object" || Array.isArray(body)) {
    return { success: false, error: "Invalid config payload" };
  }

  // Work on a shallow copy so the request body itself is never mutated.
  const updates = { ...body };
  const hasOwn = (key) => Object.prototype.hasOwnProperty.call(updates, key);

  // model: null / "" is sent by "Clear Selected Model" in Settings; reset the
  // CLI to the default model and skip the generic assignment below.
  if (hasOwn("model") && (updates.model === null || updates.model === "")) {
    config.set("model", "claude-sonnet-4-20250514");
    delete updates.model;
  }

  // Only these plain fields may be updated directly from the frontend.
  // browserChannel is also allowed but is normalised separately further down.
  for (const key of ["model", "temperature", "ollamaUrl"]) {
    if (hasOwn(key)) {
      config.set(key, updates[key]);
    }
  }

  // API keys arrive as actual values; `true` is skipped along with empty values.
  if (updates.apiKeys && typeof updates.apiKeys === "object") {
    for (const [provider, key] of Object.entries(updates.apiKeys)) {
      if (key && key !== true) {
        // Trim whitespace to prevent auth failures from copy-paste artifacts.
        config.set(
          `apiKeys.${provider}`,
          typeof key === "string" ? key.trim() : key,
        );
      }
    }
  }

  // MyMX webhook secret.
  if (updates.mymx?.secret) {
    config.set("mymx.secret", updates.mymx.secret);
  }

  // Resend config (for sending emails).
  if (updates.resend?.apiKey) {
    config.set("resend.apiKey", updates.resend.apiKey);
  }
  if (updates.resend?.from) {
    config.set("resend.from", updates.resend.from);
  }

  // Browser automation channel ('chrome' | 'msedge' | 'chromium'); an empty
  // value is stored as null so the user gets prompted.
  if (hasOwn("browserChannel")) {
    const channel = updates.browserChannel;
    config.set(
      "browserChannel",
      channel === "" || channel === undefined ? null : channel,
    );
  }

  return { success: true };
});
229
+
230
+ // ============================================
231
+ // Model Selection API
232
+ // ============================================
233
+
234
// Get current model; falls back to the default id when none is configured.
fastify.get("/api/model", async () => {
  const selectedModel = config.get("model");
  return { model: selectedModel || "claude-sonnet-4-20250514" };
});
240
+
241
// Set current model.
// Body: { model }. Returns { success: true, model } when a model is given,
// otherwise { success: false, error }.
fastify.put("/api/model", async (request) => {
  // Default to {} so a request without a body yields the "No model specified"
  // response instead of a destructuring TypeError.
  const { model } = request.body || {};
  if (!model) {
    return { success: false, error: "No model specified" };
  }
  config.set("model", model);
  return { success: true, model };
});
250
+
251
// Get OpenRouter models (dynamically fetched from the OpenRouter API).
// Resolves to { success, models } plus `error` on failure; fetch errors are
// logged and reported in the payload rather than thrown.
fastify.get("/api/models/openrouter", async () => {
  const openRouterKey = config.get("apiKeys.openrouter");
  if (!openRouterKey) {
    return {
      success: false,
      error: "OpenRouter API key not configured",
      models: [],
    };
  }

  let models;
  try {
    const { fetchOpenRouterModels } = await import("./inference/openrouter.js");
    models = await fetchOpenRouterModels(openRouterKey);
    serverLog("[Server] Fetched", models.length, "OpenRouter models");
  } catch (error) {
    serverLog("[Server] Error fetching OpenRouter models:", error.message);
    return { success: false, error: error.message, models: [] };
  }
  return { success: true, models };
});
273
+
274
// Get all available models (static list + dynamic OpenRouter list).
// A provider's static models are included only when an API key for that
// provider is configured; otherwise its list is empty.
fastify.get("/api/models", async () => {
  const { MODEL_DATA } = await import("./models.js");
  const apiKeys = config.get("apiKeys") || {};

  // Built in this order so the response keys keep their original order.
  const apiModels = {};
  for (const provider of ["anthropic", "openai", "google", "xai"]) {
    apiModels[provider] = apiKeys[provider]
      ? MODEL_DATA.apiModels[provider]
      : [];
  }
  apiModels.openrouter = [];

  const result = { localModels: MODEL_DATA.localModels, apiModels };

  // Without an OpenRouter key there is nothing to fetch.
  if (!apiKeys.openrouter) {
    return result;
  }

  try {
    const { fetchOpenRouterModels } = await import("./inference/openrouter.js");
    const orModels = await fetchOpenRouterModels(apiKeys.openrouter);
    result.apiModels.openrouter = orModels;
    serverLog(
      "[Server] /api/models includes",
      orModels.length,
      "OpenRouter models",
    );
  } catch (error) {
    // Best effort: a failed OpenRouter fetch leaves that list empty.
    serverLog(
      "[Server] Error fetching OpenRouter models for /api/models:",
      error.message,
    );
  }

  return result;
});
314
+
315
+ // ============================================
316
+ // Generation Data (for frontend analytics - optional)
317
+ // ============================================
318
+
319
// Receives generation analytics from the frontend. Nothing is stored here:
// the chat id is logged and the request is acknowledged.
fastify.post("/api/generation-data", async (request) => {
  const chatId = request.body?.chatId;
  serverLog("[Server] Generation data received:", chatId);
  return { success: true };
});
325
+
326
+ // ============================================
327
+ // Chat API Routes
328
+ // ============================================
329
+
330
// List all chats, most recently updated first.
// Each row is returned with two extra fields for the frontend:
//   timestamp   - updated_at, or created_at when updated_at is empty
//   hasMessages - whether the chat has at least one message
// Any error is logged and an empty list is returned.
fastify.get("/api/chats", async () => {
  const toFrontendChat = (chat) => ({
    ...chat,
    timestamp: chat.updated_at || chat.created_at,
    hasMessages: chat.message_count > 0,
  });

  try {
    const db = getDb();
    serverLog("[Server] GET /api/chats - Database object exists:", !!db);

    const listStatement = db.prepare(
      `
SELECT id, title, created_at, updated_at,
(SELECT COUNT(*) FROM messages WHERE chat_id = chats.id) as message_count
FROM chats
ORDER BY updated_at DESC
`,
    );
    const chats = listStatement.all();

    serverLog("[Server] GET /api/chats returning", chats.length, "chats");
    if (chats.length === 0) {
      serverLog("[Server] No chats found in database!");
      // Debug aid: list the tables that exist in the database.
      const tableNames = db
        .prepare("SELECT name FROM sqlite_master WHERE type='table'")
        .all()
        .map((t) => t.name)
        .join(", ");
      serverLog("[Server] Tables in database:", tableNames);
    } else {
      serverLog("[Server] First chat:", JSON.stringify(chats[0]));
    }

    return chats.map(toFrontendChat);
  } catch (error) {
    console.error("[Server] Error in GET /api/chats:", error);
    return [];
  }
});
375
+
376
// Get a single chat with its messages (oldest first).
// Returns { error: "Chat not found" } when the id is unknown; otherwise the
// chat row plus `messages`. Each message carries:
//   content     - display text (assistant messages pass through cleanContentForDisplay)
//   fullContent - the stored content, unmodified
//   metadata    - the parsed metadata object, whose fields are also spread
//                 onto the message itself (tps, numTokens, model, toolCalls, ...)
fastify.get("/api/chats/:id", async (request) => {
  const { id } = request.params;
  const db = getDb();

  const chat = db.prepare("SELECT * FROM chats WHERE id = ?").get(id);
  if (!chat) {
    return { error: "Chat not found" };
  }

  const messages = db
    .prepare(
      `
SELECT id, role, content, metadata, created_at
FROM messages
WHERE chat_id = ?
ORDER BY created_at ASC
`,
    )
    .all(id);

  // Parse one message's metadata column. A malformed or non-object value must
  // not make the whole chat unloadable, so it degrades to {} (and is logged).
  const parseMetadata = (message) => {
    if (!message.metadata) {
      return {};
    }
    try {
      const parsed = JSON.parse(message.metadata);
      return parsed !== null && typeof parsed === "object" ? parsed : {};
    } catch (err) {
      serverLog(
        "[Server] Ignoring malformed metadata for message",
        message.id,
        "-",
        err.message,
      );
      return {};
    }
  };

  const parsedMessages = messages.map((m) => {
    const metadata = parseMetadata(m);

    // Assistant content is cleaned for display (tool XML stripped, prose kept);
    // the tool calls themselves live in metadata.toolCalls.
    const displayContent =
      m.role === "assistant" && m.content
        ? cleanContentForDisplay(m.content)
        : m.content;

    return {
      ...m,
      content: displayContent, // Clean version for frontend display
      fullContent: m.content, // Full version with tool context (for AI history)
      ...metadata, // Metadata fields flattened onto the message
      metadata, // Keep the metadata object too
    };
  });

  return { ...chat, messages: parsedMessages };
});
420
+
421
// Create a new chat. The id comes from dbCreateChat (a UUID, for local/cloud
// parity). A missing or empty title is stored and returned as null.
fastify.post("/api/chats", async (request) => {
  const payload = request.body || {};
  const chatTitle = payload.title || null;
  const id = dbCreateChat(chatTitle, null);
  return { id, title: chatTitle };
});
427
+
428
// Update chat (title and/or messages). Id is a UUID string.
// Body: { title?, messages? }.
//   - The chat row is created if it does not exist yet (cloud/synced chats may
//     not be in the CLI database).
//   - `title` is written when it is present in the body.
//   - `messages` (array) replaces the stored messages, unless it is shorter
//     than what is stored; in that case only updated_at is touched.
// Responses: 400 for an invalid id, 500 { error } on failure, else { success: true }.
fastify.put("/api/chats/:id", async (request, reply) => {
  const id = String(request.params.id ?? "").trim();
  if (!id || !isValidChatId(id)) {
    return reply.code(400).send({ error: "Invalid chat id (UUID required)" });
  }
  const { title, messages } = request.body || {};
  const db = getDb();

  const touchChat = () =>
    db
      .prepare("UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?")
      .run(id);

  // Replace all stored messages with the incoming list. The DELETE and the
  // INSERTs run in one transaction: if any insert fails, the previous messages
  // are restored instead of being lost.
  const replaceMessages = () => {
    db.prepare("BEGIN").run();
    try {
      db.prepare("DELETE FROM messages WHERE chat_id = ?").run(id);

      const insertStmt = db.prepare(
        "INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)",
      );

      for (const msg of messages) {
        // Entries without a role or content are skipped, as are null entries.
        if (
          !msg ||
          !msg.role ||
          msg.content === undefined ||
          msg.content === null
        ) {
          continue;
        }
        const metadata = {
          images: msg.images,
          files: msg.files,
          model: msg.model,
          tps: msg.tps,
          numTokens: msg.numTokens,
          _searchData: msg._searchData,
          _ragSources: msg._ragSources,
          toolCalls: msg.toolCalls,
        };
        // Prefer the full content (with tool context) when the client sent it.
        const contentToStore = msg.fullContent ?? msg.content;
        const contentStr =
          typeof contentToStore === "string"
            ? contentToStore
            : JSON.stringify(contentToStore);
        insertStmt.run(id, msg.role, contentStr, JSON.stringify(metadata));
      }

      touchChat();
      db.prepare("COMMIT").run();
    } catch (err) {
      try {
        db.prepare("ROLLBACK").run();
      } catch {
        // SQLite may already have rolled the transaction back on its own;
        // the original error below is the one worth reporting.
      }
      throw err;
    }
  };

  try {
    // Ensure chat exists (cloud/synced chats may not exist in CLI DB yet)
    const existingChat = db
      .prepare("SELECT id FROM chats WHERE id = ?")
      .get(id);
    if (!existingChat) {
      db.prepare(
        "INSERT INTO chats (id, title, updated_at) VALUES (?, ?, CURRENT_TIMESTAMP)",
      ).run(id, title ?? null);
      serverLog("[Server] Created chat", id, "for sync (cloud/synced)");
    }

    // Update title if provided
    if (title !== undefined) {
      db.prepare(
        "UPDATE chats SET title = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?",
      ).run(title, id);
    }

    // PROTECTION: don't overwrite when we already hold MORE messages than the
    // client sent (prevents data loss from stale frontend state).
    if (Array.isArray(messages)) {
      const existingCount =
        db
          .prepare("SELECT COUNT(*) as count FROM messages WHERE chat_id = ?")
          .get(id)?.count || 0;

      if (messages.length < existingCount) {
        serverLog(
          "[Server] SKIPPING message sync - would lose data (incoming:",
          messages.length,
          "existing:",
          existingCount,
          ")",
        );
        touchChat();
      } else {
        replaceMessages();
        serverLog(
          "[Server] Updated chat",
          id,
          "with",
          messages.length,
          "messages",
        );
      }
    }

    return { success: true };
  } catch (err) {
    serverLog("[Server] PUT /api/chats/" + id, "error:", err.message);
    return reply
      .code(500)
      .send({ error: err.message || "Failed to update chat" });
  }
});
531
+
532
// Delete a chat (id is a UUID string) together with its undo data and messages.
// Responds 400 for an invalid id, otherwise { success: true }.
fastify.delete("/api/chats/:id", async (request, reply) => {
  const chatId = String(request.params.id ?? "").trim();
  const idIsUsable = chatId !== "" && isValidChatId(chatId);
  if (!idIsUsable) {
    return reply.code(400).send({ error: "Invalid chat id (UUID required)" });
  }
  serverLog("[Server] DELETE /api/chats/" + chatId);
  const db = getDb();

  // Undo-feature cleanup: file snapshots and the shell undo log for this chat.
  deleteAllChatSnapshots(chatId);
  deleteAllChatShellUndos(chatId);
  serverLog(
    "[Server] Deleted file snapshots and shell undo log for chat",
    chatId,
  );

  // Messages go first (foreign key), then the chat row itself.
  const deleteMessages = db.prepare("DELETE FROM messages WHERE chat_id = ?");
  const msgResult = deleteMessages.run(chatId);
  const deleteChat = db.prepare("DELETE FROM chats WHERE id = ?");
  const chatResult = deleteChat.run(chatId);

  serverLog(
    "[Server] Deleted chat",
    chatId,
    "- messages:",
    msgResult.changes,
    "chat:",
    chatResult.changes,
  );

  return { success: true };
});
566
+
567
// Add a message to a chat (id is a UUID string).
// Body: { role, content, metadata? }.
// Responses: 400 for an invalid id or a missing role/content, otherwise
// { id } holding the new row id.
fastify.post("/api/chats/:id/messages", async (request, reply) => {
  const id = String(request.params.id ?? "").trim();
  if (!id || !isValidChatId(id)) {
    return reply.code(400).send({ error: "Invalid chat id (UUID required)" });
  }

  // Default to {} so a request without a body is answered with a 400 instead
  // of a destructuring TypeError.
  const { role, content, metadata } = request.body || {};
  if (!role || content === undefined || content === null) {
    return reply.code(400).send({ error: "role and content are required" });
  }

  // Same serialisation as the PUT /api/chats/:id sync: non-string content is
  // stored as JSON text.
  const contentStr =
    typeof content === "string" ? content : JSON.stringify(content);

  const db = getDb();
  const result = db
    .prepare(
      "INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)",
    )
    .run(id, role, contentStr, metadata ? JSON.stringify(metadata) : null);

  // Update chat timestamp
  db.prepare(
    "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
  ).run(id);

  return { id: result.lastInsertRowid };
});
591
+
592
+ // ============================================
593
+ // Web Search API
594
+ // ============================================
595
+
596
// Simple web search.
// Body: { query, numResults = 5 }.
// Returns { success: true, query, results, count }, or { success: false, error }.
fastify.post("/api/search", async (request) => {
  // Default to {} so a missing body yields "Query is required" rather than a
  // destructuring TypeError.
  const { query, numResults = 5 } = request.body || {};

  if (!query) {
    return { success: false, error: "Query is required" };
  }

  serverLog("[Server] Web search:", query);

  // Hostname of a result URL; "" when the URL cannot be parsed, so a single
  // odd result does not fail the whole response.
  const hostnameOf = (url) => {
    try {
      return new URL(url).hostname;
    } catch {
      return "";
    }
  };

  try {
    const results = await performWebSearch(query, numResults);
    return {
      success: true,
      query,
      results: results.map((r) => ({
        title: r.title,
        url: r.url,
        snippet: r.snippet,
        source: hostnameOf(r.url),
        engines: r.engines || [],
      })),
      count: results.length,
    };
  } catch (err) {
    console.error("[Server] Search error:", err);
    return { success: false, error: err.message };
  }
});
625
+
626
// Fetch URL content.
// Body: { url }.
// Returns { success: true, url, content }, or { success: false, error }.
fastify.post("/api/fetch-url", async (request) => {
  // Default to {} so a missing body yields "URL is required" rather than a
  // destructuring TypeError.
  const { url } = request.body || {};

  if (!url) {
    return { success: false, error: "URL is required" };
  }

  serverLog("[Server] Fetching URL:", url);

  try {
    const content = await fetchUrlContent(url);
    return { success: true, url, content };
  } catch (err) {
    console.error("[Server] Fetch error:", err);
    return { success: false, error: err.message };
  }
});
644
+
645
// Deep web research (search + fetch multiple pages).
// Body: { query, numPages = 5 }.
// Returns { success: true, query, pages, count, formatted_context } where
// formatted_context is a ready-made text block for LLM context, or
// { success: false, error } when nothing was found or the research failed.
fastify.post("/api/research", async (request) => {
  // Default to {} so a missing body yields "Query is required" rather than a
  // destructuring TypeError.
  const { query, numPages = 5 } = request.body || {};

  if (!query) {
    return { success: false, error: "Query is required" };
  }

  serverLog("[Server] Deep research:", query, "pages:", numPages);

  // Hostname of a page URL; "" when the URL cannot be parsed, so a single odd
  // page does not fail the whole response.
  const hostnameOf = (url) => {
    try {
      return new URL(url).hostname;
    } catch {
      return "";
    }
  };

  try {
    const results = await deepWebResearch(query, numPages);
    const pages = Array.isArray(results.pages) ? results.pages : [];

    if (!results.success || pages.length === 0) {
      return { success: false, error: "No results found" };
    }

    // Format for LLM context
    const formattedContext =
      `WEB RESEARCH RESULTS for "${query}":\n\n` +
      pages
        .map(
          (page, i) =>
            `[Source ${i + 1}: ${page.title}]\nURL: ${page.url}\n${page.content}`,
        )
        .join("\n\n---\n\n") +
      "\n\n---\nUse the above research to answer the user's question. Cite sources when appropriate.";

    return {
      success: true,
      query,
      pages: pages.map((p) => ({
        title: p.title,
        url: p.url,
        excerpt: p.excerpt,
        content: p.content,
        source: hostnameOf(p.url),
      })),
      count: pages.length,
      formatted_context: formattedContext,
    };
  } catch (err) {
    console.error("[Server] Research error:", err);
    return { success: false, error: err.message };
  }
});
691
+
692
+ // ============================================
693
+ // Shell Command Autocomplete API
694
+ // ============================================
695
+
696
// Fast autocomplete for shell commands.
// Body: { partial, context = "shell" }.
// Returns { success: true, suggestion, model }, or { success: false, error }
// when the input is shorter than 2 characters (or not a string), no API key
// is available for the chosen model, or inference fails.
fastify.post("/api/autocomplete", async (request) => {
  // Default to {} so a missing body is reported as "Input too short" rather
  // than a destructuring TypeError.
  const { partial, context = "shell" } = request.body || {};

  // The string check keeps the substring()/toLowerCase() calls below safe.
  if (typeof partial !== "string" || partial.length < 2) {
    return { success: false, error: "Input too short" };
  }

  serverLog("[Server] Autocomplete request:", partial.substring(0, 50));

  try {
    // Use a fast model for low latency.
    // Priority: gpt-4o-mini > claude-3-5-haiku > gemini-flash > configured model
    const apiKeys = config.get("apiKeys") || {};
    let autocompleteModel;
    if (apiKeys.openai) {
      autocompleteModel = "gpt-4o-mini";
    } else if (apiKeys.anthropic) {
      autocompleteModel = "claude-3-5-haiku-20241022";
    } else if (apiKeys.google) {
      autocompleteModel = "gemini-2.0-flash";
    } else {
      autocompleteModel = config.get("model") || "claude-sonnet-4-20250514";
    }

    // System prompt optimized for shell command completion
    const systemPrompt =
      context === "shell"
        ? `You are a shell command autocomplete assistant. Complete the partial command the user has typed.
Rules:
- Output ONLY the completion text (what comes after what the user typed)
- Do NOT repeat what the user already typed
- Do NOT include any explanation or commentary
- Keep completions short and practical
- If multiple options exist, pick the most common/likely one
- If the input doesn't look like a shell command, return empty string

Examples:
User: "git ch" -> "eckout "
User: "npm i" -> "nstall "
User: "docker run -" -> "-it "
User: "ls -l" -> "a "
User: "cd ~/" -> "Documents"
User: "kubectl get p" -> "ods "`
        : `You are an autocomplete assistant. Complete the user's partial input.
Output ONLY the completion text (what comes after what the user typed).
Do NOT repeat what the user already typed or add explanations.`;

    // Import inference module dynamically
    const { streamInference, hasRequiredApiKey } =
      await import("./inference/index.js");

    if (!hasRequiredApiKey(autocompleteModel, config.store)) {
      return {
        success: false,
        error: "No API key configured for autocomplete",
      };
    }

    const messages = [{ role: "user", content: partial }];
    const inferenceOptions = {
      ...config.store,
      maxTokens: 50, // Keep it short for speed
      temperature: 0, // Deterministic completions
    };

    // Collect the streamed text chunks into a single completion string.
    let completion = "";
    for await (const chunk of streamInference(
      autocompleteModel,
      messages,
      systemPrompt,
      inferenceOptions,
    )) {
      if (chunk.type === "text") {
        completion += chunk.content;
      }
    }

    completion = completion.trim();

    // If the model echoed the input anyway, cut the echoed prefix off.
    if (completion.toLowerCase().startsWith(partial.toLowerCase())) {
      completion = completion.substring(partial.length);
    }

    serverLog("[Server] Autocomplete result:", completion.substring(0, 30));

    return {
      success: true,
      suggestion: completion,
      model: autocompleteModel,
    };
  } catch (err) {
    console.error("[Server] Autocomplete error:", err);
    return { success: false, error: err.message };
  }
});
795
+
796
+ // ============================================
797
+ // File System API (Cursor-like @ mentions)
798
+ // ============================================
799
+
800
// File extension to type mapping for icons.
// Keys are lower-case and include the leading dot.
const FILE_TYPE_MAP = {
  // Code files
  ".js": "javascript",
  ".jsx": "javascript",
  ".ts": "typescript",
  ".tsx": "typescript",
  ".py": "python",
  ".rb": "ruby",
  ".go": "go",
  ".rs": "rust",
  ".java": "java",
  ".c": "c",
  ".cpp": "cpp",
  ".h": "c",
  ".hpp": "cpp",
  ".cs": "csharp",
  ".php": "php",
  ".swift": "swift",
  ".kt": "kotlin",
  ".scala": "scala",
  ".r": "r",
  ".sql": "sql",
  ".sh": "shell",
  ".bash": "shell",
  ".zsh": "shell",
  ".fish": "shell",
  // Web files
  ".html": "html",
  ".htm": "html",
  ".css": "css",
  ".scss": "scss",
  ".sass": "sass",
  ".less": "less",
  ".vue": "vue",
  ".svelte": "svelte",
  // Config files
  ".json": "json",
  ".yaml": "yaml",
  ".yml": "yaml",
  ".toml": "toml",
  ".xml": "xml",
  ".ini": "config",
  ".env": "config",
  ".gitignore": "git",
  // Docs
  ".md": "markdown",
  ".mdx": "markdown",
  ".txt": "text",
  ".rst": "text",
  // Data
  ".csv": "data",
  ".tsv": "data",
  // Images
  ".png": "image",
  ".jpg": "image",
  ".jpeg": "image",
  ".gif": "image",
  ".svg": "image",
  ".webp": "image",
  ".ico": "image",
};

// Directories to ignore in listing
const IGNORED_DIRS = [
  "node_modules",
  ".git",
  "__pycache__",
  ".next",
  ".nuxt",
  "dist",
  "build",
  ".cache",
  "coverage",
  ".vscode",
  ".idea",
  "venv",
  ".venv",
  "env",
  ".env.local",
  ".DS_Store",
];

// Get the file type for a file name or path.
// Returns the FILE_TYPE_MAP value for the (case-insensitive) extension, or
// "file" when the extension is unknown.
const getFileType = (filename) => {
  const ext = extname(filename).toLowerCase();
  // extname() returns "" for dotfiles such as ".gitignore" or ".env", so the
  // map entries for them could never match by extension; look those up by
  // their whole name instead.
  const name = basename(filename).toLowerCase();
  const key = ext || (name.startsWith(".") ? name : "");
  // Own-property check so a name can never resolve to an inherited member.
  return Object.prototype.hasOwnProperty.call(FILE_TYPE_MAP, key)
    ? FILE_TYPE_MAP[key]
    : "file";
};

// Check if an entry should be left out of listings: anything named in
// IGNORED_DIRS, plus every name that starts with a dot.
const shouldIgnore = (name) => {
  return IGNORED_DIRS.includes(name) || name.startsWith(".");
};

// Get the workspace root (the process's current working directory).
const getWorkspaceRoot = () => {
  return process.cwd();
};
898
+
899
// List files and directories for @ mention autocomplete.
//
// Query parameters:
//   path  - optional location to inspect: "~"-prefixed (joined onto the home
//           directory), absolute ("/..."), or relative to the workspace root.
//           Defaults to the workspace root.
//   query - optional case-insensitive substring filter on entry names; when
//           given, entries that shouldIgnore() would hide are eligible too.
//
// Resolves to { success: true, type: "file", file } for a file,
// { success: true, type: "directory", items, ... } for a directory, or
// { success: false, error } on any failure.
fastify.get("/api/files", async (request) => {
  const { path: requestPath = "", query = "" } = request.query;
  const workspaceRoot = getWorkspaceRoot();

  // True when `target` is `root` itself or nested beneath it. A plain
  // startsWith() prefix test would also accept siblings such as
  // "/home/alice-secrets" for root "/home/alice", so compare path segments.
  const isInside = (root, target) => {
    const rel = relative(root, target);
    if (rel === "") return true;
    // rel === target guards the case where relative() cannot express a
    // relative route and hands back the absolute target (e.g. another drive).
    return rel !== target && !/^\.\.(?:[\\/]|$)/.test(rel);
  };

  try {
    // Resolve the path relative to workspace root
    let targetPath = workspaceRoot;
    if (requestPath) {
      if (requestPath.startsWith("~")) {
        // Handle ~ expansion
        targetPath = join(homedir(), requestPath.slice(1));
      } else if (requestPath.startsWith("/")) {
        targetPath = requestPath;
      } else {
        targetPath = resolve(workspaceRoot, requestPath);
      }
    }

    // Security: only the workspace and the home directory may be browsed.
    const normalizedTarget = resolve(targetPath);
    if (
      !isInside(resolve(workspaceRoot), normalizedTarget) &&
      !isInside(resolve(homedir()), normalizedTarget)
    ) {
      return {
        success: false,
        error: "Access denied: path outside allowed directories",
      };
    }

    if (!existsSync(targetPath)) {
      return { success: false, error: "Path not found" };
    }

    const stat = statSync(targetPath);

    // If it's a file, return file info
    if (!stat.isDirectory()) {
      return {
        success: true,
        type: "file",
        file: {
          name: basename(targetPath),
          path: relative(workspaceRoot, targetPath) || targetPath,
          absolutePath: targetPath,
          type: getFileType(targetPath),
          size: stat.size,
          modified: stat.mtime,
        },
      };
    }

    // List directory contents
    const queryLower = query.toLowerCase();
    const items = [];

    for (const name of readdirSync(targetPath)) {
      // Skip ignored directories/files unless specifically queried
      if (!query && shouldIgnore(name)) continue;

      // Filter by query if provided
      if (query && !name.toLowerCase().includes(queryLower)) continue;

      const fullPath = join(targetPath, name);
      try {
        // lstat so a symlink is reported as a symlink rather than followed
        const entryStat = lstatSync(fullPath);
        const isDirectory = entryStat.isDirectory();

        items.push({
          name,
          path: relative(workspaceRoot, fullPath) || fullPath,
          absolutePath: fullPath,
          type: isDirectory ? "directory" : getFileType(name),
          isDirectory,
          isSymlink: entryStat.isSymbolicLink(),
          size: isDirectory ? null : entryStat.size,
          modified: entryStat.mtime,
        });
      } catch (err) {
        // Skip files we can't access
        console.warn(`[Server] Could not stat ${fullPath}:`, err.message);
      }
    }

    // Sort: directories first, then alphabetically
    items.sort((a, b) => {
      if (a.isDirectory !== b.isDirectory) return a.isDirectory ? -1 : 1;
      return a.name.localeCompare(b.name);
    });

    return {
      success: true,
      type: "directory",
      path: relative(workspaceRoot, targetPath) || ".",
      absolutePath: targetPath,
      workspaceRoot,
      items,
    };
  } catch (err) {
    console.error("[Server] File list error:", err);
    return { success: false, error: err.message };
  }
});
1013
+
1014
// Search files by name or workspace-relative path (case-insensitive substring).
//
// Query parameters:
//   query      - required search text.
//   maxResults - optional cap on the number of matches (default 50).
//
// Walks the workspace root recursively (at most 6 directory levels: depth
// 0-5), skipping entries that shouldIgnore() hides. Resolves to
// { success: true, query, workspaceRoot, results, total } or
// { success: false, error }.
fastify.get("/api/files/search", async (request) => {
  const { query = "", maxResults = 50 } = request.query;
  const workspaceRoot = getWorkspaceRoot();

  if (!query) {
    return { success: false, error: "Query is required" };
  }

  // maxResults comes from the query string, so parse it once. Left unparsed,
  // a non-numeric value compares as NaN: the walk is never capped and the
  // final slice() then returns nothing.
  const parsedLimit = Number.parseInt(maxResults, 10);
  const limit = Number.isFinite(parsedLimit) ? parsedLimit : 50;

  serverLog("[Server] File search:", query);

  try {
    const results = [];
    const queryLower = query.toLowerCase();

    // Recursive file search with depth limit
    const searchDir = (dir, depth = 0) => {
      if (depth > 5 || results.length >= limit) return;

      let entries;
      try {
        entries = readdirSync(dir);
      } catch (err) {
        // Best-effort walk: skip inaccessible directories
        return;
      }

      for (const name of entries) {
        if (results.length >= limit) break;
        if (shouldIgnore(name)) continue;

        const fullPath = join(dir, name);

        try {
          // lstat: symlinked directories are not reported as directories,
          // so the walk does not follow them.
          const stat = lstatSync(fullPath);
          const isDirectory = stat.isDirectory();
          const relativePath = relative(workspaceRoot, fullPath);

          // Check if name or relative path matches query
          if (
            name.toLowerCase().includes(queryLower) ||
            relativePath.toLowerCase().includes(queryLower)
          ) {
            results.push({
              name,
              path: relativePath,
              absolutePath: fullPath,
              type: isDirectory ? "directory" : getFileType(name),
              isDirectory,
              size: isDirectory ? null : stat.size,
              modified: stat.mtime,
            });
          }

          // Recurse into directories
          if (isDirectory) {
            searchDir(fullPath, depth + 1);
          }
        } catch (err) {
          // Best-effort walk: skip inaccessible files
        }
      }
    };

    searchDir(workspaceRoot);

    // Rank: exact filename matches first, then shorter paths (more relevant)
    results.sort((a, b) => {
      const aExact = a.name.toLowerCase() === queryLower;
      const bExact = b.name.toLowerCase() === queryLower;
      if (aExact !== bExact) return aExact ? -1 : 1;
      return a.path.length - b.path.length;
    });

    return {
      success: true,
      query,
      workspaceRoot,
      results: results.slice(0, limit),
      total: results.length,
    };
  } catch (err) {
    console.error("[Server] File search error:", err);
    return { success: false, error: err.message };
  }
});
1101
+
1102
// Read file content (for including in chat context).
//
// Query parameters:
//   path - required file location: "~"-prefixed (joined onto the home
//          directory), absolute ("/..."), or relative to the workspace root.
//
// Refuses directories, files over 1MB and files with a known binary
// extension. Resolves to { success: true, path, absolutePath, name, type,
// size, lineCount, content } or { success: false, error, ... }.
fastify.get("/api/files/read", async (request) => {
  const { path: filePath } = request.query;
  const workspaceRoot = getWorkspaceRoot();

  if (!filePath) {
    return { success: false, error: "Path is required" };
  }

  // True when `target` is `root` itself or nested beneath it. A plain
  // startsWith() prefix test would also accept siblings such as
  // "/home/alice-secrets" for root "/home/alice", so compare path segments.
  const isInside = (root, target) => {
    const rel = relative(root, target);
    if (rel === "") return true;
    // rel === target guards the case where relative() cannot express a
    // relative route and hands back the absolute target (e.g. another drive).
    return rel !== target && !/^\.\.(?:[\\/]|$)/.test(rel);
  };

  try {
    // Resolve the path
    let targetPath = filePath;
    if (filePath.startsWith("~")) {
      targetPath = join(homedir(), filePath.slice(1));
    } else if (!filePath.startsWith("/")) {
      targetPath = resolve(workspaceRoot, filePath);
    }

    // Security check: only the workspace and the home directory are readable
    const normalizedTarget = resolve(targetPath);
    if (
      !isInside(resolve(workspaceRoot), normalizedTarget) &&
      !isInside(resolve(homedir()), normalizedTarget)
    ) {
      return {
        success: false,
        error: "Access denied: path outside allowed directories",
      };
    }

    if (!existsSync(targetPath)) {
      return { success: false, error: "File not found" };
    }

    const stat = statSync(targetPath);

    if (stat.isDirectory()) {
      return { success: false, error: "Cannot read directory content" };
    }

    // Check file size (limit to 1MB for safety)
    if (stat.size > 1024 * 1024) {
      return {
        success: false,
        error: "File too large (max 1MB)",
        size: stat.size,
      };
    }

    // Check if it's a binary file (basic heuristic: extension only)
    const binaryExtensions = [
      ".png",
      ".jpg",
      ".jpeg",
      ".gif",
      ".ico",
      ".pdf",
      ".zip",
      ".tar",
      ".gz",
      ".exe",
      ".bin",
      ".wasm",
    ];
    if (binaryExtensions.includes(extname(targetPath).toLowerCase())) {
      return {
        success: false,
        error: "Cannot read binary file",
        type: getFileType(targetPath),
      };
    }

    const content = readFileSync(targetPath, "utf-8");

    return {
      success: true,
      path: relative(workspaceRoot, targetPath) || targetPath,
      absolutePath: targetPath,
      name: basename(targetPath),
      type: getFileType(targetPath),
      size: stat.size,
      lineCount: content.split("\n").length,
      content,
    };
  } catch (err) {
    console.error("[Server] File read error:", err);
    return { success: false, error: err.message };
  }
});
1195
+
1196
// Get folder tree (recursive file listing for @ mentions).
//
// Query parameters:
//   path           - required folder location: "~"-prefixed, absolute, or
//                    relative to the workspace root.
//   maxDepth       - deepest directory level to descend into (default 5).
//   maxFiles       - stop after this many files (default 100).
//   includeContent - "true" to attach each text file's content; any other
//                    value returns structure and line counts only.
//
// Resolves to { success: true, files, directories, treeDisplay, ... } or
// { success: false, error }.
fastify.get("/api/files/tree", async (request) => {
  const {
    path: folderPath,
    maxDepth = 5,
    maxFiles = 100,
    includeContent = "false",
  } = request.query;
  const shouldIncludeContent = includeContent === "true";
  const workspaceRoot = getWorkspaceRoot();

  if (!folderPath) {
    return { success: false, error: "Path is required" };
  }

  // Limits come from the query string; parse them once so a non-numeric
  // value falls back to the default instead of comparing as NaN (which
  // would silently disable the limit).
  const toLimit = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : fallback;
  };
  const depthLimit = toLimit(maxDepth, 5);
  const fileLimit = toLimit(maxFiles, 100);

  // True when `target` is `root` itself or nested beneath it. A plain
  // startsWith() prefix test would also accept siblings such as
  // "/home/alice-secrets" for root "/home/alice", so compare path segments.
  const isInside = (root, target) => {
    const rel = relative(root, target);
    if (rel === "") return true;
    // rel === target guards the case where relative() cannot express a
    // relative route and hands back the absolute target (e.g. another drive).
    return rel !== target && !/^\.\.(?:[\\/]|$)/.test(rel);
  };

  try {
    // Resolve the path
    let targetPath = folderPath;
    if (folderPath.startsWith("~")) {
      targetPath = join(homedir(), folderPath.slice(1));
    } else if (!folderPath.startsWith("/")) {
      targetPath = resolve(workspaceRoot, folderPath);
    }

    // Security check: only the workspace and the home directory are listable
    const normalizedTarget = resolve(targetPath);
    if (
      !isInside(resolve(workspaceRoot), normalizedTarget) &&
      !isInside(resolve(homedir()), normalizedTarget)
    ) {
      return {
        success: false,
        error: "Access denied: path outside allowed directories",
      };
    }

    if (!existsSync(targetPath)) {
      return { success: false, error: "Folder not found" };
    }

    const stat = statSync(targetPath);

    if (!stat.isDirectory()) {
      return { success: false, error: "Path is not a directory" };
    }

    // Binary file extensions to skip content reading
    const binaryExtensions = [
      ".png",
      ".jpg",
      ".jpeg",
      ".gif",
      ".ico",
      ".pdf",
      ".zip",
      ".tar",
      ".gz",
      ".exe",
      ".bin",
      ".wasm",
      ".mp3",
      ".mp4",
      ".wav",
      ".avi",
      ".mov",
    ];

    // Recursively build file tree
    const files = [];
    const directories = []; // Track subdirectories for tree display
    let fileCount = 0;

    const buildTree = (dir, depth = 0) => {
      if (depth > depthLimit || fileCount >= fileLimit) return;

      let names;
      try {
        names = readdirSync(dir);
      } catch (err) {
        // Best-effort walk: skip inaccessible directories
        return;
      }

      // Stat each visible entry once up front; the comparator then works on
      // cached flags instead of hitting the filesystem per comparison.
      const entries = [];
      for (const name of names) {
        if (shouldIgnore(name)) continue;
        const fullPath = join(dir, name);
        try {
          // lstat: symlinks are never reported as directories, so they are
          // listed as files and not followed.
          const entryStat = lstatSync(fullPath);
          entries.push({
            name,
            fullPath,
            entryStat,
            isDir: entryStat.isDirectory(),
          });
        } catch (err) {
          // Best-effort walk: skip inaccessible entries
        }
      }

      // Sort directories first, then files, each alphabetically
      entries.sort((a, b) => {
        if (a.isDir !== b.isDir) return a.isDir ? -1 : 1;
        return a.name.localeCompare(b.name);
      });

      for (const { name, fullPath, entryStat, isDir } of entries) {
        if (fileCount >= fileLimit) break;

        const relativePath = relative(workspaceRoot, fullPath);

        if (isDir) {
          // Track directory for tree display, then recurse into it
          directories.push({ name, path: relativePath, depth });
          buildTree(fullPath, depth + 1);
          continue;
        }

        const isBinary = binaryExtensions.includes(
          extname(fullPath).toLowerCase(),
        );
        const isLarge = entryStat.size > 512 * 1024; // 512KB limit per file

        const fileEntry = {
          name,
          path: relativePath,
          type: getFileType(name),
          size: entryStat.size,
          lineCount: 0,
          depth,
          content: null,
        };

        if (!isBinary && !isLarge) {
          // Text files are always read to count lines; the content itself is
          // attached only when explicitly requested.
          try {
            const content = readFileSync(fullPath, "utf-8");
            if (shouldIncludeContent) fileEntry.content = content;
            fileEntry.lineCount = content.split("\n").length;
          } catch (readErr) {
            if (shouldIncludeContent) fileEntry.error = "Could not read file";
          }
        } else if (isBinary) {
          fileEntry.isBinary = true;
        } else {
          fileEntry.isLarge = true;
        }

        files.push(fileEntry);
        fileCount++;
      }
    };

    buildTree(targetPath);

    // Calculate total stats
    const totalSize = files.reduce((sum, f) => sum + (f.size || 0), 0);
    const totalLines = files.reduce((sum, f) => sum + (f.lineCount || 0), 0);
    const folderRelPath = relative(workspaceRoot, targetPath) || folderPath;

    // Build a tree display string for folder context.
    // Groups files by their parent directory for a nice tree visualization.
    const buildTreeDisplay = () => {
      const lines = [];
      const basePath = folderRelPath;

      // Group files by their parent directory
      const filesByDir = new Map();
      filesByDir.set(basePath, []); // Root folder

      for (const file of files) {
        const parentDir = dirname(file.path);
        if (!filesByDir.has(parentDir)) {
          filesByDir.set(parentDir, []);
        }
        filesByDir.get(parentDir).push(file);
      }

      // Add directories that have no files but are parents
      for (const dir of directories) {
        if (!filesByDir.has(dir.path)) {
          filesByDir.set(dir.path, []);
        }
      }

      // Sort directories by path
      const sortedDirs = Array.from(filesByDir.keys()).sort();

      for (const dirPath of sortedDirs) {
        const filesInDir = filesByDir.get(dirPath);
        const isRoot = dirPath === basePath;
        // Indentation depth is the number of path segments below the root
        const depth = isRoot
          ? 0
          : dirPath.split("/").length - basePath.split("/").length;
        const indent = " ".repeat(depth);

        // Show directory name (except for root)
        if (!isRoot) {
          lines.push(`${indent}📁 ${basename(dirPath)}/`);
        }

        // Show files in this directory
        filesInDir.sort((a, b) => a.name.localeCompare(b.name));
        for (const file of filesInDir) {
          const fileIndent = isRoot ? "" : " ".repeat(depth + 1);
          const lineInfo =
            file.lineCount > 0 ? ` (${file.lineCount} lines)` : "";
          const sizeInfo =
            file.size > 0 ? ` [${(file.size / 1024).toFixed(1)}KB]` : "";
          const icon = file.isBinary ? "📦" : "📄";
          lines.push(`${fileIndent}${icon} ${file.name}${lineInfo}${sizeInfo}`);
        }
      }

      return lines.join("\n");
    };

    return {
      success: true,
      folderPath: folderRelPath,
      folderName: basename(targetPath),
      fileCount: files.length,
      directoryCount: directories.length,
      totalSize,
      totalLines,
      truncated: fileCount >= fileLimit,
      files,
      directories,
      treeDisplay: buildTreeDisplay(), // Pre-formatted tree for display
      workspaceRoot,
    };
  } catch (err) {
    console.error("[Server] Folder tree error:", err);
    return { success: false, error: err.message };
  }
});
1447
+
1448
+ // ============================================
1449
+ // WebSocket for Chat Streaming
1450
+ // ============================================
1451
+
1452
// Track all connected WebSocket clients for broadcasting
const connectedClients = new Set();

// Last synced state from frontend (so newly connected Ink clients get initial state)
let lastSyncedChatId = null;
let lastSyncedModel = null;

/**
 * Broadcast a CLI log entry to every connected frontend whose socket is open.
 *
 * @param {string} level - Log level label forwarded as-is in the payload.
 * @param {*} args - A single value or an array of values; they are rendered
 *   to text and joined with single spaces.
 * @returns {void}
 */
function broadcastLog(level, args) {
  if (connectedClients.size === 0) return;

  // Render one log argument as text. Logging must never throw into the
  // caller, so values JSON cannot serialise (circular references, BigInt
  // members) fall back to String(); Error objects are rendered via String()
  // because JSON.stringify() would reduce them to "{}".
  const render = (value) => {
    if (value instanceof Error) return String(value);
    if (typeof value !== "object") return String(value);
    try {
      return JSON.stringify(value);
    } catch (err) {
      return String(value);
    }
  };

  const parts = Array.isArray(args) ? args : [args];
  const message = parts.map(render).join(" ");
  const payload = { time: Date.now(), level, message };
  const msgStr = JSON.stringify({ type: "log", payload });
  for (const client of connectedClients) {
    // 1 = OPEN
    if (client.readyState === 1) client.send(msgStr);
  }
}
1472
// Route every entry written to the log sink to the connected frontends.
logSink.write = (level, args) => void broadcastLog(level, args);
1475
+
1476
// Send a message to every local client with an open socket, sender included
// (used for frontend-originated messages so the CLI Ink UI sees the stream).
// Strings go out unchanged; any other value is JSON-encoded once up front.
const broadcastToAllLocalClients = (message) => {
  const wire = typeof message === "string" ? message : JSON.stringify(message);
  connectedClients.forEach((peer) => {
    // 1 = OPEN
    if (peer.readyState !== 1) return;
    peer.send(wire);
  });
};
1484
+
1485
// Helper function to broadcast to all clients except sender.
// Defined here so it's available inside the WebSocket handler.
//
// senderSocket - socket to exclude; it does not have to be a member of
//                connectedClients.
// message      - string sent unchanged, or any other value sent JSON-encoded.
const broadcastToOthers = (senderSocket, message) => {
  const messageStr =
    typeof message === "string" ? message : JSON.stringify(message);

  // Collect the real recipients first so the log line reports how many
  // sockets actually receive the message (open, and not the sender) rather
  // than assuming the sender is always one of the tracked clients.
  const recipients = [];
  for (const client of connectedClients) {
    // 1 = OPEN
    if (client !== senderSocket && client.readyState === 1) {
      recipients.push(client);
    }
  }

  // Only log non-text events to avoid spam (text events are very frequent)
  const type = message?.type;
  if (type && !["text", "thinking", "tool_streaming"].includes(type)) {
    console.log(
      `[WebSocket] Broadcasting ${type} to ${recipients.length} other clients`,
    );
  }

  for (const client of recipients) {
    client.send(messageStr);
  }
};
1506
+
1507
+ fastify.register(async function (fastify) {
1508
+ fastify.get("/ws", { websocket: true }, (socket, req) => {
1509
+ serverLog(
1510
+ "WebSocket client connected, total clients:",
1511
+ connectedClients.size + 1,
1512
+ );
1513
+
1514
+ // Add to connected clients for broadcasting
1515
+ connectedClients.add(socket);
1516
+
1517
+ // Send last synced state to this client so Ink reflects remote frontend on connect
1518
+ if (socket.readyState === 1) {
1519
+ if (lastSyncedChatId != null) {
1520
+ socket.send(
1521
+ JSON.stringify({ type: "chat_selected", chatId: lastSyncedChatId }),
1522
+ );
1523
+ }
1524
+ if (lastSyncedModel) {
1525
+ socket.send(
1526
+ JSON.stringify({ type: "model_changed", model: lastSyncedModel }),
1527
+ );
1528
+ }
1529
+ }
1530
+
1531
+ // Connection abstraction: same interface for local socket or remote (backend) relay.
1532
+ // When in remote mode (otherwise connect), also forward to backend so frontend at otherwise.ai sees CLI activity.
1533
+ //
1534
+ // IMPORTANT: send+broadcast both relay, so using BOTH causes the relay to
1535
+ // receive each event TWICE → remote frontends create duplicate messages.
1536
+ // Use sendLocal/broadcastLocal for web-originated events where the
1537
+ // originating frontend already persists to Supabase.
1538
+ const conn = {
1539
+ send: (msg) => {
1540
+ if (socket.readyState === 1) socket.send(JSON.stringify(msg));
1541
+ if (remoteRelaySend) {
1542
+ try {
1543
+ remoteRelaySend(msg);
1544
+ } catch (e) {
1545
+ serverLog("[WebSocket] Remote relay send error:", e);
1546
+ }
1547
+ }
1548
+ },
1549
+ broadcast: (msg) => {
1550
+ broadcastToOthers(socket, msg);
1551
+ if (remoteRelaySend) {
1552
+ try {
1553
+ remoteRelaySend(msg);
1554
+ } catch (e) {
1555
+ serverLog("[WebSocket] Remote relay broadcast error:", e);
1556
+ }
1557
+ }
1558
+ },
1559
+ sendLocal: (msg) => {
1560
+ if (socket.readyState === 1) socket.send(JSON.stringify(msg));
1561
+ },
1562
+ broadcastLocal: (msg) => {
1563
+ broadcastToOthers(socket, msg);
1564
+ },
1565
+ };
1566
+
1567
+ // CRITICAL FIX: Track generation state PER-CHAT for concurrent support
1568
+ // Using Maps instead of single variables allows multiple chats to generate simultaneously
1569
+ const activeGenerations = new Map(); // chatId -> { abortController, isGenerating }
1570
+
1571
+ // Factory set here so connectToBackend after listen() can use it before any local client connects.
1572
+ function createWsMessageHandler(conn, activeGenerations) {
1573
+ return async (rawMessage) => {
1574
+ try {
1575
+ const message =
1576
+ typeof rawMessage === "string"
1577
+ ? JSON.parse(rawMessage)
1578
+ : JSON.parse(rawMessage.toString());
1579
+
1580
+ // Remote CLI: respond to config request (backend relay → frontend needs api keys + models from CLI)
1581
+ if (message.type === "get_cli_config") {
1582
+ try {
1583
+ const publicConfig = getPublicConfig();
1584
+ let ollamaModels = [];
1585
+ try {
1586
+ const { listOllamaModels } =
1587
+ await import("./inference/ollama.js");
1588
+ const raw = await listOllamaModels(config);
1589
+ ollamaModels = raw.map((m) => {
1590
+ const name = (m.name || "").toLowerCase();
1591
+ const type = [];
1592
+ if (
1593
+ /deepseek-r1|deepseek-v3|qwq|qwen3|gpt-oss|thinking|reason/.test(
1594
+ name,
1595
+ )
1596
+ )
1597
+ type.push("reasoning");
1598
+ if (/llava|vision|bakllava|moondream/.test(name))
1599
+ type.push("image-input");
1600
+ if (
1601
+ /codellama|codegemma|starcoder|deepseek-coder/.test(name)
1602
+ )
1603
+ type.push("code");
1604
+ return {
1605
+ id: m.id,
1606
+ name:
1607
+ m.name?.replace(/:latest$/, "").split(":")[0] ||
1608
+ m.id.replace(/^ollama:/, ""),
1609
+ provider: "Ollama",
1610
+ type: type.length ? type : [""],
1611
+ size: m.size,
1612
+ };
1613
+ });
1614
+ } catch (e) {
1615
+ serverLog(
1616
+ "[WebSocket] Could not list Ollama models for get_cli_config:",
1617
+ e?.message,
1618
+ );
1619
+ }
1620
+ let openRouterModels = [];
1621
+ if (publicConfig.apiKeys?.openrouter) {
1622
+ try {
1623
+ const { fetchOpenRouterModels } =
1624
+ await import("./inference/openrouter.js");
1625
+ const key = config.get("apiKeys.openrouter");
1626
+ if (key)
1627
+ openRouterModels = await fetchOpenRouterModels(key);
1628
+ } catch (e) {
1629
+ serverLog(
1630
+ "[WebSocket] Could not fetch OpenRouter models for get_cli_config:",
1631
+ e?.message,
1632
+ );
1633
+ }
1634
+ }
1635
+ conn.send({
1636
+ type: "cli_config",
1637
+ apiKeys: publicConfig.apiKeys || {},
1638
+ ollamaModels,
1639
+ openRouterModels,
1640
+ browserChannel: publicConfig.browserChannel ?? null,
1641
+ model: config.get("model") || "claude-sonnet-4-20250514",
1642
+ });
1643
+ } catch (e) {
1644
+ serverLog("[WebSocket] get_cli_config error:", e?.message);
1645
+ }
1646
+ return;
1647
+ }
1648
+
1649
+ // Handle chat selection sync (broadcast to other clients)
1650
+ if (message.type === "select_chat") {
1651
+ const { chatId } = message;
1652
+ serverLog("[WebSocket] Chat selected:", chatId);
1653
+ lastSyncedChatId = chatId;
1654
+
1655
+ // Broadcast to all OTHER connected clients
1656
+ conn.broadcast({
1657
+ type: "chat_selected",
1658
+ chatId: chatId,
1659
+ });
1660
+ return;
1661
+ }
1662
+
1663
+ // Handle model change sync (broadcast to other clients)
1664
+ if (message.type === "select_model") {
1665
+ const { model } = message;
1666
+ serverLog("[WebSocket] Model selected:", model);
1667
+ lastSyncedModel = model;
1668
+
1669
+ // Save to config
1670
+ config.set("model", model);
1671
+
1672
+ // Broadcast to all OTHER connected clients
1673
+ conn.broadcast({
1674
+ type: "model_changed",
1675
+ model: model,
1676
+ });
1677
+ return;
1678
+ }
1679
+
1680
+ if (message.type === "stop") {
1681
+ // Handle stop request - stop ALL active generations or specific chat
1682
+ const targetChatId = message.chatId;
1683
+ serverLog(
1684
+ "[WebSocket] Stop requested",
1685
+ targetChatId ? `for chat ${targetChatId}` : "for all chats",
1686
+ );
1687
+
1688
+ if (targetChatId && activeGenerations.has(targetChatId)) {
1689
+ // Stop specific chat
1690
+ const gen = activeGenerations.get(targetChatId);
1691
+ gen.abortController?.abort();
1692
+ activeGenerations.delete(targetChatId);
1693
+ const stoppedMsg = { type: "stopped", chatId: targetChatId };
1694
+ conn.send(stoppedMsg);
1695
+ conn.broadcast(stoppedMsg);
1696
+ } else {
1697
+ // Stop all active generations
1698
+ for (const [chatId, gen] of activeGenerations) {
1699
+ gen.abortController?.abort();
1700
+ const stoppedMsg = { type: "stopped", chatId };
1701
+ conn.send(stoppedMsg);
1702
+ conn.broadcast(stoppedMsg);
1703
+ }
1704
+ activeGenerations.clear();
1705
+ }
1706
+ return;
1707
+ }
1708
+
1709
+ if (message.type === "chat") {
1710
+ const payload = message.payload || message;
1711
+ const {
1712
+ chatId,
1713
+ content,
1714
+ images,
1715
+ webSearch,
1716
+ systemMessage,
1717
+ model,
1718
+ accessToken,
1719
+ frontendApiKeys,
1720
+ userMsgId: frontendUserMsgId,
1721
+ assistantMsgId: frontendAssistantMsgId,
1722
+ history: frontendHistory,
1723
+ } = payload;
1724
+
1725
+ // Persist accessToken on connection so regen and other handlers can write to Supabase only when user is logged in
1726
+ if (accessToken) conn.lastAccessToken = accessToken;
1727
+
1728
+ // Get or create chat; resolve cloud UUID from frontend to local id when CLI writes to Supabase
1729
+ const db = getDb();
1730
+ let actualChatId = null;
1731
+ let cloudIdForClient = null; // ID we send to frontend (cloud UUID when synced, else local id)
1732
+
1733
+ // Create abort controller for this specific chat's generation
1734
+ const abortController = new AbortController();
1735
+
1736
+ // Browser frontends set frontendUserMsgId; Ink terminal does not.
1737
+ const senderIsBrowser = !!frontendUserMsgId;
1738
+
1739
+ if (chatId == null) {
1740
+ actualChatId = dbCreateChat(null, null);
1741
+ serverLog(
1742
+ "[WebSocket] Created new chat (local SQLite) ID:",
1743
+ actualChatId,
1744
+ );
1745
+ cloudIdForClient = actualChatId;
1746
+ conn.sendLocal({ type: "chat_created", chatId: cloudIdForClient });
1747
+ broadcastToAllLocalClients({
1748
+ type: "chat_created",
1749
+ chatId: cloudIdForClient,
1750
+ });
1751
+ } else if (typeof chatId === "string") {
1752
+ actualChatId = getLocalChatIdByCloudId(chatId);
1753
+ if (actualChatId == null) {
1754
+ actualChatId = dbCreateChat(null, chatId);
1755
+ serverLog(
1756
+ "[WebSocket] Adopted cloud chat (local):",
1757
+ actualChatId,
1758
+ );
1759
+ }
1760
+ cloudIdForClient = actualChatId;
1761
+ broadcastToAllLocalClients({
1762
+ type: "chat_created",
1763
+ chatId: cloudIdForClient,
1764
+ });
1765
+ } else {
1766
+ actualChatId = String(chatId);
1767
+ cloudIdForClient = actualChatId;
1768
+ broadcastToAllLocalClients({
1769
+ type: "chat_created",
1770
+ chatId: cloudIdForClient,
1771
+ });
1772
+ }
1773
+
1774
+ // Always broadcast user_message to local clients so the Ink UI
1775
+ // reflects messages sent from any frontend (local browser or remote).
1776
+ broadcastToAllLocalClients({
1777
+ type: "user_message",
1778
+ chatId: cloudIdForClient,
1779
+ content: content,
1780
+ images: images,
1781
+ userMsgId: frontendUserMsgId || undefined,
1782
+ assistantMsgId: frontendAssistantMsgId || undefined,
1783
+ });
1784
+
1785
+ // Web search is now handled by the LLM via the web_search tool
1786
+ // This gives the LLM full control with proper context (user location, current time)
1787
+ let searchData = null;
1788
+
1789
+ // CRITICAL FIX: Check if this exact user message already exists (prevents duplicates on reconnection)
1790
+ const existingUserMsg = db
1791
+ .prepare(
1792
+ `
1793
+ SELECT id FROM messages
1794
+ WHERE chat_id = ? AND role = 'user' AND content = ?
1795
+ ORDER BY created_at DESC LIMIT 1
1796
+ `,
1797
+ )
1798
+ .get(actualChatId, content);
1799
+
1800
+ if (existingUserMsg) {
1801
+ serverLog(
1802
+ "[WebSocket] User message already exists (id:",
1803
+ existingUserMsg.id,
1804
+ ") - skipping duplicate insert",
1805
+ );
1806
+ } else {
1807
+ // Save user message (with original content, not enhanced)
1808
+ db.prepare(
1809
+ `
1810
+ INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
1811
+ `,
1812
+ ).run(
1813
+ actualChatId,
1814
+ "user",
1815
+ content,
1816
+ images ? JSON.stringify({ images }) : null,
1817
+ );
1818
+ // NOTE: Frontend is the single Supabase writer — CLI only persists to local SQLite.
1819
+ }
1820
+
1821
+ // Use frontend-provided branch-ordered history when available.
1822
+ // This ensures the LLM sees only messages from the current branch,
1823
+ // not interleaved messages from all branches (which SQLite ORDER BY created_at gives).
1824
+ let history;
1825
+ if (Array.isArray(frontendHistory) && frontendHistory.length > 0) {
1826
+ history = frontendHistory
1827
+ .filter((m) => m.role && m.content)
1828
+ .map((msg) => {
1829
+ if (
1830
+ msg.role === "assistant" &&
1831
+ msg.content?.includes("<tool_call>")
1832
+ ) {
1833
+ return { ...msg, content: cleanResponseText(msg.content) };
1834
+ }
1835
+ return msg;
1836
+ });
1837
+ } else {
1838
+ const rawHistory = db
1839
+ .prepare(
1840
+ `SELECT role, content FROM messages WHERE chat_id = ? ORDER BY created_at ASC`,
1841
+ )
1842
+ .all(actualChatId);
1843
+ history = rawHistory.map((msg) => {
1844
+ if (
1845
+ msg.role === "assistant" &&
1846
+ msg.content &&
1847
+ msg.content.includes("<tool_call>")
1848
+ ) {
1849
+ console.warn(
1850
+ "[WebSocket] ⚠️ Found tool_call XML in history, cleaning...",
1851
+ );
1852
+ return {
1853
+ ...msg,
1854
+ content: cleanResponseText(msg.content),
1855
+ };
1856
+ }
1857
+ return msg;
1858
+ });
1859
+ }
1860
+
1861
+ // CRITICAL: Register this chat's generation BEFORE starting
1862
+ // This allows multiple chats to generate concurrently
1863
+ activeGenerations.set(actualChatId, {
1864
+ abortController,
1865
+ isGenerating: true,
1866
+ });
1867
+ serverLog(
1868
+ "[WebSocket] Starting generation for chat:",
1869
+ actualChatId,
1870
+ "Active generations:",
1871
+ activeGenerations.size,
1872
+ );
1873
+
1874
+ // Run agent and stream response
1875
+ let fullResponse = "";
1876
+ let thinkingContent = ""; // Track thinking/reasoning tokens for reasoning models
1877
+
1878
+ // Track generation stats for tps calculation
1879
+ const generationStartTime = Date.now();
1880
+ let tokenCount = 0; // Fallback: chunk counting (inaccurate)
1881
+ let realUsageStats = null; // Real usage from API (accurate)
1882
+
1883
+ // Track tool calls for persistence (so they show on reload)
1884
+ const toolCalls = [];
1885
+
1886
+ // Calculate message index for this assistant response (for file snapshots)
1887
+ // The assistant message will be at this index once generated
1888
+ const messageCount =
1889
+ db
1890
+ .prepare(
1891
+ "SELECT COUNT(*) as count FROM messages WHERE chat_id = ?",
1892
+ )
1893
+ .get(actualChatId)?.count || 0;
1894
+ const assistantMessageIndex = messageCount; // 0-indexed: if there's 1 message (user), assistant is index 1
1895
+
1896
+ // snapshotFn(snapshot, toolCallId = null): logs the snapshot, then stores it via saveFileSnapshot under this chat and assistantMessageIndex for undo on regeneration.
1897
+ // Reads snapshot.path, .content, .existed and .createdDir; intended to capture file state BEFORE write_file/edit_file modifies it (the caller is outside this block).
1898
+ const snapshotFn = (snapshot, toolCallId = null) => {
1899
+ serverLog(
1900
+ "[WebSocket] Capturing file snapshot for undo:",
1901
+ snapshot.path,
1902
+ "existed:",
1903
+ snapshot.existed,
1904
+ );
1905
+ saveFileSnapshot(
1906
+ actualChatId,
1907
+ assistantMessageIndex,
1908
+ toolCallId,
1909
+ snapshot.path,
1910
+ snapshot.content,
1911
+ snapshot.existed,
1912
+ snapshot.createdDir ?? null, // a missing createdDir is stored as null
1913
+ );
1914
+ };
1915
+ // shellUndoFn(entry, toolCallId = null): logs the shell operation, then stores the whole entry via saveShellUndo under this chat and assistantMessageIndex.
1916
+ const shellUndoFn = (entry, toolCallId = null) => {
1917
+ serverLog(
1918
+ "[WebSocket] Capturing shell undo:",
1919
+ entry.op,
1920
+ entry.path || entry.path_dest, // log whichever path field the entry carries
1921
+ );
1922
+ saveShellUndo(
1923
+ actualChatId,
1924
+ assistantMessageIndex,
1925
+ toolCallId,
1926
+ entry,
1927
+ );
1928
+ };
1929
+
1930
+ // Agent options - LLM handles all web searches via tools
1931
+ const agentOptions = { snapshotFn, shellUndoFn };
1932
+
1933
+ // Pass custom system message and model through config if provided
1934
+ const agentConfig = { ...config.store };
1935
+ if (systemMessage) {
1936
+ agentConfig.customSystemMessage = systemMessage;
1937
+ }
1938
+ // Use model from frontend if provided, otherwise fall back to config default
1939
+ if (model) {
1940
+ agentConfig.model = model;
1941
+ }
1942
+ // Pass user's access token so tools (e.g. search_memory) can search Supabase
1943
+ if (conn.lastAccessToken) {
1944
+ agentConfig.accessToken = conn.lastAccessToken;
1945
+ }
1946
+
1947
+ // Merge frontend API keys: CLI keys take priority, frontend keys fill gaps
1948
+ if (frontendApiKeys && typeof frontendApiKeys === "object") {
1949
+ const mergedKeys = { ...(agentConfig.apiKeys || {}) };
1950
+ for (const [provider, key] of Object.entries(frontendApiKeys)) {
1951
+ if (key && typeof key === "string" && !mergedKeys[provider]) {
1952
+ mergedKeys[provider] = key.trim();
1953
+ serverLog(
1954
+ "[WebSocket] Using frontend API key for provider:",
1955
+ provider,
1956
+ );
1957
+ }
1958
+ }
1959
+ agentConfig.apiKeys = mergedKeys;
1960
+ }
1961
+
1962
+ // ============================================
1963
+ // RAG Document Detection: Find @document mentions
1964
+ // The agent will use the rag_search tool to search
1965
+ // ============================================
1966
+ let matchedRagDocuments = [];
1967
+
1968
+ // Detect @mentions in the content (pattern: @DocumentName or @"Document Name")
1969
+ const ragMentionPattern = /@(\w+[\w\s-]*|\".+?\")/g;
1970
+ const potentialMentions = content.match(ragMentionPattern);
1971
+
1972
+ if (potentialMentions && potentialMentions.length > 0) {
1973
+ serverLog(
1974
+ "[WebSocket] Potential RAG mentions found:",
1975
+ potentialMentions,
1976
+ );
1977
+
1978
+ // Get all RAG documents to match against mentions
1979
+ const allRagDocs = getAllRagDocuments();
1980
+
1981
+ if (allRagDocs.length > 0) {
1982
+ // Find matching documents (case-insensitive)
1983
+ for (const mention of potentialMentions) {
1984
+ // Remove @ and quotes from mention
1985
+ const mentionName = mention
1986
+ .substring(1)
1987
+ .replace(/^"|"$/g, "")
1988
+ .toLowerCase();
1989
+ const matchingDoc = allRagDocs.find(
1990
+ (doc) =>
1991
+ doc.name.toLowerCase() === mentionName ||
1992
+ doc.name.toLowerCase().includes(mentionName) ||
1993
+ mentionName.includes(doc.name.toLowerCase()),
1994
+ );
1995
+ if (
1996
+ matchingDoc &&
1997
+ !matchedRagDocuments.some((d) => d.id === matchingDoc.id)
1998
+ ) {
1999
+ matchedRagDocuments.push(matchingDoc);
2000
+ }
2001
+ }
2002
+
2003
+ if (matchedRagDocuments.length > 0) {
2004
+ serverLog(
2005
+ "[WebSocket] Matched RAG documents:",
2006
+ matchedRagDocuments.map((d) => d.name),
2007
+ );
2008
+
2009
+ broadcastToAllLocalClients({
2010
+ type: "rag_documents_detected",
2011
+ chatId: actualChatId,
2012
+ documents: matchedRagDocuments.map((d) => ({
2013
+ id: d.id,
2014
+ name: d.name,
2015
+ })),
2016
+ });
2017
+ }
2018
+ }
2019
+ }
2020
+
2021
+ // Pass matched RAG documents to agent options
2022
+ // The agent will use rag_search tool to search them
2023
+ agentOptions.ragDocuments = matchedRagDocuments;
2024
+
2025
+ try {
2026
+ // Check if this is an image generation model - they need special handling
2027
+ // Image models should bypass the agent and go directly to inference
2028
+ const effectiveModel = agentConfig.model || config.store.model;
2029
+ const isImageGenModel = isImageModel(effectiveModel);
2030
+
2031
+ serverLog(
2032
+ "[WebSocket] Model:",
2033
+ effectiveModel,
2034
+ "isImageModel:",
2035
+ isImageGenModel,
2036
+ );
2037
+ if (matchedRagDocuments.length > 0) {
2038
+ serverLog(
2039
+ "[WebSocket] RAG documents passed to agent:",
2040
+ matchedRagDocuments.map((d) => d.name),
2041
+ );
2042
+ }
2043
+
2044
+ // Choose the appropriate generator based on model type
2045
+ // Build the user message (with images, if any) for the direct-inference image-model path; the agent path receives images via its options instead
2046
+ const userMessageWithImages = images?.length
2047
+ ? { role: "user", content, images }
2048
+ : { role: "user", content };
2049
+ const generator = isImageGenModel
2050
+ ? streamInference(
2051
+ effectiveModel,
2052
+ [userMessageWithImages],
2053
+ "",
2054
+ agentConfig,
2055
+ )
2056
+ : runAgent(content, history.slice(0, -1), agentConfig, {
2057
+ ...agentOptions,
2058
+ images: images || [],
2059
+ });
2060
+
2061
+ for await (const chunk of generator) {
2062
+ // Check if THIS chat's generation was aborted (not other chats)
2063
+ const thisGen = activeGenerations.get(actualChatId);
2064
+ if (
2065
+ !thisGen ||
2066
+ !thisGen.isGenerating ||
2067
+ thisGen.abortController?.signal.aborted
2068
+ ) {
2069
+ serverLog(
2070
+ "[WebSocket] Generation aborted for chat:",
2071
+ actualChatId,
2072
+ );
2073
+ break;
2074
+ }
2075
+
2076
+ // CRITICAL: Include chatId in ALL messages so client can route correctly
2077
+ // This prevents tokens from one chat leaking into another
2078
+ let msgWithChatId = { ...chunk, chatId: actualChatId };
2079
+ // For image chunks: do NOT send huge base64 over WebSocket (can exceed message limits).
2080
+ // Save to disk and send a small image_url so the frontend can load the image via GET.
2081
+ if (chunk.type === "image") {
2082
+ console.log(
2083
+ "[WebSocket] 🖼️ Received image chunk, content length:",
2084
+ chunk.content?.length,
2085
+ );
2086
+ const mimeType = chunk.mimeType || "image/png";
2087
+ let imageUrl = null;
2088
+ try {
2089
+ const imagesDir = join(process.cwd(), "generated_images");
2090
+ if (!existsSync(imagesDir))
2091
+ mkdirSync(imagesDir, { recursive: true });
2092
+ const filename = `gemini_${Date.now()}.png`;
2093
+ const filePath = join(imagesDir, filename);
2094
+ writeFileSync(
2095
+ filePath,
2096
+ Buffer.from(chunk.content, "base64"),
2097
+ );
2098
+ imageUrl = `/api/generated-images/${filename}`;
2099
+ console.log(
2100
+ "[WebSocket] 🖼️ Image saved, sending image_url:",
2101
+ imageUrl,
2102
+ );
2103
+ } catch (err) {
2104
+ console.error("[WebSocket] Failed to save image:", err);
2105
+ }
2106
+ if (imageUrl) {
2107
+ msgWithChatId = {
2108
+ type: "image_url",
2109
+ content: imageUrl,
2110
+ chatId: actualChatId,
2111
+ };
2112
+ } else {
2113
+ continue;
2114
+ }
2115
+ }
2116
+
2117
+ // Send to ALL local clients (sender + Ink terminal) without relaying.
2118
+ // The originating frontend persists to Supabase; relaying would cause
2119
+ // Azure-connected frontends to create duplicate assistant messages.
2120
+ broadcastToAllLocalClients(msgWithChatId);
2121
+
2122
+ if (chunk.type === "usage") {
2123
+ // Real usage stats from API - use these instead of chunk counting
2124
+ realUsageStats = {
2125
+ inputTokens: chunk.inputTokens || 0,
2126
+ outputTokens: chunk.outputTokens || 0,
2127
+ totalTokens: chunk.totalTokens || 0,
2128
+ thinkingTokens: chunk.thinkingTokens || 0,
2129
+ };
2130
+ serverLog(
2131
+ "[WebSocket] Real usage stats received:",
2132
+ realUsageStats,
2133
+ );
2134
+ } else if (chunk.type === "thinking") {
2135
+ // Track thinking/reasoning tokens from reasoning models (Gemini, Grok)
2136
+ thinkingContent += chunk.content;
2137
+ tokenCount++; // Fallback chunk counting
2138
+ serverLog(
2139
+ "[WebSocket] Thinking content now:",
2140
+ thinkingContent.length,
2141
+ "chars",
2142
+ );
2143
+ } else if (chunk.type === "text") {
2144
+ fullResponse += chunk.content;
2145
+ tokenCount++; // Fallback chunk counting
2146
+ // DEBUG: Log when fullResponse grows significantly
2147
+ if (
2148
+ fullResponse.length % 100 === 0 ||
2149
+ chunk.content.includes("<tool")
2150
+ ) {
2151
+ serverLog(
2152
+ "[WebSocket] fullResponse now:",
2153
+ fullResponse.length,
2154
+ "chars, last chunk:",
2155
+ chunk.content.substring(0, 50),
2156
+ );
2157
+ }
2158
+ } else if (
2159
+ chunk.type === "image" ||
2160
+ chunk.type === "image_url"
2161
+ ) {
2162
+ // Image: for 'image' we already saved and forwarded as image_url above. Build fullResponse from URL.
2163
+ const imageContent =
2164
+ chunk.type === "image_url"
2165
+ ? chunk.content
2166
+ : msgWithChatId.content;
2167
+ if (imageContent) {
2168
+ const imageMarkdown = `\n\n![Generated Image](${imageContent})\n\n`;
2169
+ fullResponse += imageMarkdown;
2170
+ tokenCount += chunk.type === "image_url" ? 10 : 100;
2171
+ serverLog(
2172
+ "[WebSocket] Image (URL) added to response:",
2173
+ imageContent.substring(0, 60),
2174
+ );
2175
+ }
2176
+ } else if (chunk.type === "tool_start") {
2177
+ // Track tool call start for persistence
2178
+ // Calculate content position, adjusting to nearest word boundary to avoid splitting words
2179
+ const cleanedSoFar = cleanResponseText(fullResponse);
2180
+ let contentPosition = cleanedSoFar.length;
2181
+
2182
+ // Find the nearest word boundary (previous space or newline)
2183
+ // This prevents tool displays from appearing in the middle of words
2184
+ if (contentPosition > 0 && cleanedSoFar.length > 0) {
2185
+ const lastChar = cleanedSoFar[cleanedSoFar.length - 1];
2186
+ if (lastChar && !/\s/.test(lastChar)) {
2187
+ // We're in the middle of a word - find the last whitespace
2188
+ const lastSpaceIdx = Math.max(
2189
+ cleanedSoFar.lastIndexOf(" "),
2190
+ cleanedSoFar.lastIndexOf("\n"),
2191
+ cleanedSoFar.lastIndexOf("\t"),
2192
+ );
2193
+ if (lastSpaceIdx > 0) {
2194
+ contentPosition = lastSpaceIdx + 1; // Position after the space
2195
+ }
2196
+ }
2197
+ }
2198
+
2199
+ toolCalls.push({
2200
+ id: chunk.callId,
2201
+ tool: chunk.name,
2202
+ params: chunk.args || {},
2203
+ status: "running",
2204
+ startTime: Date.now(),
2205
+ contentPosition,
2206
+ });
2207
+ } else if (
2208
+ chunk.type === "tool_result" ||
2209
+ chunk.type === "tool_error"
2210
+ ) {
2211
+ // Update tool call with result
2212
+ const toolIndex = chunk.callId
2213
+ ? toolCalls.findIndex((t) => t.id === chunk.callId)
2214
+ : toolCalls.findIndex(
2215
+ (t) =>
2216
+ t.tool === chunk.name && t.status === "running",
2217
+ );
2218
+ if (toolIndex !== -1) {
2219
+ toolCalls[toolIndex] = {
2220
+ ...toolCalls[toolIndex],
2221
+ status:
2222
+ chunk.type === "tool_error" ? "error" : "complete",
2223
+ result: chunk.result,
2224
+ error: chunk.error,
2225
+ endTime: Date.now(),
2226
+ };
2227
+
2228
+ // Capture search data from web_search tool for persistence
2229
+ if (
2230
+ chunk.name === "web_search" &&
2231
+ chunk.result?.results
2232
+ ) {
2233
+ searchData = {
2234
+ success: true,
2235
+ results: chunk.result.results,
2236
+ count: chunk.result.results?.length || 0,
2237
+ };
2238
+ }
2239
+ }
2240
+ }
2241
+ }
2242
+
2243
+ // Only save if THIS chat was not aborted and we have content
2244
+ const thisGenFinal = activeGenerations.get(actualChatId);
2245
+ if (thisGenFinal?.isGenerating && fullResponse.length > 0) {
2246
+ // DEBUG: Flag responses longer than 200 chars; the flag is passed to cleanResponseText as its second argument
2247
+ const willStripLarge = fullResponse.length > 200;
2248
+
2249
+ // Clean the response (remove tool_call XML blocks)
2250
+ const cleanedResponse = cleanResponseText(
2251
+ fullResponse,
2252
+ willStripLarge,
2253
+ );
2254
+
2255
+ // DEBUG: Log if large content is being stripped
2256
+ if (
2257
+ fullResponse.length > 200 &&
2258
+ cleanedResponse.length < fullResponse.length * 0.3
2259
+ ) {
2260
+ console.warn(
2261
+ "[WebSocket] ⚠️ Large content being stripped!",
2262
+ );
2263
+ console.warn(
2264
+ "[WebSocket] fullResponse length:",
2265
+ fullResponse.length,
2266
+ "cleaned:",
2267
+ cleanedResponse.length,
2268
+ );
2269
+ console.warn(
2270
+ "[WebSocket] fullResponse preview:",
2271
+ fullResponse.substring(0, 500),
2272
+ );
2273
+ console.warn(
2274
+ "[WebSocket] fullResponse end:",
2275
+ fullResponse.substring(
2276
+ Math.max(0, fullResponse.length - 500),
2277
+ ),
2278
+ );
2279
+ }
2280
+
2281
+ // Calculate final generation stats
2282
+ // Use real API usage stats when available, fall back to character-based estimation
2283
+ // Token estimation: ~4 characters per token for English (more accurate than chunk counting)
2284
+ const elapsedSeconds =
2285
+ (Date.now() - generationStartTime) / 1000;
2286
+ const estimatedOutputTokens = Math.ceil(
2287
+ (fullResponse.length + thinkingContent.length) / 4,
2288
+ );
2289
+ // Include reasoning/thinking tokens in output count when API reports them separately (e.g. Gemini)
2290
+ const realOutput = realUsageStats?.outputTokens ?? 0;
2291
+ const realThinking = realUsageStats?.thinkingTokens ?? 0;
2292
+ const finalOutputTokens =
2293
+ realUsageStats != null
2294
+ ? realOutput + realThinking
2295
+ : estimatedOutputTokens;
2296
+ const finalInputTokens = realUsageStats?.inputTokens || 0;
2297
+ const tps =
2298
+ elapsedSeconds > 0
2299
+ ? Math.round((finalOutputTokens / elapsedSeconds) * 100) /
2300
+ 100
2301
+ : 0;
2302
+
2303
+ serverLog(
2304
+ "[WebSocket] Final stats - real:",
2305
+ !!realUsageStats,
2306
+ "outputTokens:",
2307
+ finalOutputTokens,
2308
+ "estimated:",
2309
+ estimatedOutputTokens,
2310
+ "tps:",
2311
+ tps,
2312
+ );
2313
+
2314
+ // Build metadata with model, stats, search results, tool calls, and thinking content
2315
+ const metadata = {
2316
+ model: agentConfig.model, // Use the actual model used for this generation
2317
+ tps,
2318
+ numTokens: finalOutputTokens,
2319
+ inputTokens: finalInputTokens, // Include input tokens for reference
2320
+ _searchData: searchData, // Include search results for display later
2321
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined, // Include tool calls for display on reload
2322
+ thinkingContent:
2323
+ thinkingContent.length > 0 ? thinkingContent : undefined, // Include thinking/reasoning content
2324
+ };
2325
+
2326
+ // CRITICAL FIX: Check if chat still exists before saving
2327
+ // User may have deleted the chat while generation was running
2328
+ const chatExists = db
2329
+ .prepare("SELECT id FROM chats WHERE id = ?")
2330
+ .get(actualChatId);
2331
+ if (!chatExists) {
2332
+ console.warn(
2333
+ "[WebSocket] Chat",
2334
+ actualChatId,
2335
+ "was deleted during generation - skipping save",
2336
+ );
2337
+ const deletedDoneMsg = {
2338
+ type: "done",
2339
+ chatId: actualChatId,
2340
+ tps,
2341
+ numTokens: finalOutputTokens,
2342
+ deleted: true, // Signal that chat was deleted
2343
+ };
2344
+ broadcastToAllLocalClients(deletedDoneMsg);
2345
+ } else {
2346
+ // CRITICAL FIX: Prevent duplicate assistant messages
2347
+ // Find the last user message and delete any assistant messages after it
2348
+ // This handles cases where generation ran twice (reconnection, race conditions)
2349
+ //
2350
+ // IMPORTANT: Use message `id` (auto-increment) instead of `created_at` for ordering!
2351
+ // SQLite's CURRENT_TIMESTAMP has second-level precision, so if two messages are
2352
+ // inserted within the same second, `created_at > ?` may not catch duplicates.
2353
+ // Using `id > ?` guarantees we catch ALL assistant messages inserted after the user message.
2354
+ const lastUserMsg = db
2355
+ .prepare(
2356
+ `
2357
+ SELECT id FROM messages
2358
+ WHERE chat_id = ? AND role = 'user'
2359
+ ORDER BY id DESC LIMIT 1
2360
+ `,
2361
+ )
2362
+ .get(actualChatId);
2363
+
2364
+ if (lastUserMsg) {
2365
+ const deleteResult = db
2366
+ .prepare(
2367
+ `
2368
+ DELETE FROM messages
2369
+ WHERE chat_id = ? AND role = 'assistant' AND id > ?
2370
+ `,
2371
+ )
2372
+ .run(actualChatId, lastUserMsg.id);
2373
+
2374
+ if (deleteResult.changes > 0) {
2375
+ serverLog(
2376
+ "[WebSocket] Deleted",
2377
+ deleteResult.changes,
2378
+ "duplicate assistant message(s) before saving new response",
2379
+ );
2380
+ }
2381
+ }
2382
+
2383
+ // Build rich context content with inline tool calls and results
2384
+ // Uses native XML format: <tool_call>...</tool_call><tool_result>...</tool_result>
2385
+ // This allows the AI to "remember" what tools it used and what they returned
2386
+ const richContent = buildRichContextContent(
2387
+ cleanedResponse,
2388
+ toolCalls,
2389
+ );
2390
+
2391
+ // Save assistant message with rich content and metadata
2392
+ db.prepare(
2393
+ `
2394
+ INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
2395
+ `,
2396
+ ).run(
2397
+ actualChatId,
2398
+ "assistant",
2399
+ richContent,
2400
+ JSON.stringify(metadata),
2401
+ );
2402
+ // NOTE: Frontend is the single Supabase writer — CLI only persists to local SQLite.
2403
+ // Title is set by frontend via backend /api/generate-chat-title at end of message.
2404
+
2405
+ // Update chat timestamp
2406
+ db.prepare(
2407
+ "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
2408
+ ).run(actualChatId);
2409
+
2410
+ serverLog(
2411
+ "[WebSocket] Sending done for chat:",
2412
+ actualChatId,
2413
+ "tps:",
2414
+ tps,
2415
+ "tokens:",
2416
+ finalOutputTokens,
2417
+ );
2418
+ const doneChatId = actualChatId;
2419
+ // Include complete message so frontend doesn't need to reconstruct
2420
+ const doneMessage = {
2421
+ type: "done",
2422
+ chatId: doneChatId,
2423
+ tps,
2424
+ numTokens: finalOutputTokens,
2425
+ message: {
2426
+ role: "assistant",
2427
+ content: cleanedResponse, // Clean version for display
2428
+ fullContent: richContent, // Rich version with tool history for AI context
2429
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
2430
+ _searchData: searchData,
2431
+ thinkingContent:
2432
+ thinkingContent.length > 0
2433
+ ? thinkingContent
2434
+ : undefined,
2435
+ tps,
2436
+ numTokens: finalOutputTokens,
2437
+ inputTokens: finalInputTokens, // Include input tokens
2438
+ model: agentConfig.model, // Use the actual model used for this generation
2439
+ },
2440
+ };
2441
+ broadcastToAllLocalClients(doneMessage);
2442
+ }
2443
+ } else if (thisGenFinal?.isGenerating) {
2444
+ // Generation completed but with no text content (e.g., only tool calls, or max iterations reached)
2445
+ // Still need to send done event so frontend knows generation is finished
2446
+ const elapsedSeconds =
2447
+ (Date.now() - generationStartTime) / 1000;
2448
+ const finalOutputTokens =
2449
+ (realUsageStats?.outputTokens ?? 0) +
2450
+ (realUsageStats?.thinkingTokens ?? 0);
2451
+ const tps =
2452
+ elapsedSeconds > 0
2453
+ ? Math.round((finalOutputTokens / elapsedSeconds) * 100) /
2454
+ 100
2455
+ : 0;
2456
+
2457
+ serverLog(
2458
+ "[WebSocket] Sending done for chat (no content):",
2459
+ actualChatId,
2460
+ );
2461
+ const emptyDoneChatId = actualChatId;
2462
+ const emptyDoneMessage = {
2463
+ type: "done",
2464
+ chatId: emptyDoneChatId,
2465
+ tps,
2466
+ numTokens: finalOutputTokens,
2467
+ message: {
2468
+ role: "assistant",
2469
+ content: "",
2470
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
2471
+ _searchData: searchData,
2472
+ thinkingContent:
2473
+ thinkingContent.length > 0
2474
+ ? thinkingContent
2475
+ : undefined,
2476
+ tps,
2477
+ numTokens: finalOutputTokens,
2478
+ model: agentConfig.model,
2479
+ },
2480
+ };
2481
+ broadcastToAllLocalClients(emptyDoneMessage);
2482
+ }
2483
+
2484
+ // Clean up THIS chat's generation state (doesn't affect other chats)
2485
+ activeGenerations.delete(actualChatId);
2486
+ serverLog(
2487
+ "[WebSocket] Generation complete for chat:",
2488
+ actualChatId,
2489
+ "Remaining active:",
2490
+ activeGenerations.size,
2491
+ );
2492
+ } catch (err) {
2493
+ if (err.name === "AbortError") {
2494
+ serverLog(
2495
+ "[WebSocket] Generation aborted by user for chat:",
2496
+ actualChatId,
2497
+ );
2498
+ } else {
2499
+ console.error("Agent error for chat:", actualChatId, err);
2500
+ const errorMsg = {
2501
+ type: "error",
2502
+ message: err.message,
2503
+ chatId: actualChatId,
2504
+ };
2505
+ broadcastToAllLocalClients(errorMsg);
2506
+ }
2507
+ // Clean up THIS chat's generation state
2508
+ activeGenerations.delete(actualChatId);
2509
+ }
2510
+ } else if (message.type === "regenerate") {
2511
+ // Handle regeneration request (retry/edit)
2512
+ const {
2513
+ chatId,
2514
+ message: userMessage,
2515
+ systemMessage: regenSystemMessage,
2516
+ model: regenModel,
2517
+ frontendApiKeys: regenFrontendApiKeys,
2518
+ revert: payloadRevert,
2519
+ } = message.payload || {};
2520
+ // Default true for backward compat; frontend sends revert: false when user chose "Continue without reverting"
2521
+ const shouldRevert = payloadRevert !== false;
2522
+ serverLog(
2523
+ "[WebSocket] Regeneration request for chat:",
2524
+ chatId,
2525
+ "revert:",
2526
+ shouldRevert,
2527
+ );
2528
+
2529
+ if (!chatId) {
2530
+ conn.send({ type: "error", message: "No chatId provided" });
2531
+ return;
2532
+ }
2533
+
2534
+ // Concurrency: reject duplicate regenerate for the same chat (Cursor-like reliability)
2535
+ if (activeGenerations.has(chatId)) {
2536
+ conn.send({
2537
+ type: "error",
2538
+ message:
2539
+ "A generation is already in progress for this chat. Wait for it to finish or stop it first.",
2540
+ });
2541
+ return;
2542
+ }
2543
+
2544
+ // Create abort controller for this regeneration - use per-chat tracking
2545
+ const regenAbortController = new AbortController();
2546
+ activeGenerations.set(chatId, {
2547
+ abortController: regenAbortController,
2548
+ isGenerating: true,
2549
+ });
2550
+
2551
+ const db = getDb();
2552
+
2553
+ // ============================================
2554
+ // STEP 1: Calculate assistant message index BEFORE any changes
2555
+ // This is the index of the assistant message being regenerated
2556
+ // ============================================
2557
+ const allMessagesForIndex = db
2558
+ .prepare(
2559
+ `
2560
+ SELECT id, role FROM messages WHERE chat_id = ? ORDER BY id ASC
2561
+ `,
2562
+ )
2563
+ .all(chatId);
2564
+
2565
+ // Find the index of the last assistant message (the one being regenerated)
2566
+ // If no assistant message exists, it will be created at the current count
2567
+ let regenMessageIndex = allMessagesForIndex.length;
2568
+ for (let i = allMessagesForIndex.length - 1; i >= 0; i--) {
2569
+ if (allMessagesForIndex[i].role === "assistant") {
2570
+ regenMessageIndex = i;
2571
+ break;
2572
+ }
2573
+ }
2574
+ serverLog(
2575
+ "[WebSocket] Regeneration at message index:",
2576
+ regenMessageIndex,
2577
+ );
2578
+
2579
+ // ============================================
2580
+ // STEP 2: Revert files BEFORE starting new generation (only when user chose to revert)
2581
+ // Undo any file changes from the message being regenerated
2582
+ // ============================================
2583
+ const snapshotsToRevert = shouldRevert
2584
+ ? getSnapshotsToRevert(chatId, regenMessageIndex)
2585
+ : [];
2586
+ const revertedFiles = [];
2587
+ const revertErrors = []; // Collect errors so we still revert as much as possible and inform user
2588
+
2589
+ if (snapshotsToRevert.length > 0) {
2590
+ serverLog(
2591
+ "[WebSocket] Reverting",
2592
+ snapshotsToRevert.length,
2593
+ "file snapshot(s) before regeneration",
2594
+ );
2595
+
2596
+ const safeRoot = resolve(process.cwd());
2597
+ for (const snapshot of snapshotsToRevert) {
2598
+ try {
2599
+ if (snapshot.file_existed) {
2600
+ // File existed before - restore original content
2601
+ writeFileSync(
2602
+ snapshot.file_path,
2603
+ snapshot.original_content,
2604
+ "utf-8",
2605
+ );
2606
+ serverLog(
2607
+ "[WebSocket] Restored file:",
2608
+ snapshot.file_path,
2609
+ );
2610
+ } else {
2611
+ // File was created by tool - delete it
2612
+ if (existsSync(snapshot.file_path)) {
2613
+ unlinkSync(snapshot.file_path);
2614
+ serverLog(
2615
+ "[WebSocket] Deleted created file:",
2616
+ snapshot.file_path,
2617
+ );
2618
+ }
2619
+ // If write_file created the parent dir, remove it when empty (Strategy 2)
2620
+ const createdDir = snapshot.created_dir || null;
2621
+ if (createdDir && existsSync(createdDir)) {
2622
+ try {
2623
+ if (readdirSync(createdDir).length === 0) {
2624
+ rmdirSync(createdDir);
2625
+ serverLog(
2626
+ "[WebSocket] Removed empty created dir:",
2627
+ createdDir,
2628
+ );
2629
+ // Remove empty parents up to safe root
2630
+ let parent = dirname(createdDir);
2631
+ while (
2632
+ parent &&
2633
+ parent !== safeRoot &&
2634
+ parent !== dirname(parent)
2635
+ ) {
2636
+ if (
2637
+ existsSync(parent) &&
2638
+ readdirSync(parent).length === 0
2639
+ ) {
2640
+ rmdirSync(parent);
2641
+ serverLog(
2642
+ "[WebSocket] Removed empty parent dir:",
2643
+ parent,
2644
+ );
2645
+ parent = dirname(parent);
2646
+ } else {
2647
+ break;
2648
+ }
2649
+ }
2650
+ }
2651
+ } catch (dirErr) {
2652
+ console.warn(
2653
+ "[WebSocket] Failed to remove created dir:",
2654
+ createdDir,
2655
+ dirErr.message,
2656
+ );
2657
+ revertErrors.push({
2658
+ path: createdDir,
2659
+ message: dirErr.message,
2660
+ op: "rmdir",
2661
+ });
2662
+ }
2663
+ }
2664
+ }
2665
+ revertedFiles.push(snapshot.file_path);
2666
+ } catch (revertErr) {
2667
+ console.warn(
2668
+ "[WebSocket] Failed to revert file:",
2669
+ snapshot.file_path,
2670
+ revertErr.message,
2671
+ );
2672
+ revertErrors.push({
2673
+ path: snapshot.file_path,
2674
+ message: revertErr.message,
2675
+ op: "file",
2676
+ });
2677
+ }
2678
+ }
2679
+
2680
+ // Delete the reverted snapshots from database (even if some reverts failed)
2681
+ deleteSnapshots(chatId, regenMessageIndex);
2682
+
2683
+ // Notify client of reverted files
2684
+ if (revertedFiles.length > 0) {
2685
+ const revertMsg = {
2686
+ type: "files_reverted",
2687
+ chatId,
2688
+ files: revertedFiles,
2689
+ count: revertedFiles.length,
2690
+ };
2691
+ conn.send(revertMsg);
2692
+ conn.broadcast(revertMsg);
2693
+ }
2694
+ }
2695
+
2696
+ // Revert shell commands (Strategy 1: undo mkdir, touch, cp, mv) — only when user chose to revert
2697
+ const shellUndosToRevert = shouldRevert
2698
+ ? getShellUndosToRevert(chatId, regenMessageIndex)
2699
+ : [];
2700
+ const revertedShellCount = [];
2701
+ if (shellUndosToRevert.length > 0) {
2702
+ serverLog(
2703
+ "[WebSocket] Reverting",
2704
+ shellUndosToRevert.length,
2705
+ "shell undo(s) before regeneration",
2706
+ );
2707
+ for (const entry of shellUndosToRevert) {
2708
+ try {
2709
+ if (entry.op === "mkdir" && entry.path) {
2710
+ if (existsSync(entry.path)) {
2711
+ try {
2712
+ if (readdirSync(entry.path).length === 0) {
2713
+ rmdirSync(entry.path);
2714
+ serverLog(
2715
+ "[WebSocket] Reverted mkdir:",
2716
+ entry.path,
2717
+ );
2718
+ revertedShellCount.push("mkdir");
2719
+ }
2720
+ } catch (e) {
2721
+ console.warn(
2722
+ "[WebSocket] Failed to rmdir:",
2723
+ entry.path,
2724
+ e.message,
2725
+ );
2726
+ revertErrors.push({
2727
+ op: entry.op,
2728
+ path: entry.path,
2729
+ message: e.message,
2730
+ });
2731
+ }
2732
+ }
2733
+ } else if (entry.op === "touch" && entry.path) {
2734
+ if (existsSync(entry.path)) {
2735
+ try {
2736
+ const st = statSync(entry.path);
2737
+ if (st.isFile()) {
2738
+ unlinkSync(entry.path);
2739
+ serverLog(
2740
+ "[WebSocket] Reverted touch:",
2741
+ entry.path,
2742
+ );
2743
+ revertedShellCount.push("touch");
2744
+ }
2745
+ } catch (e) {
2746
+ console.warn(
2747
+ "[WebSocket] Failed to unlink touch:",
2748
+ entry.path,
2749
+ e.message,
2750
+ );
2751
+ revertErrors.push({
2752
+ op: entry.op,
2753
+ path: entry.path,
2754
+ message: e.message,
2755
+ });
2756
+ }
2757
+ }
2758
+ } else if (entry.op === "cp" && entry.path_dest) {
2759
+ if (existsSync(entry.path_dest)) {
2760
+ try {
2761
+ const st = statSync(entry.path_dest);
2762
+ if (st.isDirectory()) {
2763
+ rmSync(entry.path_dest, { recursive: true });
2764
+ } else {
2765
+ unlinkSync(entry.path_dest);
2766
+ }
2767
+ serverLog(
2768
+ "[WebSocket] Reverted cp (removed dest):",
2769
+ entry.path_dest,
2770
+ );
2771
+ revertedShellCount.push("cp");
2772
+ } catch (e) {
2773
+ console.warn(
2774
+ "[WebSocket] Failed to unlink cp dest:",
2775
+ entry.path_dest,
2776
+ e.message,
2777
+ );
2778
+ revertErrors.push({
2779
+ op: entry.op,
2780
+ path: entry.path_dest,
2781
+ message: e.message,
2782
+ });
2783
+ }
2784
+ }
2785
+ } else if (
2786
+ entry.op === "mv" &&
2787
+ entry.path_src &&
2788
+ entry.path_dest
2789
+ ) {
2790
+ if (existsSync(entry.path_dest)) {
2791
+ try {
2792
+ renameSync(entry.path_dest, entry.path_src);
2793
+ serverLog(
2794
+ "[WebSocket] Reverted mv:",
2795
+ entry.path_dest,
2796
+ "->",
2797
+ entry.path_src,
2798
+ );
2799
+ revertedShellCount.push("mv");
2800
+ } catch (e) {
2801
+ console.warn(
2802
+ "[WebSocket] Failed to revert mv:",
2803
+ e.message,
2804
+ );
2805
+ revertErrors.push({
2806
+ op: entry.op,
2807
+ path: entry.path_dest,
2808
+ message: e.message,
2809
+ });
2810
+ }
2811
+ }
2812
+ }
2813
+ } catch (err) {
2814
+ console.warn(
2815
+ "[WebSocket] Failed to revert shell op:",
2816
+ entry.op,
2817
+ err.message,
2818
+ );
2819
+ revertErrors.push({
2820
+ op: entry.op,
2821
+ path: entry.path || entry.path_dest,
2822
+ message: err.message,
2823
+ });
2824
+ }
2825
+ }
2826
+ deleteShellUndos(chatId, regenMessageIndex);
2827
+ if (revertedShellCount.length > 0) {
2828
+ const shellRevertMsg = {
2829
+ type: "shell_reverted",
2830
+ chatId,
2831
+ count: revertedShellCount.length,
2832
+ };
2833
+ conn.send(shellRevertMsg);
2834
+ conn.broadcast(shellRevertMsg);
2835
+ }
2836
+ }
2837
+
2838
+ // Notify client of any revert errors (so user can see e.g. "1 file failed: permission denied")
2839
+ if (revertErrors.length > 0) {
2840
+ const errMsg = {
2841
+ type: "revert_errors",
2842
+ chatId,
2843
+ errors: revertErrors,
2844
+ count: revertErrors.length,
2845
+ };
2846
+ conn.send(errMsg);
2847
+ conn.broadcast(errMsg);
2848
+ }
2849
+
2850
+ // ============================================
2851
+ // STEP 3: Handle message cleanup and updates
2852
+ // ============================================
2853
+ const lastUserMsg = db
2854
+ .prepare(
2855
+ `
2856
+ SELECT id, content, metadata FROM messages
2857
+ WHERE chat_id = ? AND role = 'user'
2858
+ ORDER BY id DESC
2859
+ LIMIT 1
2860
+ `,
2861
+ )
2862
+ .get(chatId);
2863
+
2864
+ if (lastUserMsg) {
2865
+ // Check if the user message content was edited (compare with what frontend sent)
2866
+ // If userMessage differs from stored content, update the database
2867
+ if (userMessage && userMessage !== lastUserMsg.content) {
2868
+ db.prepare(
2869
+ `
2870
+ UPDATE messages SET content = ? WHERE id = ?
2871
+ `,
2872
+ ).run(userMessage, lastUserMsg.id);
2873
+ serverLog(
2874
+ "[WebSocket] Updated user message content for edit (id:",
2875
+ lastUserMsg.id,
2876
+ ")",
2877
+ );
2878
+ }
2879
+
2880
+ // Delete all assistant messages that came after the last user message
2881
+ // Using id > ? instead of created_at > ? for reliable ordering
2882
+ const deleteResult = db
2883
+ .prepare(
2884
+ `
2885
+ DELETE FROM messages
2886
+ WHERE chat_id = ? AND role = 'assistant' AND id > ?
2887
+ `,
2888
+ )
2889
+ .run(chatId, lastUserMsg.id);
2890
+
2891
+ if (deleteResult.changes > 0) {
2892
+ serverLog(
2893
+ "[WebSocket] Deleted",
2894
+ deleteResult.changes,
2895
+ "old assistant message(s) before regeneration",
2896
+ );
2897
+ }
2898
+ }
2899
+
2900
+ // Get chat history (EXCLUDING the last user message since runAgent appends it separately)
2901
+ // This prevents the user message from appearing twice in the conversation
2902
+ const allMessages = db
2903
+ .prepare(
2904
+ `
2905
+ SELECT role, content FROM messages WHERE chat_id = ? ORDER BY created_at ASC
2906
+ `,
2907
+ )
2908
+ .all(chatId);
2909
+
2910
+ // Remove the last user message from history (runAgent will add it back via userMessage param)
2911
+ const history =
2912
+ allMessages.length > 0 &&
2913
+ allMessages[allMessages.length - 1].role === "user"
2914
+ ? allMessages.slice(0, -1)
2915
+ : allMessages;
2916
+
2917
+ // ============================================
2918
+ // STEP 4: Set up snapshot function for new generation
2919
+ // ============================================
2920
+ const newMessageCount =
2921
+ db
2922
+ .prepare(
2923
+ "SELECT COUNT(*) as count FROM messages WHERE chat_id = ?",
2924
+ )
2925
+ .get(chatId)?.count || 0;
2926
+ const newAssistantMessageIndex = newMessageCount; // Where the new assistant message will be
2927
+
2928
+ const regenSnapshotFn = (snapshot) => {
2929
+ serverLog(
2930
+ "[WebSocket] Regen: Capturing file snapshot for undo:",
2931
+ snapshot.path,
2932
+ "existed:",
2933
+ snapshot.existed,
2934
+ );
2935
+ saveFileSnapshot(
2936
+ chatId,
2937
+ newAssistantMessageIndex,
2938
+ null,
2939
+ snapshot.path,
2940
+ snapshot.content,
2941
+ snapshot.existed,
2942
+ snapshot.createdDir ?? null,
2943
+ );
2944
+ };
2945
+
2946
+ // Run agent and stream response
2947
+ let fullResponse = "";
2948
+ let regenThinkingContent = ""; // Track thinking/reasoning tokens
2949
+
2950
+ // Track generation stats
2951
+ const regenStartTime = Date.now();
2952
+ let regenTokenCount = 0; // Fallback: chunk counting
2953
+ let regenUsageStats = null; // Real usage from API
2954
+
2955
+ // Track tool calls for persistence
2956
+ const regenToolCalls = [];
2957
+
2958
+ // Track search data from web_search tool
2959
+ let regenSearchData = null;
2960
+
2961
+ // Pass custom system message and model through config if provided
2962
+ const regenConfig = { ...config.store };
2963
+ if (regenSystemMessage) {
2964
+ regenConfig.customSystemMessage = regenSystemMessage;
2965
+ }
2966
+ // Use model from frontend if provided, otherwise fall back to config default
2967
+ if (regenModel) {
2968
+ regenConfig.model = regenModel;
2969
+ }
2970
+ // Pass user's access token so tools (e.g. search_memory) can search Supabase
2971
+ if (conn.lastAccessToken) {
2972
+ regenConfig.accessToken = conn.lastAccessToken;
2973
+ }
2974
+
2975
+ // Merge frontend API keys: CLI keys take priority, frontend keys fill gaps
2976
+ if (
2977
+ regenFrontendApiKeys &&
2978
+ typeof regenFrontendApiKeys === "object"
2979
+ ) {
2980
+ const mergedKeys = { ...(regenConfig.apiKeys || {}) };
2981
+ for (const [provider, key] of Object.entries(
2982
+ regenFrontendApiKeys,
2983
+ )) {
2984
+ if (key && typeof key === "string" && !mergedKeys[provider]) {
2985
+ mergedKeys[provider] = key.trim();
2986
+ serverLog(
2987
+ "[WebSocket] Regen: Using frontend API key for provider:",
2988
+ provider,
2989
+ );
2990
+ }
2991
+ }
2992
+ regenConfig.apiKeys = mergedKeys;
2993
+ }
2994
+
2995
+ try {
2996
+ // Check if this is an image generation model for regeneration
2997
+ const effectiveRegenModel =
2998
+ regenConfig.model || config.store.model;
2999
+ const isImageRegenModel = isImageModel(effectiveRegenModel);
3000
+
3001
+ serverLog(
3002
+ "[WebSocket] Regen Model:",
3003
+ effectiveRegenModel,
3004
+ "isImageModel:",
3005
+ isImageRegenModel,
3006
+ );
3007
+
3008
+ // Include images from the last user message for vision (e.g. regen after attaching image)
3009
+ const regenImages = (() => {
3010
+ try {
3011
+ const meta = lastUserMsg?.metadata
3012
+ ? JSON.parse(lastUserMsg.metadata)
3013
+ : {};
3014
+ return meta.images || [];
3015
+ } catch {
3016
+ return [];
3017
+ }
3018
+ })();
3019
+ const regenUserMessageWithImages = regenImages?.length
3020
+ ? { role: "user", content: userMessage, images: regenImages }
3021
+ : { role: "user", content: userMessage };
3022
+ const regenGenerator = isImageRegenModel
3023
+ ? streamInference(
3024
+ effectiveRegenModel,
3025
+ [regenUserMessageWithImages],
3026
+ "",
3027
+ regenConfig,
3028
+ )
3029
+ : runAgent(userMessage, history, regenConfig, {
3030
+ snapshotFn: regenSnapshotFn,
3031
+ images: regenImages,
3032
+ });
3033
+
3034
+ for await (const chunk of regenGenerator) {
3035
+ // Check if THIS chat's regeneration was aborted
3036
+ const thisGen = activeGenerations.get(chatId);
3037
+ if (
3038
+ !thisGen ||
3039
+ !thisGen.isGenerating ||
3040
+ thisGen.abortController?.signal.aborted
3041
+ ) {
3042
+ serverLog(
3043
+ "[WebSocket] Regeneration aborted for chat:",
3044
+ chatId,
3045
+ );
3046
+ break;
3047
+ }
3048
+
3049
+ let regenMsg = { ...chunk, chatId };
3050
+ if (chunk.type === "image") {
3051
+ let imageUrl = null;
3052
+ try {
3053
+ const imagesDir = join(process.cwd(), "generated_images");
3054
+ if (!existsSync(imagesDir))
3055
+ mkdirSync(imagesDir, { recursive: true });
3056
+ const filename = `gemini_regen_${Date.now()}.png`;
3057
+ const filePath = join(imagesDir, filename);
3058
+ writeFileSync(
3059
+ filePath,
3060
+ Buffer.from(chunk.content, "base64"),
3061
+ );
3062
+ imageUrl = `/api/generated-images/${filename}`;
3063
+ } catch (err) {
3064
+ console.error(
3065
+ "[WebSocket] Failed to save image during regen:",
3066
+ err,
3067
+ );
3068
+ }
3069
+ regenMsg = imageUrl
3070
+ ? { type: "image_url", content: imageUrl, chatId }
3071
+ : regenMsg;
3072
+ }
3073
+ conn.send(regenMsg);
3074
+
3075
+ if (chunk.type === "usage") {
3076
+ // Real usage stats from API
3077
+ regenUsageStats = {
3078
+ inputTokens: chunk.inputTokens || 0,
3079
+ outputTokens: chunk.outputTokens || 0,
3080
+ totalTokens: chunk.totalTokens || 0,
3081
+ thinkingTokens: chunk.thinkingTokens || 0,
3082
+ };
3083
+ serverLog(
3084
+ "[WebSocket] Regen real usage stats:",
3085
+ regenUsageStats,
3086
+ );
3087
+ } else if (chunk.type === "thinking") {
3088
+ // Track thinking/reasoning tokens
3089
+ regenThinkingContent += chunk.content;
3090
+ regenTokenCount++;
3091
+ } else if (chunk.type === "text") {
3092
+ fullResponse += chunk.content;
3093
+ regenTokenCount++;
3094
+ } else if (
3095
+ chunk.type === "image" ||
3096
+ chunk.type === "image_url"
3097
+ ) {
3098
+ const imageContent =
3099
+ chunk.type === "image_url"
3100
+ ? chunk.content
3101
+ : regenMsg.content;
3102
+ if (imageContent) {
3103
+ fullResponse += `\n\n![Generated Image](${imageContent})\n\n`;
3104
+ regenTokenCount += 100;
3105
+ }
3106
+ } else if (chunk.type === "tool_start") {
3107
+ // Track tool call start for persistence
3108
+ // Calculate content position, adjusting to nearest word boundary to avoid splitting words
3109
+ const regenCleanedSoFar = cleanResponseText(fullResponse);
3110
+ let regenContentPosition = regenCleanedSoFar.length;
3111
+
3112
+ // Find the nearest word boundary (previous space or newline)
3113
+ if (
3114
+ regenContentPosition > 0 &&
3115
+ regenCleanedSoFar.length > 0
3116
+ ) {
3117
+ const lastChar =
3118
+ regenCleanedSoFar[regenCleanedSoFar.length - 1];
3119
+ if (lastChar && !/\s/.test(lastChar)) {
3120
+ const lastSpaceIdx = Math.max(
3121
+ regenCleanedSoFar.lastIndexOf(" "),
3122
+ regenCleanedSoFar.lastIndexOf("\n"),
3123
+ regenCleanedSoFar.lastIndexOf("\t"),
3124
+ );
3125
+ if (lastSpaceIdx > 0) {
3126
+ regenContentPosition = lastSpaceIdx + 1;
3127
+ }
3128
+ }
3129
+ }
3130
+
3131
+ regenToolCalls.push({
3132
+ id: chunk.callId,
3133
+ tool: chunk.name,
3134
+ params: chunk.args || {},
3135
+ status: "running",
3136
+ startTime: Date.now(),
3137
+ contentPosition: regenContentPosition,
3138
+ });
3139
+ } else if (
3140
+ chunk.type === "tool_result" ||
3141
+ chunk.type === "tool_error"
3142
+ ) {
3143
+ // Update tool call with result
3144
+ const toolIndex = chunk.callId
3145
+ ? regenToolCalls.findIndex((t) => t.id === chunk.callId)
3146
+ : regenToolCalls.findIndex(
3147
+ (t) =>
3148
+ t.tool === chunk.name && t.status === "running",
3149
+ );
3150
+ if (toolIndex !== -1) {
3151
+ regenToolCalls[toolIndex] = {
3152
+ ...regenToolCalls[toolIndex],
3153
+ status:
3154
+ chunk.type === "tool_error" ? "error" : "complete",
3155
+ result: chunk.result,
3156
+ error: chunk.error,
3157
+ endTime: Date.now(),
3158
+ };
3159
+
3160
+ // Capture search data from web_search tool for persistence
3161
+ if (
3162
+ chunk.name === "web_search" &&
3163
+ chunk.result?.results
3164
+ ) {
3165
+ regenSearchData = {
3166
+ success: true,
3167
+ results: chunk.result.results,
3168
+ count: chunk.result.results?.length || 0,
3169
+ };
3170
+ }
3171
+ }
3172
+ }
3173
+ }
3174
+
3175
+ // Only save if THIS chat was not aborted and we have content
3176
+ const thisGenFinal = activeGenerations.get(chatId);
3177
+ if (thisGenFinal?.isGenerating && fullResponse.length > 0) {
3178
+ // Clean the response
3179
+ const cleanedResponse = cleanResponseText(fullResponse);
3180
+
3181
+ // Calculate generation stats - use real API stats when available
3182
+ // Token estimation: ~4 characters per token for English (more accurate than chunk counting)
3183
+ const regenElapsed = (Date.now() - regenStartTime) / 1000;
3184
+ const regenEstimatedTokens = Math.ceil(
3185
+ (fullResponse.length + regenThinkingContent.length) / 4,
3186
+ );
3187
+ const regenFinalOutputTokens =
3188
+ regenUsageStats != null
3189
+ ? (regenUsageStats.outputTokens ?? 0) +
3190
+ (regenUsageStats.thinkingTokens ?? 0)
3191
+ : regenEstimatedTokens;
3192
+ const regenFinalInputTokens =
3193
+ regenUsageStats?.inputTokens || 0;
3194
+ const regenTps =
3195
+ regenElapsed > 0
3196
+ ? Math.round(
3197
+ (regenFinalOutputTokens / regenElapsed) * 100,
3198
+ ) / 100
3199
+ : 0;
3200
+
3201
+ serverLog(
3202
+ "[WebSocket] Regen final stats - real:",
3203
+ !!regenUsageStats,
3204
+ "outputTokens:",
3205
+ regenFinalOutputTokens,
3206
+ "estimated:",
3207
+ regenEstimatedTokens,
3208
+ "tps:",
3209
+ regenTps,
3210
+ );
3211
+
3212
+ // Check if chat still exists before saving
3213
+ const chatExists = db
3214
+ .prepare("SELECT id FROM chats WHERE id = ?")
3215
+ .get(chatId);
3216
+ if (!chatExists) {
3217
+ console.warn(
3218
+ "[WebSocket] Chat",
3219
+ chatId,
3220
+ "was deleted during regeneration - skipping save",
3221
+ );
3222
+ const regenDeletedChatId = chatId;
3223
+ const regenDeletedMsg = {
3224
+ type: "done",
3225
+ chatId: regenDeletedChatId,
3226
+ tps: regenTps,
3227
+ numTokens: regenFinalOutputTokens,
3228
+ deleted: true,
3229
+ };
3230
+ conn.send(regenDeletedMsg);
3231
+ conn.broadcast(regenDeletedMsg);
3232
+ } else {
3233
+ // Build rich context content with inline tool calls and results
3234
+ const regenRichContent = buildRichContextContent(
3235
+ cleanedResponse,
3236
+ regenToolCalls,
3237
+ );
3238
+
3239
+ // Save assistant message with rich content and metadata
3240
+ db.prepare(
3241
+ `
3242
+ INSERT INTO messages (chat_id, role, content, metadata) VALUES (?, ?, ?, ?)
3243
+ `,
3244
+ ).run(
3245
+ chatId,
3246
+ "assistant",
3247
+ regenRichContent,
3248
+ JSON.stringify({
3249
+ model: regenConfig.model, // Use the model from config for this regeneration
3250
+ tps: regenTps,
3251
+ numTokens: regenFinalOutputTokens,
3252
+ inputTokens: regenFinalInputTokens,
3253
+ _searchData: regenSearchData,
3254
+ toolCalls:
3255
+ regenToolCalls.length > 0
3256
+ ? regenToolCalls
3257
+ : undefined,
3258
+ thinkingContent:
3259
+ regenThinkingContent.length > 0
3260
+ ? regenThinkingContent
3261
+ : undefined,
3262
+ }),
3263
+ );
3264
+ const regenCloudId = chatId;
3265
+ // NOTE: Frontend is the single Supabase writer — CLI only persists to local SQLite.
3266
+ // Title is set by frontend via backend /api/generate-chat-title at end of message.
3267
+
3268
+ // Update chat timestamp
3269
+ db.prepare(
3270
+ "UPDATE chats SET updated_at = CURRENT_TIMESTAMP WHERE id = ?",
3271
+ ).run(chatId);
3272
+
3273
+ const regenDoneChatId = regenCloudId ?? chatId;
3274
+ // Include complete message so frontend doesn't need to reconstruct
3275
+ const regenDoneMsg = {
3276
+ type: "done",
3277
+ chatId: regenDoneChatId,
3278
+ tps: regenTps,
3279
+ numTokens: regenFinalOutputTokens,
3280
+ message: {
3281
+ role: "assistant",
3282
+ content: cleanedResponse, // Clean version for display
3283
+ fullContent: regenRichContent, // Rich version with tool history for AI context
3284
+ toolCalls:
3285
+ regenToolCalls.length > 0
3286
+ ? regenToolCalls
3287
+ : undefined,
3288
+ _searchData: regenSearchData,
3289
+ thinkingContent:
3290
+ regenThinkingContent.length > 0
3291
+ ? regenThinkingContent
3292
+ : undefined,
3293
+ tps: regenTps,
3294
+ numTokens: regenFinalOutputTokens,
3295
+ inputTokens: regenFinalInputTokens,
3296
+ model: regenConfig.model, // Use the model from config for this regeneration
3297
+ },
3298
+ };
3299
+ conn.send(regenDoneMsg);
3300
+ conn.broadcast(regenDoneMsg);
3301
+ }
3302
+ } else if (thisGenFinal?.isGenerating) {
3303
+ // Regeneration completed but with no text content (e.g., only tool calls, or max iterations reached)
3304
+ // Still need to send done event so frontend knows generation is finished
3305
+ const regenElapsed = (Date.now() - regenStartTime) / 1000;
3306
+ const regenFinalOutputTokens =
3307
+ (regenUsageStats?.outputTokens ?? 0) +
3308
+ (regenUsageStats?.thinkingTokens ?? 0);
3309
+ const regenTps =
3310
+ regenElapsed > 0
3311
+ ? Math.round(
3312
+ (regenFinalOutputTokens / regenElapsed) * 100,
3313
+ ) / 100
3314
+ : 0;
3315
+
3316
+ serverLog(
3317
+ "[WebSocket] Sending done for regen (no content):",
3318
+ chatId,
3319
+ );
3320
+ const emptyRegenDoneChatId = chatId;
3321
+ const emptyRegenDoneMsg = {
3322
+ type: "done",
3323
+ chatId: emptyRegenDoneChatId,
3324
+ tps: regenTps,
3325
+ numTokens: regenFinalOutputTokens,
3326
+ message: {
3327
+ role: "assistant",
3328
+ content: "",
3329
+ toolCalls:
3330
+ regenToolCalls.length > 0 ? regenToolCalls : undefined,
3331
+ _searchData: regenSearchData,
3332
+ thinkingContent:
3333
+ regenThinkingContent.length > 0
3334
+ ? regenThinkingContent
3335
+ : undefined,
3336
+ tps: regenTps,
3337
+ numTokens: regenFinalOutputTokens,
3338
+ model: regenConfig.model,
3339
+ },
3340
+ };
3341
+ conn.send(emptyRegenDoneMsg);
3342
+ conn.broadcast(emptyRegenDoneMsg);
3343
+ }
3344
+
3345
+ // Clean up THIS chat's generation state
3346
+ activeGenerations.delete(chatId);
3347
+ } catch (err) {
3348
+ if (err.name === "AbortError") {
3349
+ serverLog(
3350
+ "[WebSocket] Regeneration aborted by user for chat:",
3351
+ chatId,
3352
+ );
3353
+ } else {
3354
+ console.error("Regeneration error for chat:", chatId, err);
3355
+ const errorMsg = {
3356
+ type: "error",
3357
+ message: err.message,
3358
+ chatId,
3359
+ };
3360
+ conn.send(errorMsg);
3361
+ conn.broadcast(errorMsg);
3362
+ }
3363
+ activeGenerations.delete(chatId);
3364
+ }
3365
+ }
3366
+ } catch (err) {
3367
+ console.error("WebSocket message error:", err);
3368
+ conn.send({ type: "error", message: err.message });
3369
+ }
3370
+ };
3371
+ }
3372
+ wsHandlerFactory = createWsMessageHandler;
3373
+
3374
+ const handleMessage = wsHandlerFactory(conn, activeGenerations);
3375
+ socket.on("message", (raw) => handleMessage(raw));
3376
+
3377
+ socket.on("close", () => {
3378
+ serverLog(
3379
+ "WebSocket client disconnected, remaining clients:",
3380
+ connectedClients.size - 1,
3381
+ );
3382
+ connectedClients.delete(socket);
3383
+ });
3384
+ });
3385
+ });
3386
+
3387
+ // ============================================
3388
+ // Email Webhook (MyMX)
3389
+ // ============================================
3390
+
3391
+ // MyMX webhook endpoint with raw body capture for signature verification
3392
// POST /api/email/webhook (MyMX).
// A preParsing hook buffers the request payload and stores it on
// `request.rawBody` so the handler can use the unparsed text (the file's own
// comment says this is for signature verification). The hook then returns a
// fresh stream with the same content so Fastify can still parse the body.
fastify.post(
  "/api/email/webhook",
  {
    /**
     * Captures the raw request body before body parsing.
     *
     * @param {object} request - Fastify request; receives a `rawBody` string.
     * @param {object} reply - Fastify reply (unused).
     * @param {AsyncIterable<Buffer>} payload - Incoming request stream.
     * @returns {Promise<import("stream").Readable>} Replacement payload stream.
     */
    preParsing: async (request, reply, payload) => {
      const chunks = [];
      for await (const chunk of payload) {
        chunks.push(chunk);
      }
      const rawBuffer = Buffer.concat(chunks);
      const rawBody = rawBuffer.toString("utf-8");
      request.rawBody = rawBody;

      // Return a new readable stream with the same content for Fastify to parse.
      const { Readable } = await import("stream");
      const replayStream = Readable.from([rawBody]);
      // Fastify's hook documentation asks replacement streams to expose
      // `receivedEncodedLength` so the body can be matched against the
      // Content-Length header; report the number of bytes actually received.
      replayStream.receivedEncodedLength = rawBuffer.length;
      return replayStream;
    },
  },
  async (request, reply) => {
    // Loaded lazily; the handler receives the plain config store object.
    const { handleEmailWebhook } = await import("./email/client.js");
    return handleEmailWebhook(request, reply, config.store);
  },
);
3413
+
3414
+ // ============================================
3415
+ // Memory/Embeddings API
3416
+ // ============================================
3417
+
3418
// GET /api/memory — placeholder for memory visualization data. Until the
// feature is implemented it always answers with empty point/cluster lists.
fastify.get("/api/memory", async () => ({ points: [], clusters: [] }));
3422
+
3423
+ // ============================================
3424
+ // RAG (Retrieval Augmented Generation) API
3425
+ // ============================================
3426
+
3427
+ // Import RAG database functions
3428
+ const {
3429
+ createRagDocument,
3430
+ getAllRagDocuments,
3431
+ getRagDocument,
3432
+ updateRagDocument: updateRagDoc,
3433
+ deleteRagDocument: deleteRagDoc,
3434
+ addRagChunks,
3435
+ getRagChunksByDocument,
3436
+ getRagChunksByDocuments,
3437
+ } = await import("./storage/db.js");
3438
+
3439
+ // List all RAG documents
3440
// GET /api/rag/documents — returns every stored RAG document.
// Failures are logged and reported as `{ success: false, error }`.
fastify.get("/api/rag/documents", async () => {
  let documents;
  try {
    documents = getAllRagDocuments();
  } catch (err) {
    console.error("[Server] RAG list error:", err);
    return { success: false, error: err.message };
  }
  return { success: true, documents };
});
3449
+
3450
+ // Get single RAG document
3451
// GET /api/rag/documents/:id — looks up one RAG document by its numeric id.
// Responds with `{ success: false, error }` when the lookup finds nothing or
// throws.
fastify.get("/api/rag/documents/:id", async (request) => {
  const { id } = request.params;
  try {
    const found = getRagDocument(parseInt(id, 10));
    return found
      ? { success: true, document: found }
      : { success: false, error: "Document not found" };
  } catch (err) {
    console.error("[Server] RAG get error:", err);
    return { success: false, error: err.message };
  }
});
3464
+
3465
+ // Get chunks for a RAG document (for memory visualization when docs are on server)
3466
// GET /api/rag/documents/:id/chunks — returns the stored chunks of one RAG
// document. The document itself is fetched first so an unknown id yields
// "Document not found" rather than an empty chunk list.
fastify.get("/api/rag/documents/:id/chunks", async (request) => {
  const { id } = request.params;
  try {
    const numericId = parseInt(id, 10);
    if (!getRagDocument(numericId)) {
      return { success: false, error: "Document not found" };
    }
    return { success: true, chunks: getRagChunksByDocument(numericId) };
  } catch (err) {
    console.error("[Server] RAG chunks error:", err);
    return { success: false, error: err.message };
  }
});
3481
+
3482
+ // Create RAG document with chunks (accepts pre-processed data from frontend)
3483
// POST /api/rag/documents — creates a RAG document from data the client has
// already processed, then stores its chunks.
//
// Body fields: `name` (required), `chunks` (required, non-empty array),
// and optional `summary`, `fileCount` (defaults to 1), `files`.
// Responds with `{ success: true, document }` or `{ success: false, error }`.
fastify.post("/api/rag/documents", async (request) => {
  // A request without a body must produce the validation error below instead
  // of throwing while destructuring.
  const { name, chunks, summary, fileCount, files } = request.body ?? {};

  if (!name || !Array.isArray(chunks) || chunks.length === 0) {
    return { success: false, error: "Name and chunks are required" };
  }

  serverLog(
    "[Server] Creating RAG document:",
    name,
    "with",
    chunks.length,
    "chunks",
  );

  try {
    // Compute the timestamp once so the stored record and the response agree.
    const uploadDate = new Date().toISOString();

    const docId = createRagDocument({
      name,
      chunkCount: chunks.length,
      fileCount: fileCount || 1,
      files: files || null,
      summary: summary || null,
      uploadDate,
    });

    // Add chunks (embeddings should already be included).
    addRagChunks(docId, chunks);

    serverLog("[Server] RAG document created with ID:", docId);

    return {
      success: true,
      document: {
        id: docId,
        name,
        chunkCount: chunks.length,
        fileCount: fileCount || 1,
        uploadDate,
        summary,
      },
    };
  } catch (err) {
    console.error("[Server] RAG create error:", err);
    return { success: false, error: err.message };
  }
});
3530
+
3531
+ // Update RAG document (rename)
3532
// PUT /api/rag/documents/:id — renames a RAG document.
// Body field: `name` (required, non-empty string).
// Responds with `{ success: true }` or `{ success: false, error }`.
fastify.put("/api/rag/documents/:id", async (request) => {
  const { id } = request.params;
  // Tolerate a missing body so it is reported as a validation error rather
  // than an exception thrown during destructuring.
  const { name } = request.body ?? {};

  // Renaming is the only supported update, so a usable name is mandatory.
  if (typeof name !== "string" || name.trim() === "") {
    return { success: false, error: "Name is required" };
  }

  try {
    updateRagDoc(parseInt(id, 10), { name });
    return { success: true };
  } catch (err) {
    console.error("[Server] RAG update error:", err);
    return { success: false, error: err.message };
  }
});
3544
+
3545
+ // Delete RAG document
3546
// DELETE /api/rag/documents/:id — removes a RAG document by numeric id.
// Responds with `{ success: true }`, or `{ success: false, error }` if the
// delete throws.
fastify.delete("/api/rag/documents/:id", async (request) => {
  const rawId = request.params.id;

  serverLog("[Server] Deleting RAG document:", rawId);

  try {
    deleteRagDoc(parseInt(rawId, 10));
  } catch (err) {
    console.error("[Server] RAG delete error:", err);
    return { success: false, error: err.message };
  }
  return { success: true };
});
3559
+
3560
+ // Generate embeddings for text chunks using OpenAI API
3561
// POST /api/rag/embed — generates embeddings for text chunks through the
// OpenAI embeddings API (model "text-embedding-3-small").
//
// Body field: `texts` (required, non-empty array).
// Responds with `{ success: true, embeddings }` (one embedding per returned
// item) or `{ success: false, error }`.
fastify.post("/api/rag/embed", async (request) => {
  // A missing body is reported through the validation error below instead of
  // throwing during destructuring.
  const { texts } = request.body ?? {};

  if (!Array.isArray(texts) || texts.length === 0) {
    return { success: false, error: "Texts array is required" };
  }

  const apiKey = config.get("apiKeys.openai");
  if (!apiKey) {
    return {
      success: false,
      error: "OpenAI API key not configured on server",
    };
  }

  serverLog("[Server] Generating embeddings for", texts.length, "texts");

  try {
    const response = await fetch("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: "text-embedding-3-small",
        input: texts,
      }),
    });

    if (!response.ok) {
      // The error body may not be JSON; fall back to the HTTP status.
      const errorData = await response.json().catch(() => ({}));
      throw new Error(
        errorData.error?.message || `OpenAI API error: ${response.status}`,
      );
    }

    const data = await response.json();
    const embeddings = data.data.map((item) => item.embedding);

    return { success: true, embeddings };
  } catch (err) {
    console.error("[Server] Embedding error:", err);
    return { success: false, error: err.message };
  }
});
3608
+
3609
+ // RAG search - find relevant chunks for a query
3610
// POST /api/rag/search — finds the stored chunks most similar to a query.
//
// Body fields: `query` (required string), `docIds` (required, non-empty
// array), `topK` (default 10), `minSimilarity` (default 0.4).
// The query is embedded with OpenAI "text-embedding-3-small", every chunk of
// the requested documents is scored by cosine similarity, and the top matches
// are returned without their embedding vectors.
// Responds with `{ success: true, results, query }` or
// `{ success: false, error }`.
fastify.post("/api/rag/search", async (request) => {
  // A missing body is reported through the validation errors below instead of
  // throwing during destructuring.
  const {
    query,
    docIds,
    topK = 10,
    minSimilarity = 0.4,
  } = request.body ?? {};

  // `query.substring` is called below, so reject non-string values up front.
  if (!query || typeof query !== "string") {
    return { success: false, error: "Query is required" };
  }

  if (!Array.isArray(docIds) || docIds.length === 0) {
    return { success: false, error: "Document IDs are required" };
  }

  const apiKey = config.get("apiKeys.openai");
  if (!apiKey) {
    return {
      success: false,
      error: "OpenAI API key not configured on server",
    };
  }

  serverLog(
    "[Server] RAG search:",
    query.substring(0, 50),
    "... in",
    docIds.length,
    "documents",
  );

  try {
    // 1. Generate embedding for the query.
    const embedResponse = await fetch("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: "text-embedding-3-small",
        input: query,
      }),
    });

    if (!embedResponse.ok) {
      const errorData = await embedResponse.json().catch(() => ({}));
      throw new Error(
        errorData.error?.message || "Failed to generate query embedding",
      );
    }

    const embedData = await embedResponse.json();
    const queryEmbedding = embedData.data[0].embedding;

    // 2. Get chunks from the specified documents.
    const chunks = getRagChunksByDocuments(
      docIds.map((id) => parseInt(id, 10)),
    );

    if (chunks.length === 0) {
      return { success: true, results: [], query };
    }

    // 3. Cosine similarity; yields 0 for missing, mismatched-length, or
    //    zero-norm vectors.
    const cosineSimilarity = (vec1, vec2) => {
      if (!vec1 || !vec2 || vec1.length !== vec2.length) return 0;
      let dotProduct = 0;
      let norm1 = 0;
      let norm2 = 0;
      for (let i = 0; i < vec1.length; i++) {
        dotProduct += vec1[i] * vec2[i];
        norm1 += vec1[i] * vec1[i];
        norm2 += vec2[i] * vec2[i];
      }
      norm1 = Math.sqrt(norm1);
      norm2 = Math.sqrt(norm2);
      if (norm1 === 0 || norm2 === 0) return 0;
      return dotProduct / (norm1 * norm2);
    };

    // 4. Score, filter, rank, truncate, and strip embeddings from the response.
    const results = chunks
      .map((chunk) => ({
        ...chunk,
        similarity: cosineSimilarity(queryEmbedding, chunk.embedding),
      }))
      .filter((chunk) => chunk.similarity >= minSimilarity)
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, topK)
      .map(({ embedding, ...rest }) => rest);

    serverLog("[Server] RAG search found", results.length, "relevant chunks");

    return { success: true, results, query };
  } catch (err) {
    console.error("[Server] RAG search error:", err);
    return { success: false, error: err.message };
  }
});
3709
+
3710
+ // Explicit /login route - serve SPA so login page loads (GitHub, Google, email)
3711
// GET /login — serves the SPA entry point so the client-side login page can
// load. Without a built frontend it answers 404 with a build hint.
fastify.get("/login", async (request, reply) => {
  if (!hasFrontend) {
    return reply
      .status(404)
      .send({ error: "Frontend not built. Run: npm run build:frontend" });
  }
  return reply.sendFile("index.html");
});
3719
+
3720
+ // Fallback to index.html for SPA routing
3721
// Not-found handler doubling as the SPA fallback.
//   - Unknown `/api/` URLs get a JSON 404.
//   - Anything else is answered with index.html so client-side routing works.
//   - If the frontend has not been built, a small help page is returned
//     (status 200) explaining how to build it.
fastify.setNotFoundHandler(async (request, reply) => {
  if (request.url.startsWith("/api/")) {
    return reply.status(404).send({ error: "Not found" });
  }

  // Serve index.html for client-side routing (only if frontend is built).
  if (hasFrontend) {
    return reply.sendFile("index.html");
  }

  // No frontend built - show helpful message. The instructions deliberately
  // avoid naming an absolute directory: a machine-specific path would be wrong
  // for every other installation.
  return reply.status(200).type("text/html").send(`
    <!DOCTYPE html>
    <html>
    <head>
      <title>Otherwise AI</title>
      <style>
        body { font-family: system-ui, sans-serif; max-width: 600px; margin: 100px auto; padding: 20px; }
        h1 { color: #333; }
        code { background: #f0f0f0; padding: 2px 8px; border-radius: 4px; }
        pre { background: #f0f0f0; padding: 16px; border-radius: 8px; overflow-x: auto; }
      </style>
    </head>
    <body>
      <h1>Otherwise AI Server Running</h1>
      <p>The server is running, but the frontend hasn't been built yet.</p>
      <p>To build the frontend, run this from the CLI project directory:</p>
      <pre>npm run build:frontend</pre>
      <p>Then restart the server.</p>
      <hr>
      <p><strong>API Status:</strong> <a href="/api/health">/api/health</a></p>
      <p><strong>Config:</strong> <code>otherwise config</code></p>
    </body>
    </html>
  `);
});
3758
+
3759
+ // Start server
3760
+ await fastify.listen({ port, host: "0.0.0.0" });
3761
+
3762
+ // Optional: connect to backend for remote access (otherwise.ai -> backend -> this CLI).
3763
+ // Connect immediately so Azure sees [WS] CLI connected; message handler uses wsHandlerFactory when set (after first local /ws client).
3764
// Remote access link. When a pairing token is available (from the start
// options, falling back to the saved config), open a connection to the backend
// and route its messages through the same handler factory the local /ws
// clients use.
const remoteToken =
  options.remotePairingToken ?? config.get("remote.pairingToken");
if (remoteToken) {
  remoteLinked = true;
  const remoteClient = await import("./remote/client.js");
  const backendUrl =
    config.get("remote.backendUrl") || remoteClient.getBackendWsUrl();
  const remoteActiveGenerations = new Map();

  // Name patterns used to tag Ollama models; evaluated in this order.
  const modelTagRules = [
    [/deepseek-r1|deepseek-v3|qwq|qwen3|gpt-oss|thinking|reason/, "reasoning"],
    [/llava|vision|bakllava|moondream/, "image-input"],
    [/codellama|codegemma|starcoder|deepseek-coder/, "code"],
  ];

  // Converts one Ollama listing entry into the shape sent to the backend.
  const toRemoteModel = (m) => {
    const lowered = (m.name || "").toLowerCase();
    const tags = modelTagRules
      .filter(([pattern]) => pattern.test(lowered))
      .map(([, tag]) => tag);
    return {
      id: m.id,
      name:
        m.name?.replace(/:latest$/, "").split(":")[0] ||
        m.id.replace(/^ollama:/, ""),
      provider: "Ollama",
      type: tags.length ? tags : [""],
      size: m.size,
    };
  };

  // Handles one message arriving from the backend. Does nothing until the
  // handler factory has been assigned.
  const onRemoteMessage = (raw, sendReply) => {
    if (!wsHandlerFactory) return;
    // send = to backend only; broadcast = to local Ink only. The handler calls
    // both per chunk, so backend and Ink each get the stream once (avoids
    // duplicate rendering in the CLI). sendLocal/broadcastLocal exist so the
    // chat handler's local-only events don't raise a TypeError; they are
    // no-ops because the remote browser handles its own Supabase persistence.
    const remoteConn = {
      send: (msg) => sendReply(msg),
      broadcast: (msg) => broadcastToAllLocalClients(msg),
      sendLocal: () => {},
      broadcastLocal: () => {},
    };
    const handle = wsHandlerFactory(remoteConn, remoteActiveGenerations);
    const text = typeof raw === "string" ? raw : raw.toString();
    return handle(text);
  };

  // On connect: remember the relay sender, then push the model lists and the
  // public config to the backend. Any failure is logged, not thrown.
  const onRemoteConnect = async (reply) => {
    remoteRelaySend = reply;
    serverLog(
      "[Server] Remote relay active: CLI messages will sync to otherwise.ai",
    );
    try {
      const publicConfig = getPublicConfig();
      const { listOllamaModels } = await import("./inference/ollama.js");
      const listed = await listOllamaModels(config);
      const ollamaModels = listed.map(toRemoteModel);
      if (ollamaModels.length) {
        reply({ type: "ollama_models", models: ollamaModels });
      }

      let openRouterModels = [];
      if (publicConfig.apiKeys?.openrouter) {
        try {
          const { fetchOpenRouterModels } =
            await import("./inference/openrouter.js");
          const key = config.get("apiKeys.openrouter");
          if (key) {
            openRouterModels = await fetchOpenRouterModels(key);
          }
        } catch (e) {
          serverLog(
            "[Server] Could not fetch OpenRouter models on connect:",
            e?.message,
          );
        }
      }

      reply({
        type: "cli_config",
        apiKeys: publicConfig.apiKeys || {},
        ollamaModels,
        openRouterModels,
        browserChannel: publicConfig.browserChannel ?? null,
        model: config.get("model") || "claude-sonnet-4-20250514",
      });
    } catch (e) {
      serverLog("[Server] Could not build remote config:", e?.message);
    }
  };

  // On disconnect: drop the relay sender.
  const onRemoteDisconnect = () => {
    remoteRelaySend = null;
    serverLog("[Server] Remote relay inactive");
  };

  // On rejected token: clear the saved pairing token.
  const onRemoteInvalidToken = () => {
    config.set("remote.pairingToken", null);
    serverLog(
      "[Server] Remote token cleared (expired or invalid). Run otherwise connect to link again.",
    );
  };

  remoteClient.connectToBackend(remoteToken, onRemoteMessage, {
    backendUrl,
    onConnect: onRemoteConnect,
    onDisconnect: onRemoteDisconnect,
    onInvalidToken: onRemoteInvalidToken,
  });
  serverLog("[Server] Remote backend connection started (otherwise.ai)");
}
3866
+
3867
+ return fastify;
3868
+ }
3869
+
3870
/**
 * Stops the scheduler and, if a server instance exists, closes it and clears
 * the module-level reference.
 *
 * @returns {Promise<void>} Resolves once the server (if any) has closed.
 */
export async function stopServer() {
  stopScheduler();

  // Nothing to close when no server instance is held.
  if (!fastify) {
    return;
  }

  await fastify.close();
  fastify = null;
}
3876
+ }