@projectservan8n/cnapse 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -745,134 +745,163 @@ async function chatOpenAI(messages, model) {
  const content = data.choices?.[0]?.message?.content || "";
  return { content, model };
  }
-
- // src/lib/screen.ts
- import { exec as exec3 } from "child_process";
- import { promisify as promisify3 } from "util";
- var execAsync3 = promisify3(exec3);
- async function getScreenDescription() {
- try {
- const platform = process.platform;
- if (platform === "win32") {
- const { stdout } = await execAsync3(`
- Add-Type -AssemblyName System.Windows.Forms
- $screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
- Write-Output "$($screen.Width)x$($screen.Height)"
- `, { shell: "powershell.exe" });
- return `Screen ${stdout.trim()} captured`;
- } else if (platform === "darwin") {
- const { stdout } = await execAsync3(`system_profiler SPDisplaysDataType | grep Resolution | head -1`);
- return `Screen ${stdout.trim()}`;
- } else {
- const { stdout } = await execAsync3(`xdpyinfo | grep dimensions | awk '{print $2}'`);
- return `Screen ${stdout.trim()} captured`;
+ async function chatWithVision(messages, screenshotBase64) {
+ const config = getConfig();
+ const systemPrompt = await getSystemPrompt();
+ const visionPrompt = systemPrompt + "\n\nYou can see the user's screen. Describe what you see and help them with their request.";
+ switch (config.provider) {
+ case "openrouter":
+ return chatWithVisionOpenRouter(messages, screenshotBase64, visionPrompt);
+ case "ollama":
+ return chatWithVisionOllama(messages, screenshotBase64, visionPrompt);
+ case "anthropic":
+ return chatWithVisionAnthropic(messages, screenshotBase64, visionPrompt);
+ case "openai":
+ return chatWithVisionOpenAI(messages, screenshotBase64, visionPrompt);
+ default:
+ throw new Error(`Vision not supported for provider: ${config.provider}`);
+ }
+ }
+ async function chatWithVisionOpenRouter(messages, screenshot, systemPrompt) {
+ const apiKey = getApiKey("openrouter");
+ if (!apiKey) throw new Error("OpenRouter API key not configured");
+ const config = getConfig();
+ let model = config.model;
+ if (!model.includes("gpt-5") && !model.includes("claude") && !model.includes("gemini")) {
+ model = "openai/gpt-5-nano";
+ }
+ const lastUserIdx = messages.length - 1;
+ const visionMessages = messages.map((m, i) => {
+ if (i === lastUserIdx && m.role === "user") {
+ return {
+ role: "user",
+ content: [
+ { type: "text", text: m.content },
+ { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
+ ]
+ };
  }
- } catch {
- return null;
+ return m;
+ });
+ const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
+ method: "POST",
+ headers: {
+ "Authorization": `Bearer ${apiKey}`,
+ "Content-Type": "application/json",
+ "HTTP-Referer": config.openrouter.siteUrl,
+ "X-Title": config.openrouter.appName
+ },
+ body: JSON.stringify({
+ model,
+ messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
+ max_tokens: 2048
+ })
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`OpenRouter vision error: ${response.status} - ${error}`);
  }
+ const data = await response.json();
+ return { content: data.choices?.[0]?.message?.content || "", model };
  }
+ async function chatWithVisionOllama(messages, screenshot, systemPrompt) {
+ const config = getConfig();
+ const visionModels = ["llava", "llama3.2-vision", "bakllava"];
+ const model = visionModels.find((m) => config.model.includes(m)) || "llava";
+ const lastUserMsg = messages.filter((m) => m.role === "user").pop();
+ const response = await fetch(`${config.ollamaHost}/api/generate`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ model,
+ prompt: `${systemPrompt}

- // src/hooks/useChat.ts
- var WELCOME_MESSAGE = {
- id: "0",
- role: "system",
- content: "Welcome to C-napse! Type your message and press Enter.\n\nShortcuts: Ctrl+H for help, Ctrl+P for provider",
- timestamp: /* @__PURE__ */ new Date()
- };
- function useChat(screenWatch = false) {
- const [messages, setMessages] = useState3([WELCOME_MESSAGE]);
- const [isProcessing, setIsProcessing] = useState3(false);
- const [error, setError] = useState3(null);
- const screenContextRef = useRef(null);
- useEffect2(() => {
- if (!screenWatch) {
- screenContextRef.current = null;
- return;
+ User: ${lastUserMsg?.content || "What do you see?"}`,
+ images: [screenshot],
+ stream: false
+ })
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Ollama vision error: ${error}`);
+ }
+ const data = await response.json();
+ return { content: data.response || "", model };
+ }
+ async function chatWithVisionAnthropic(messages, screenshot, systemPrompt) {
+ const apiKey = getApiKey("anthropic");
+ if (!apiKey) throw new Error("Anthropic API key not configured");
+ const chatMessages = messages.filter((m) => m.role !== "system");
+ const lastUserIdx = chatMessages.length - 1;
+ const visionMessages = chatMessages.map((m, i) => {
+ if (i === lastUserIdx && m.role === "user") {
+ return {
+ role: "user",
+ content: [
+ { type: "image", source: { type: "base64", media_type: "image/png", data: screenshot } },
+ { type: "text", text: m.content }
+ ]
+ };
  }
- const checkScreen = async () => {
- const desc = await getScreenDescription();
- if (desc) {
- screenContextRef.current = desc;
- }
- };
- checkScreen();
- const interval = setInterval(checkScreen, 5e3);
- return () => clearInterval(interval);
- }, [screenWatch]);
- const addSystemMessage = useCallback((content) => {
- setMessages((prev) => [
- ...prev,
- {
- id: Date.now().toString(),
- role: "system",
- content,
- timestamp: /* @__PURE__ */ new Date()
- }
- ]);
- }, []);
- const sendMessage = useCallback(async (content) => {
- if (!content.trim() || isProcessing) return;
- setError(null);
- const userMsg = {
- id: Date.now().toString(),
- role: "user",
- content,
- timestamp: /* @__PURE__ */ new Date()
- };
- const assistantId = (Date.now() + 1).toString();
- const assistantMsg = {
- id: assistantId,
- role: "assistant",
- content: "",
- timestamp: /* @__PURE__ */ new Date(),
- isStreaming: true
- };
- setMessages((prev) => [...prev, userMsg, assistantMsg]);
- setIsProcessing(true);
- try {
- const apiMessages = messages.filter((m) => m.role === "user" || m.role === "assistant").slice(-10).map((m) => ({ role: m.role, content: m.content }));
- let finalContent = content;
- if (screenWatch && screenContextRef.current) {
- finalContent = `[Screen context: ${screenContextRef.current}]
-
- ${content}`;
- }
- apiMessages.push({ role: "user", content: finalContent });
- const response = await chat(apiMessages);
- setMessages(
- (prev) => prev.map(
- (m) => m.id === assistantId ? { ...m, content: response.content || "(no response)", isStreaming: false } : m
- )
- );
- } catch (err2) {
- const errorMsg = err2 instanceof Error ? err2.message : "Unknown error";
- setError(errorMsg);
- setMessages(
- (prev) => prev.map(
- (m) => m.id === assistantId ? { ...m, content: `Error: ${errorMsg}`, isStreaming: false } : m
- )
- );
- } finally {
- setIsProcessing(false);
+ return { role: m.role, content: m.content };
+ });
+ const response = await fetch("https://api.anthropic.com/v1/messages", {
+ method: "POST",
+ headers: {
+ "x-api-key": apiKey,
+ "anthropic-version": "2023-06-01",
+ "Content-Type": "application/json"
+ },
+ body: JSON.stringify({
+ model: "claude-3-5-sonnet-20241022",
+ max_tokens: 2048,
+ system: systemPrompt,
+ messages: visionMessages
+ })
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Anthropic vision error: ${error}`);
+ }
+ const data = await response.json();
+ return { content: data.content?.[0]?.text || "", model: "claude-3-5-sonnet-20241022" };
+ }
+ async function chatWithVisionOpenAI(messages, screenshot, systemPrompt) {
+ const apiKey = getApiKey("openai");
+ if (!apiKey) throw new Error("OpenAI API key not configured");
+ const lastUserIdx = messages.length - 1;
+ const visionMessages = messages.map((m, i) => {
+ if (i === lastUserIdx && m.role === "user") {
+ return {
+ role: "user",
+ content: [
+ { type: "text", text: m.content },
+ { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
+ ]
+ };
  }
- }, [messages, isProcessing, screenWatch]);
- const clearMessages = useCallback(() => {
- setMessages([WELCOME_MESSAGE]);
- setError(null);
- }, []);
- return {
- messages,
- isProcessing,
- error,
- sendMessage,
- addSystemMessage,
- clearMessages
- };
+ return m;
+ });
+ const response = await fetch("https://api.openai.com/v1/chat/completions", {
+ method: "POST",
+ headers: {
+ "Authorization": `Bearer ${apiKey}`,
+ "Content-Type": "application/json"
+ },
+ body: JSON.stringify({
+ model: "gpt-4o",
+ messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
+ max_tokens: 2048
+ })
+ });
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`OpenAI vision error: ${error}`);
+ }
+ const data = await response.json();
+ return { content: data.choices?.[0]?.message?.content || "", model: "gpt-4o" };
  }

- // src/hooks/useVision.ts
- import { useState as useState4, useCallback as useCallback2 } from "react";
-
  // src/lib/vision.ts
  async function describeScreen() {
  const screenshot = await captureScreenshot();
@@ -893,17 +922,17 @@ async function captureScreenshot() {
  }
  }
  async function captureScreenFallback() {
- const { exec: exec7 } = await import("child_process");
- const { promisify: promisify7 } = await import("util");
+ const { exec: exec6 } = await import("child_process");
+ const { promisify: promisify6 } = await import("util");
  const { tmpdir } = await import("os");
  const { join: join2 } = await import("path");
  const { readFile, unlink } = await import("fs/promises");
- const execAsync7 = promisify7(exec7);
+ const execAsync6 = promisify6(exec6);
  const tempFile = join2(tmpdir(), `cnapse-screen-${Date.now()}.png`);
  try {
  const platform = process.platform;
  if (platform === "win32") {
- await execAsync7(`
+ await execAsync6(`
  Add-Type -AssemblyName System.Windows.Forms
  $screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
  $bitmap = New-Object System.Drawing.Bitmap($screen.Width, $screen.Height)
@@ -914,9 +943,9 @@ async function captureScreenFallback() {
  $bitmap.Dispose()
  `, { shell: "powershell.exe" });
  } else if (platform === "darwin") {
- await execAsync7(`screencapture -x "${tempFile}"`);
+ await execAsync6(`screencapture -x "${tempFile}"`);
  } else {
- await execAsync7(`gnome-screenshot -f "${tempFile}" 2>/dev/null || scrot "${tempFile}" 2>/dev/null || import -window root "${tempFile}"`);
+ await execAsync6(`gnome-screenshot -f "${tempFile}" 2>/dev/null || scrot "${tempFile}" 2>/dev/null || import -window root "${tempFile}"`);
  }
  const imageBuffer = await readFile(tempFile);
  await unlink(tempFile).catch(() => {
@@ -1077,7 +1106,98 @@ async function analyzeWithOpenAI(base64Image, prompt) {
  return data.choices?.[0]?.message?.content || "Unable to analyze image";
  }

+ // src/hooks/useChat.ts
+ var WELCOME_MESSAGE = {
+ id: "0",
+ role: "system",
+ content: "Welcome to C-napse! Type your message and press Enter.\n\nShortcuts: Ctrl+H for help, Ctrl+P for provider",
+ timestamp: /* @__PURE__ */ new Date()
+ };
+ function useChat(screenWatch = false) {
+ const [messages, setMessages] = useState3([WELCOME_MESSAGE]);
+ const [isProcessing, setIsProcessing] = useState3(false);
+ const [error, setError] = useState3(null);
+ const screenWatchRef = useRef(screenWatch);
+ useEffect2(() => {
+ screenWatchRef.current = screenWatch;
+ }, [screenWatch]);
+ const addSystemMessage = useCallback((content) => {
+ setMessages((prev) => [
+ ...prev,
+ {
+ id: Date.now().toString(),
+ role: "system",
+ content,
+ timestamp: /* @__PURE__ */ new Date()
+ }
+ ]);
+ }, []);
+ const sendMessage = useCallback(async (content) => {
+ if (!content.trim() || isProcessing) return;
+ setError(null);
+ const userMsg = {
+ id: Date.now().toString(),
+ role: "user",
+ content,
+ timestamp: /* @__PURE__ */ new Date()
+ };
+ const assistantId = (Date.now() + 1).toString();
+ const assistantMsg = {
+ id: assistantId,
+ role: "assistant",
+ content: "",
+ timestamp: /* @__PURE__ */ new Date(),
+ isStreaming: true
+ };
+ setMessages((prev) => [...prev, userMsg, assistantMsg]);
+ setIsProcessing(true);
+ try {
+ const apiMessages = messages.filter((m) => m.role === "user" || m.role === "assistant").slice(-10).map((m) => ({ role: m.role, content: m.content }));
+ apiMessages.push({ role: "user", content });
+ let response;
+ if (screenWatchRef.current) {
+ const screenshot = await captureScreenshot();
+ if (screenshot) {
+ response = await chatWithVision(apiMessages, screenshot);
+ } else {
+ response = await chat(apiMessages);
+ }
+ } else {
+ response = await chat(apiMessages);
+ }
+ setMessages(
+ (prev) => prev.map(
+ (m) => m.id === assistantId ? { ...m, content: response.content || "(no response)", isStreaming: false } : m
+ )
+ );
+ } catch (err2) {
+ const errorMsg = err2 instanceof Error ? err2.message : "Unknown error";
+ setError(errorMsg);
+ setMessages(
+ (prev) => prev.map(
+ (m) => m.id === assistantId ? { ...m, content: `Error: ${errorMsg}`, isStreaming: false } : m
+ )
+ );
+ } finally {
+ setIsProcessing(false);
+ }
+ }, [messages, isProcessing]);
+ const clearMessages = useCallback(() => {
+ setMessages([WELCOME_MESSAGE]);
+ setError(null);
+ }, []);
+ return {
+ messages,
+ isProcessing,
+ error,
+ sendMessage,
+ addSystemMessage,
+ clearMessages
+ };
+ }
+
  // src/hooks/useVision.ts
+ import { useState as useState4, useCallback as useCallback2 } from "react";
  function useVision() {
  const [isAnalyzing, setIsAnalyzing] = useState4(false);
  const [lastDescription, setLastDescription] = useState4(null);
@@ -1115,21 +1235,21 @@ import { useState as useState5, useCallback as useCallback3, useEffect as useEff
  import { EventEmitter } from "events";

  // src/tools/shell.ts
- import { exec as exec6 } from "child_process";
- import { promisify as promisify6 } from "util";
+ import { exec as exec5 } from "child_process";
+ import { promisify as promisify5 } from "util";

  // src/tools/clipboard.ts
  import clipboardy from "clipboardy";

  // src/tools/process.ts
+ import { exec as exec3 } from "child_process";
+ import { promisify as promisify3 } from "util";
+ var execAsync3 = promisify3(exec3);
+
+ // src/tools/computer.ts
  import { exec as exec4 } from "child_process";
  import { promisify as promisify4 } from "util";
  var execAsync4 = promisify4(exec4);
-
- // src/tools/computer.ts
- import { exec as exec5 } from "child_process";
- import { promisify as promisify5 } from "util";
- var execAsync5 = promisify5(exec5);
  async function clickMouse(button = "left") {
  try {
  if (process.platform === "win32") {
@@ -1139,12 +1259,12 @@ Add-Type -MemberDefinition @"
  public static extern void mouse_event(long dwFlags, long dx, long dy, long cButtons, long dwExtraInfo);
  "@ -Name Mouse -Namespace Win32
  ${button === "left" ? "[Win32.Mouse]::mouse_event(0x02, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x04, 0, 0, 0, 0)" : button === "right" ? "[Win32.Mouse]::mouse_event(0x08, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x10, 0, 0, 0, 0)" : "[Win32.Mouse]::mouse_event(0x20, 0, 0, 0, 0); [Win32.Mouse]::mouse_event(0x40, 0, 0, 0, 0)"}`;
- await execAsync5(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
+ await execAsync4(`powershell -Command "${script.replace(/\n/g, " ")}"`, { shell: "cmd.exe" });
  } else if (process.platform === "darwin") {
- await execAsync5(`cliclick c:.`);
+ await execAsync4(`cliclick c:.`);
  } else {
  const btn = button === "left" ? "1" : button === "right" ? "3" : "2";
- await execAsync5(`xdotool click ${btn}`);
+ await execAsync4(`xdotool click ${btn}`);
  }
  return ok(`Clicked ${button} button`);
  } catch (error) {
@@ -1155,13 +1275,13 @@ async function typeText(text) {
  try {
  if (process.platform === "win32") {
  const escapedText = text.replace(/'/g, "''").replace(/[+^%~(){}[\]]/g, "{$&}");
- await execAsync5(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escapedText}')"`, { shell: "cmd.exe" });
+ await execAsync4(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escapedText}')"`, { shell: "cmd.exe" });
  } else if (process.platform === "darwin") {
  const escaped = text.replace(/'/g, "'\\''");
- await execAsync5(`osascript -e 'tell application "System Events" to keystroke "${escaped}"'`);
+ await execAsync4(`osascript -e 'tell application "System Events" to keystroke "${escaped}"'`);
  } else {
  const escaped = text.replace(/'/g, "'\\''");
- await execAsync5(`xdotool type '${escaped}'`);
+ await execAsync4(`xdotool type '${escaped}'`);
  }
  return ok(`Typed: ${text}`);
  } catch (error) {
@@ -1202,7 +1322,7 @@ async function pressKey(key) {
  "f12": "{F12}"
  };
  const winKey = winKeyMap[key.toLowerCase()] || key;
- await execAsync5(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${winKey}')"`, { shell: "cmd.exe" });
+ await execAsync4(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${winKey}')"`, { shell: "cmd.exe" });
  } else if (process.platform === "darwin") {
  const macKeyMap = {
  "return": 36,
@@ -1220,12 +1340,12 @@ async function pressKey(key) {
  };
  const keyCode = macKeyMap[key.toLowerCase()];
  if (keyCode) {
- await execAsync5(`osascript -e 'tell application "System Events" to key code ${keyCode}'`);
+ await execAsync4(`osascript -e 'tell application "System Events" to key code ${keyCode}'`);
  } else {
- await execAsync5(`osascript -e 'tell application "System Events" to keystroke "${key}"'`);
+ await execAsync4(`osascript -e 'tell application "System Events" to keystroke "${key}"'`);
  }
  } else {
- await execAsync5(`xdotool key ${key}`);
+ await execAsync4(`xdotool key ${key}`);
  }
  return ok(`Pressed: ${key}`);
  } catch (error) {
@@ -1238,7 +1358,7 @@ async function keyCombo(keys) {
  const hasWin = keys.some((k) => k.toLowerCase() === "meta" || k.toLowerCase() === "win");
  const hasR = keys.some((k) => k.toLowerCase() === "r");
  if (hasWin && hasR) {
- await execAsync5(`powershell -Command "$shell = New-Object -ComObject WScript.Shell; $shell.Run('explorer shell:::{2559a1f3-21d7-11d4-bdaf-00c04f60b9f0}')"`, { shell: "cmd.exe" });
+ await execAsync4(`powershell -Command "$shell = New-Object -ComObject WScript.Shell; $shell.Run('explorer shell:::{2559a1f3-21d7-11d4-bdaf-00c04f60b9f0}')"`, { shell: "cmd.exe" });
  return ok(`Pressed: ${keys.join("+")}`);
  }
  const modifierMap = {
@@ -1258,7 +1378,7 @@ async function keyCombo(keys) {
  }
  }
  combo += regularKeys.join("");
- await execAsync5(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${combo}')"`, { shell: "cmd.exe" });
+ await execAsync4(`powershell -Command "Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${combo}')"`, { shell: "cmd.exe" });
  } else if (process.platform === "darwin") {
  const modifiers = keys.filter((k) => ["control", "ctrl", "alt", "shift", "command", "meta"].includes(k.toLowerCase()));
  const regular = keys.filter((k) => !["control", "ctrl", "alt", "shift", "command", "meta"].includes(k.toLowerCase()));
@@ -1274,9 +1394,9 @@ async function keyCombo(keys) {
  };
  cmd += " using {" + modifiers.map((m) => modMap[m.toLowerCase()]).join(", ") + "}";
  }
- await execAsync5(`osascript -e '${cmd}'`);
+ await execAsync4(`osascript -e '${cmd}'`);
  } else {
- await execAsync5(`xdotool key ${keys.join("+")}`);
+ await execAsync4(`xdotool key ${keys.join("+")}`);
  }
  return ok(`Pressed: ${keys.join("+")}`);
  } catch (error) {
@@ -1287,11 +1407,11 @@ async function focusWindow(title) {
  try {
  if (process.platform === "win32") {
  const escaped = title.replace(/'/g, "''");
- await execAsync5(`powershell -Command "$wshell = New-Object -ComObject wscript.shell; $wshell.AppActivate('${escaped}')"`, { shell: "cmd.exe" });
+ await execAsync4(`powershell -Command "$wshell = New-Object -ComObject wscript.shell; $wshell.AppActivate('${escaped}')"`, { shell: "cmd.exe" });
  } else if (process.platform === "darwin") {
- await execAsync5(`osascript -e 'tell application "${title}" to activate'`);
+ await execAsync4(`osascript -e 'tell application "${title}" to activate'`);
  } else {
- await execAsync5(`wmctrl -a "${title}"`);
+ await execAsync4(`wmctrl -a "${title}"`);
  }
  return ok(`Focused window: ${title}`);
  } catch (error) {
@@ -1308,13 +1428,13 @@ function err(error) {
  }

  // src/tools/shell.ts
- var execAsync6 = promisify6(exec6);
+ var execAsync5 = promisify5(exec5);
  async function runCommand(cmd, timeout = 3e4) {
  try {
  const isWindows = process.platform === "win32";
  const shell = isWindows ? "cmd.exe" : "/bin/sh";
  const shellArg = isWindows ? "/C" : "-c";
- const { stdout, stderr } = await execAsync6(cmd, {
+ const { stdout, stderr } = await execAsync5(cmd, {
  shell,
  timeout,
  maxBuffer: 10 * 1024 * 1024
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@projectservan8n/cnapse",
- "version": "0.5.6",
+ "version": "0.5.7",
  "description": "Autonomous PC intelligence - AI assistant for desktop automation",
  "type": "module",
  "main": "dist/index.js",
package/src/hooks/useChat.ts CHANGED
@@ -3,8 +3,8 @@
  */

  import { useState, useCallback, useRef, useEffect } from 'react';
- import { chat, Message } from '../lib/api.js';
- import { getScreenDescription } from '../lib/screen.js';
+ import { chat, chatWithVision, Message } from '../lib/api.js';
+ import { captureScreenshot } from '../lib/vision.js';

  export interface ChatMessage {
  id: string;
@@ -34,25 +34,11 @@ export function useChat(screenWatch: boolean = false): UseChatResult {
  const [messages, setMessages] = useState<ChatMessage[]>([WELCOME_MESSAGE]);
  const [isProcessing, setIsProcessing] = useState(false);
  const [error, setError] = useState<string | null>(null);
- const screenContextRef = useRef<string | null>(null);
+ const screenWatchRef = useRef(screenWatch);

- // Screen watching effect
+ // Keep ref in sync with prop
  useEffect(() => {
- if (!screenWatch) {
- screenContextRef.current = null;
- return;
- }
-
- const checkScreen = async () => {
- const desc = await getScreenDescription();
- if (desc) {
- screenContextRef.current = desc;
- }
- };
-
- checkScreen();
- const interval = setInterval(checkScreen, 5000);
- return () => clearInterval(interval);
+ screenWatchRef.current = screenWatch;
  }, [screenWatch]);

  const addSystemMessage = useCallback((content: string) => {
@@ -100,16 +86,23 @@ export function useChat(screenWatch: boolean = false): UseChatResult {
  .slice(-10)
  .map(m => ({ role: m.role as 'user' | 'assistant', content: m.content }));

- // Add screen context if watching
- let finalContent = content;
- if (screenWatch && screenContextRef.current) {
- finalContent = `[Screen context: ${screenContextRef.current}]\n\n${content}`;
+ apiMessages.push({ role: 'user', content });
+
+ let response;
+
+ // If screen watching is enabled, capture screenshot and use vision API
+ if (screenWatchRef.current) {
+ const screenshot = await captureScreenshot();
+ if (screenshot) {
+ response = await chatWithVision(apiMessages, screenshot);
+ } else {
+ // Fallback to regular chat if screenshot fails
+ response = await chat(apiMessages);
+ }
+ } else {
+ response = await chat(apiMessages);
  }

- apiMessages.push({ role: 'user', content: finalContent });
-
- const response = await chat(apiMessages);
-
  // Update assistant message
  setMessages(prev =>
  prev.map(m =>
@@ -131,7 +124,7 @@ export function useChat(screenWatch: boolean = false): UseChatResult {
  } finally {
  setIsProcessing(false);
  }
- }, [messages, isProcessing, screenWatch]);
+ }, [messages, isProcessing]);

  const clearMessages = useCallback(() => {
  setMessages([WELCOME_MESSAGE]);
package/src/lib/api.ts CHANGED
@@ -184,6 +184,194 @@ async function chatOpenAI(messages: Message[], model: string): Promise<ChatRespo
  return { content, model };
  }

+ /**
+ * Chat with vision - sends screenshot along with messages
+ */
+ export async function chatWithVision(messages: Message[], screenshotBase64: string): Promise<ChatResponse> {
+ const config = getConfig();
+ const systemPrompt = await getSystemPrompt();
+
+ // Add vision context to system prompt
+ const visionPrompt = systemPrompt + '\n\nYou can see the user\'s screen. Describe what you see and help them with their request.';
+
+ switch (config.provider) {
+ case 'openrouter':
+ return chatWithVisionOpenRouter(messages, screenshotBase64, visionPrompt);
+ case 'ollama':
+ return chatWithVisionOllama(messages, screenshotBase64, visionPrompt);
+ case 'anthropic':
+ return chatWithVisionAnthropic(messages, screenshotBase64, visionPrompt);
+ case 'openai':
+ return chatWithVisionOpenAI(messages, screenshotBase64, visionPrompt);
+ default:
+ throw new Error(`Vision not supported for provider: ${config.provider}`);
+ }
+ }
+
+ async function chatWithVisionOpenRouter(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+ const apiKey = getApiKey('openrouter');
+ if (!apiKey) throw new Error('OpenRouter API key not configured');
+
+ const config = getConfig();
+
+ // Use vision-capable model - prefer GPT-5 Nano or Claude
+ let model = config.model;
+ if (!model.includes('gpt-5') && !model.includes('claude') && !model.includes('gemini')) {
+ model = 'openai/gpt-5-nano'; // Default to GPT-5 Nano for vision
+ }
+
+ // Build messages with image in the last user message
+ const lastUserIdx = messages.length - 1;
+ const visionMessages = messages.map((m, i) => {
+ if (i === lastUserIdx && m.role === 'user') {
+ return {
+ role: 'user',
+ content: [
+ { type: 'text', text: m.content },
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${screenshot}` } },
+ ],
+ };
+ }
+ return m;
+ });
+
+ const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Authorization': `Bearer ${apiKey}`,
+ 'Content-Type': 'application/json',
+ 'HTTP-Referer': config.openrouter.siteUrl,
+ 'X-Title': config.openrouter.appName,
+ },
+ body: JSON.stringify({
+ model,
+ messages: [{ role: 'system', content: systemPrompt }, ...visionMessages],
+ max_tokens: 2048,
+ }),
+ });
+
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`OpenRouter vision error: ${response.status} - ${error}`);
+ }
+
+ const data = await response.json() as any;
+ return { content: data.choices?.[0]?.message?.content || '', model };
+ }
+
+ async function chatWithVisionOllama(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+ const config = getConfig();
+
+ // Use vision model
+ const visionModels = ['llava', 'llama3.2-vision', 'bakllava'];
+ const model = visionModels.find(m => config.model.includes(m)) || 'llava';
+
+ const lastUserMsg = messages.filter(m => m.role === 'user').pop();
+
+ const response = await fetch(`${config.ollamaHost}/api/generate`, {
+ method: 'POST',
+ headers: { 'Content-Type': 'application/json' },
+ body: JSON.stringify({
+ model,
+ prompt: `${systemPrompt}\n\nUser: ${lastUserMsg?.content || 'What do you see?'}`,
+ images: [screenshot],
+ stream: false,
+ }),
+ });
+
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Ollama vision error: ${error}`);
+ }
+
+ const data = await response.json() as any;
+ return { content: data.response || '', model };
+ }
+
+ async function chatWithVisionAnthropic(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+ const apiKey = getApiKey('anthropic');
+ if (!apiKey) throw new Error('Anthropic API key not configured');
+
+ const chatMessages = messages.filter(m => m.role !== 'system');
+ const lastUserIdx = chatMessages.length - 1;
+
+ const visionMessages = chatMessages.map((m, i) => {
+ if (i === lastUserIdx && m.role === 'user') {
+ return {
+ role: 'user',
+ content: [
+ { type: 'image', source: { type: 'base64', media_type: 'image/png', data: screenshot } },
+ { type: 'text', text: m.content },
+ ],
+ };
+ }
+ return { role: m.role, content: m.content };
+ });
+
+ const response = await fetch('https://api.anthropic.com/v1/messages', {
+ method: 'POST',
+ headers: {
+ 'x-api-key': apiKey,
+ 'anthropic-version': '2023-06-01',
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: 'claude-3-5-sonnet-20241022',
+ max_tokens: 2048,
+ system: systemPrompt,
+ messages: visionMessages,
+ }),
+ });
+
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`Anthropic vision error: ${error}`);
+ }
+
+ const data = await response.json() as any;
+ return { content: data.content?.[0]?.text || '', model: 'claude-3-5-sonnet-20241022' };
+ }
+
+ async function chatWithVisionOpenAI(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+ const apiKey = getApiKey('openai');
+ if (!apiKey) throw new Error('OpenAI API key not configured');
+
+ const lastUserIdx = messages.length - 1;
+ const visionMessages = messages.map((m, i) => {
+ if (i === lastUserIdx && m.role === 'user') {
+ return {
+ role: 'user',
+ content: [
+ { type: 'text', text: m.content },
+ { type: 'image_url', image_url: { url: `data:image/png;base64,${screenshot}` } },
+ ],
+ };
+ }
+ return m;
+ });
+
+ const response = await fetch('https://api.openai.com/v1/chat/completions', {
+ method: 'POST',
+ headers: {
+ 'Authorization': `Bearer ${apiKey}`,
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({
+ model: 'gpt-4o',
+ messages: [{ role: 'system', content: systemPrompt }, ...visionMessages],
+ max_tokens: 2048,
+ }),
+ });
+
+ if (!response.ok) {
+ const error = await response.text();
+ throw new Error(`OpenAI vision error: ${error}`);
+ }
+
+ const data = await response.json() as any;
+ return { content: data.choices?.[0]?.message?.content || '', model: 'gpt-4o' };
+ }
+
  export async function testConnection(): Promise<boolean> {
  try {
  await chat([{ role: 'user', content: 'hi' }]);
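
For context: a minimal sketch of how the new vision path might be driven from calling code, using only signatures visible in this diff - chatWithVision(messages, screenshotBase64) and chat(messages) from src/lib/api.ts, and captureScreenshot() from src/lib/vision. The helper name, import paths, and logging below are illustrative assumptions, not part of the package.

import { chat, chatWithVision, Message } from './lib/api.js';
import { captureScreenshot } from './lib/vision.js';

// Hypothetical helper: ask a question about the current screen, degrading to
// text-only chat when the screenshot cannot be captured (the same
// screenshot-or-fallback pattern useChat now uses per message).
async function askAboutScreen(question: string): Promise<void> {
  const messages: Message[] = [{ role: 'user', content: question }];
  const screenshot = await captureScreenshot(); // base64 PNG; falsy on failure
  const response = screenshot
    ? await chatWithVision(messages, screenshot)
    : await chat(messages);
  console.log(`[${response.model}] ${response.content}`);
}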