npm - @projectservan8n/cnapse - Versions diffs - 0.6.1 → 0.6.3 - Mend

@projectservan8n/cnapse 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/index.js CHANGED

@@ -925,10 +925,10 @@ async function captureScreenFallback() {
   const { exec: exec6 } = await import("child_process");
   const { promisify: promisify6 } = await import("util");
   const { tmpdir } = await import("os");
-  const { join: join2 } = await import("path");
-  const { readFile, unlink } = await import("fs/promises");
+  const { join: join3 } = await import("path");
+  const { readFile: readFile2, unlink } = await import("fs/promises");
   const execAsync6 = promisify6(exec6);
-  const tempFile = join2(tmpdir(), `cnapse-screen-${Date.now()}.png`);
+  const tempFile = join3(tmpdir(), `cnapse-screen-${Date.now()}.png`);
   try {
     const platform = process.platform;
     if (platform === "win32") {
@@ -947,7 +947,7 @@ async function captureScreenFallback() {
     } else {
       await execAsync6(`gnome-screenshot -f "${tempFile}" 2>/dev/null || scrot "${tempFile}" 2>/dev/null || import -window root "${tempFile}"`);
     }
-    const imageBuffer = await readFile(tempFile);
+    const imageBuffer = await readFile2(tempFile);
     await unlink(tempFile).catch(() => {
     });
     return imageBuffer.toString("base64");
@@ -1238,6 +1238,56 @@ import { EventEmitter } from "events";
 import { exec as exec5 } from "child_process";
 import { promisify as promisify5 } from "util";
+// src/tools/filesystem.ts
+import { promises as fs } from "fs";
+import { join, dirname } from "path";
+async function readFile(path2) {
+  try {
+    const content = await fs.readFile(path2, "utf-8");
+    return ok(content);
+  } catch (error) {
+    return err(`Failed to read file: ${error.message}`);
+  }
+}
+async function writeFile(path2, content) {
+  try {
+    const dir = dirname(path2);
+    await fs.mkdir(dir, { recursive: true });
+    await fs.writeFile(path2, content, "utf-8");
+    return ok(`Written ${content.length} bytes to ${path2}`);
+  } catch (error) {
+    return err(`Failed to write file: ${error.message}`);
+  }
+}
+async function listDir(path2, recursive = false) {
+  try {
+    const stat = await fs.stat(path2);
+    if (!stat.isDirectory()) {
+      return err(`Not a directory: ${path2}`);
+    }
+    const entries = [];
+    async function walkDir(dir, prefix) {
+      const items = await fs.readdir(dir, { withFileTypes: true });
+      for (const item of items) {
+        const displayPath = prefix ? `${prefix}/${item.name}` : item.name;
+        if (item.isDirectory()) {
+          entries.push(`${displayPath}/`);
+          if (recursive) {
+            await walkDir(join(dir, item.name), displayPath);
+          }
+        } else {
+          entries.push(displayPath);
+        }
+      }
+    }
+    await walkDir(path2, "");
+    entries.sort();
+    return ok(entries.join("\n"));
+  } catch (error) {
+    return err(`Failed to list directory: ${error.message}`);
+  }
+}
 // src/tools/clipboard.ts
 import clipboardy from "clipboardy";
@@ -1748,6 +1798,65 @@ ${stderr}`
 }
 // src/services/telegram.ts
+function formatForTelegram(text) {
+  const hasMarkdown = /[*_`\[\]()]/.test(text);
+  if (!hasMarkdown) {
+    return { text, parseMode: void 0 };
+  }
+  try {
+    let formatted = text;
+    const escapeChars = ["\\", "_", "*", "[", "]", "(", ")", "~", "`", ">", "#", "+", "-", "=", "|", "{", "}", ".", "!"];
+    const placeholders = [];
+    let placeholderIndex = 0;
+    formatted = formatted.replace(/```([\s\S]*?)```/g, (match, code) => {
+      const placeholder = `__CODEBLOCK_${placeholderIndex++}__`;
+      placeholders.push({ placeholder, original: "```" + code.replace(/\\/g, "\\\\") + "```" });
+      return placeholder;
+    });
+    formatted = formatted.replace(/`([^`]+)`/g, (match, code) => {
+      const placeholder = `__INLINECODE_${placeholderIndex++}__`;
+      placeholders.push({ placeholder, original: "`" + code.replace(/\\/g, "\\\\") + "`" });
+      return placeholder;
+    });
+    formatted = formatted.replace(/\*\*(.+?)\*\*/g, (match, text2) => {
+      const placeholder = `__BOLD_${placeholderIndex++}__`;
+      placeholders.push({ placeholder, original: "*" + text2 + "*" });
+      return placeholder;
+    });
+    formatted = formatted.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, (match, text2) => {
+      const placeholder = `__ITALIC_${placeholderIndex++}__`;
+      placeholders.push({ placeholder, original: "_" + text2 + "_" });
+      return placeholder;
+    });
+    formatted = formatted.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text2, url) => {
+      const placeholder = `__LINK_${placeholderIndex++}__`;
+      placeholders.push({ placeholder, original: "[" + text2 + "](" + url + ")" });
+      return placeholder;
+    });
+    for (const char of escapeChars) {
+      if (char === "\\") continue;
+      formatted = formatted.split(char).join("\\" + char);
+    }
+    for (const { placeholder, original } of placeholders) {
+      formatted = formatted.replace(placeholder, original);
+    }
+    return { text: formatted, parseMode: "MarkdownV2" };
+  } catch {
+    return { text, parseMode: void 0 };
+  }
+}
+async function sendFormattedMessage(ctx, text) {
+  const { text: formatted, parseMode } = formatForTelegram(text);
+  try {
+    if (parseMode) {
+      await ctx.reply(formatted, { parse_mode: parseMode });
+    } else {
+      await ctx.reply(text);
+    }
+  } catch {
+    await ctx.reply(text);
+  }
+}
 var TelegramBotService = class extends EventEmitter {
   bot = null;
   isRunning = false;
@@ -1931,7 +2040,7 @@ ${result.error}
         await ctx.sendChatAction("typing");
         const computerControlResult = await this.tryComputerControl(userText);
         if (computerControlResult) {
-          await ctx.reply(computerControlResult);
+          await sendFormattedMessage(ctx, computerControlResult);
           history.push({ role: "assistant", content: computerControlResult });
           return;
         }
@@ -1952,10 +2061,10 @@ ${result.error}
         if (responseText.length > 4e3) {
           const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
           for (const chunk of chunks) {
-            await ctx.reply(chunk);
+            await sendFormattedMessage(ctx, chunk);
           }
         } else {
-          await ctx.reply(responseText);
+          await sendFormattedMessage(ctx, responseText);
         }
       } catch (error) {
         const errorMsg = error instanceof Error ? error.message : "Unknown error";
@@ -2150,14 +2259,14 @@ function useTelegram(onMessage) {
 import { useState as useState6, useCallback as useCallback4 } from "react";
 // src/lib/tasks.ts
-import * as fs from "fs";
+import * as fs2 from "fs";
 import * as path from "path";
 import * as os2 from "os";
 var TASK_MEMORY_FILE = path.join(os2.homedir(), ".cnapse", "task-memory.json");
 function loadTaskMemory() {
   try {
-    if (fs.existsSync(TASK_MEMORY_FILE)) {
-      const data = fs.readFileSync(TASK_MEMORY_FILE, "utf-8");
+    if (fs2.existsSync(TASK_MEMORY_FILE)) {
+      const data = fs2.readFileSync(TASK_MEMORY_FILE, "utf-8");
       return JSON.parse(data);
     }
   } catch {
@@ -2184,10 +2293,10 @@ function saveTaskPattern(input, steps) {
     }
     memory.patterns = memory.patterns.sort((a, b) => b.successCount - a.successCount).slice(0, 100);
     const dir = path.dirname(TASK_MEMORY_FILE);
-    if (!fs.existsSync(dir)) {
-      fs.mkdirSync(dir, { recursive: true });
+    if (!fs2.existsSync(dir)) {
+      fs2.mkdirSync(dir, { recursive: true });
     }
-    fs.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
+    fs2.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
   } catch {
   }
 }
@@ -2247,14 +2356,36 @@ Before outputting steps, THINK through these questions:
    - Typing too fast -> add small waits
 ## AVAILABLE ACTIONS
+### App Control
 - open_app: Open app via Run dialog (e.g., "open_app:notepad", "open_app:code", "open_app:chrome")
+- open_folder: Open VS Code with folder (e.g., "open_folder:E:/MyProject")
+- focus_window: Focus by title (e.g., "focus_window:Notepad")
+### Input
 - type_text: Type text string (e.g., "type_text:Hello World")
 - press_key: Single key (e.g., "press_key:enter", "press_key:escape", "press_key:tab")
 - key_combo: Key combination (e.g., "key_combo:control+s", "key_combo:alt+f4", "key_combo:meta+r")
 - click: Mouse click (e.g., "click:left", "click:right")
+### File Operations
+- read_file: Read file contents (e.g., "read_file:E:/test/index.html")
+- write_file: Write content to file (e.g., "write_file:E:/test/output.txt|Hello World")
+- list_files: List files in directory (e.g., "list_files:E:/test")
+### AI Coding
+- generate_code: AI generates code based on description (e.g., "generate_code:E:/test/index.html|create an HTML page with input on left, output on right")
+- edit_code: AI modifies existing code (e.g., "edit_code:E:/test/app.js|add error handling to the fetch calls")
+### Web Browsing
+- open_url: Open URL in default browser (e.g., "open_url:https://perplexity.ai")
+- browse_and_ask: Open AI website, type question, wait for response (e.g., "browse_and_ask:perplexity|What is the capital of France?")
+- browse_and_ask: Supports: perplexity, chatgpt, claude, google
+### Utility
 - wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
-- focus_window: Focus by title (e.g., "focus_window:Notepad")
 - screenshot: Capture and describe screen
+- shell: Run shell command (e.g., "shell:npm install")
 ${learnedExamples}
 ## EXAMPLES WITH REASONING
@@ -2297,6 +2428,59 @@ Output:
   { "description": "Close active window with Alt+F4", "action": "key_combo:alt+f4" }
 ]
+### Example 4: "open folder E:/Test in vscode and create an HTML editor"
+Thinking:
+- Goal: Open VS Code with folder, then create/edit HTML file to be an editor
+- How: Use open_folder to launch VS Code with the folder, then use AI to generate code
+- Sequence: Open folder -> List files to see what exists -> Generate/edit the HTML
+- Edge case: File might not exist yet
+Output:
+[
+  { "description": "Open VS Code with the Test folder", "action": "open_folder:E:/Test" },
+  { "description": "Wait for VS Code to load", "action": "wait:3" },
+  { "description": "List files in the folder", "action": "list_files:E:/Test" },
+  { "description": "Generate HTML editor code", "action": "generate_code:E:/Test/editor.html|Create an HTML page with a code editor layout: textarea input on the left side, live preview output on the right side. Include basic CSS for split layout and JavaScript to update preview on input." }
+]
+### Example 5: "read the config.json and add a new setting"
+Thinking:
+- Goal: Read existing file, understand it, modify it
+- How: read_file to get contents, then edit_code to modify
+- Sequence: Read first, then edit
+Output:
+[
+  { "description": "Read the config file", "action": "read_file:config.json" },
+  { "description": "Add new setting to config", "action": "edit_code:config.json|add a new setting called 'darkMode' with value true" }
+]
+### Example 6: "ask perplexity what is the best programming language"
+Thinking:
+- Goal: Open Perplexity AI in browser and ask a question
+- How: Use browse_and_ask with perplexity target
+- Sequence: Open site -> type question -> wait for response -> screenshot result
+Output:
+[
+  { "description": "Ask Perplexity the question", "action": "browse_and_ask:perplexity|what is the best programming language" },
+  { "description": "Wait for response to generate", "action": "wait:5" },
+  { "description": "Capture the response", "action": "screenshot" }
+]
+### Example 7: "search google for weather today"
+Thinking:
+- Goal: Open Google and search for something
+- How: Use browse_and_ask with google target
+- Sequence: Open Google, search, capture results
+Output:
+[
+  { "description": "Search Google", "action": "browse_and_ask:google|weather today" },
+  { "description": "Wait for results", "action": "wait:2" },
+  { "description": "Capture search results", "action": "screenshot" }
+]
 ## YOUR TASK
 Now parse this request: "${input}"
@@ -2386,6 +2570,168 @@ async function executeStep(step) {
       await focusWindow(params);
       step.result = `Focused window: ${params}`;
       break;
+    case "open_folder":
+      await runCommand(`code "${params}"`, 1e4);
+      step.result = `Opened VS Code with folder: ${params}`;
+      break;
+    case "read_file": {
+      const readResult = await readFile(params);
+      if (readResult.success) {
+        step.result = readResult.output;
+      } else {
+        throw new Error(readResult.error || "Failed to read file");
+      }
+      break;
+    }
+    case "write_file": {
+      const [filePath, ...contentParts] = params.split("|");
+      const content = contentParts.join("|");
+      const writeResult = await writeFile(filePath, content);
+      if (writeResult.success) {
+        step.result = `Written to ${filePath}`;
+      } else {
+        throw new Error(writeResult.error || "Failed to write file");
+      }
+      break;
+    }
+    case "list_files": {
+      const listResult = await listDir(params, false);
+      if (listResult.success) {
+        step.result = listResult.output;
+      } else {
+        throw new Error(listResult.error || "Failed to list files");
+      }
+      break;
+    }
+    case "generate_code": {
+      const [codePath, ...descParts] = params.split("|");
+      const codeDescription = descParts.join("|");
+      const codePrompt = `Generate complete, working code for this request. Output ONLY the code, no explanations or markdown:
+Request: ${codeDescription}
+File: ${codePath}`;
+      const codeResponse = await chat([{ role: "user", content: codePrompt }]);
+      let generatedCode = codeResponse.content;
+      generatedCode = generatedCode.replace(/^```[\w]*\n?/gm, "").replace(/\n?```$/gm, "").trim();
+      const genResult = await writeFile(codePath, generatedCode);
+      if (genResult.success) {
+        step.result = `Generated and saved code to ${codePath}`;
+      } else {
+        throw new Error(genResult.error || "Failed to write generated code");
+      }
+      break;
+    }
+    case "edit_code": {
+      const [editPath, ...instrParts] = params.split("|");
+      const instructions = instrParts.join("|");
+      const existingResult = await readFile(editPath);
+      if (!existingResult.success) {
+        throw new Error(`Cannot read file: ${existingResult.error}`);
+      }
+      const editPrompt = `Edit this code according to the instructions. Output ONLY the complete modified code, no explanations or markdown:
+Instructions: ${instructions}
+Current code:
+${existingResult.output}`;
+      const editResponse = await chat([{ role: "user", content: editPrompt }]);
+      let editedCode = editResponse.content;
+      editedCode = editedCode.replace(/^```[\w]*\n?/gm, "").replace(/\n?```$/gm, "").trim();
+      const editWriteResult = await writeFile(editPath, editedCode);
+      if (editWriteResult.success) {
+        step.result = `Edited and saved ${editPath}`;
+      } else {
+        throw new Error(editWriteResult.error || "Failed to write edited code");
+      }
+      break;
+    }
+    case "shell": {
+      const shellResult = await runCommand(params, 3e4);
+      if (shellResult.success) {
+        step.result = shellResult.output || "Command completed";
+      } else {
+        throw new Error(shellResult.error || "Command failed");
+      }
+      break;
+    }
+    case "open_url": {
+      const url = params.startsWith("http") ? params : `https://${params}`;
+      if (process.platform === "win32") {
+        await runCommand(`start "" "${url}"`, 5e3);
+      } else if (process.platform === "darwin") {
+        await runCommand(`open "${url}"`, 5e3);
+      } else {
+        await runCommand(`xdg-open "${url}"`, 5e3);
+      }
+      step.result = `Opened ${url} in browser`;
+      break;
+    }
+    case "browse_and_ask": {
+      const [site, ...questionParts] = params.split("|");
+      const question = questionParts.join("|");
+      const sites = {
+        perplexity: { url: "https://www.perplexity.ai", loadTime: 3, responseTime: 10 },
+        chatgpt: { url: "https://chat.openai.com", loadTime: 4, responseTime: 15 },
+        claude: { url: "https://claude.ai", loadTime: 4, responseTime: 15 },
+        google: { url: "https://www.google.com", loadTime: 2, responseTime: 3 },
+        bing: { url: "https://www.bing.com", loadTime: 2, responseTime: 3 },
+        bard: { url: "https://bard.google.com", loadTime: 3, responseTime: 12 },
+        copilot: { url: "https://copilot.microsoft.com", loadTime: 3, responseTime: 12 }
+      };
+      const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`, loadTime: 3, responseTime: 10 };
+      if (process.platform === "win32") {
+        await runCommand(`start "" "${siteConfig.url}"`, 5e3);
+      } else if (process.platform === "darwin") {
+        await runCommand(`open "${siteConfig.url}"`, 5e3);
+      } else {
+        await runCommand(`xdg-open "${siteConfig.url}"`, 5e3);
+      }
+      await sleep(siteConfig.loadTime * 1e3);
+      await typeText(question);
+      await sleep(300);
+      await pressKey("Return");
+      await sleep(siteConfig.responseTime * 1e3);
+      const extractedParts = [];
+      const maxScrolls = 5;
+      for (let scrollIndex = 0; scrollIndex < maxScrolls; scrollIndex++) {
+        const screenResult = await describeScreen();
+        const extractPrompt = `You are looking at screenshot ${scrollIndex + 1} of ${site}. The user asked: "${question}"
+Extract ONLY the AI's response/answer text visible on screen. Do NOT include:
+- The user's question
+- Any UI elements, buttons, navigation, or headers
+- Any disclaimers, suggestions, or "related questions"
+- Any "Sources" or citation links
+- Any text you already extracted (avoid duplicates)
+${scrollIndex > 0 ? `Previous parts already extracted:
+${extractedParts.join("\n---\n")}
+Only extract NEW text that continues from where we left off.` : ""}
+Just give me the actual answer text, word for word as it appears. If there's no more response text visible, respond with exactly: "END_OF_RESPONSE"`;
+        const extractResponse = await chat([{ role: "user", content: extractPrompt }]);
+        const extracted = extractResponse.content.trim();
+        if (extracted === "END_OF_RESPONSE" || extracted.includes("END_OF_RESPONSE")) {
+          break;
+        }
+        if (extracted.toLowerCase().includes("response not ready") || extracted.toLowerCase().includes("no response visible") || extracted.toLowerCase().includes("no additional text")) {
+          if (scrollIndex === 0) {
+            extractedParts.push("Response not ready yet or page still loading.");
+          }
+          break;
+        }
+        extractedParts.push(extracted);
+        await scrollMouse(-5);
+        await sleep(1e3);
+      }
+      const fullResponse = extractedParts.join("\n\n");
+      step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
+${fullResponse}`;
+      break;
+    }
     case "screenshot":
       const vision = await describeScreen();
       step.result = vision.description;
@@ -2442,8 +2788,8 @@ function getTaskMemoryStats() {
 }
 function clearTaskMemory() {
   try {
-    if (fs.existsSync(TASK_MEMORY_FILE)) {
-      fs.unlinkSync(TASK_MEMORY_FILE);
+    if (fs2.existsSync(TASK_MEMORY_FILE)) {
+      fs2.unlinkSync(TASK_MEMORY_FILE);
     }
   } catch {
   }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@projectservan8n/cnapse",
-  "version": "0.6.1",
+  "version": "0.6.3",
   "description": "Autonomous PC intelligence - AI assistant for desktop automation",
   "type": "module",
   "main": "dist/index.js",

package/src/lib/tasks.ts CHANGED

@@ -7,6 +7,8 @@
 import { chat, Message } from './api.js';
 import * as computer from '../tools/computer.js';
 import { describeScreen } from './vision.js';
+import * as filesystem from '../tools/filesystem.js';
+import { runCommand } from '../tools/shell.js';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
@@ -185,14 +187,36 @@ Before outputting steps, THINK through these questions:
    - Typing too fast -> add small waits
 ## AVAILABLE ACTIONS
+### App Control
 - open_app: Open app via Run dialog (e.g., "open_app:notepad", "open_app:code", "open_app:chrome")
+- open_folder: Open VS Code with folder (e.g., "open_folder:E:/MyProject")
+- focus_window: Focus by title (e.g., "focus_window:Notepad")
+### Input
 - type_text: Type text string (e.g., "type_text:Hello World")
 - press_key: Single key (e.g., "press_key:enter", "press_key:escape", "press_key:tab")
 - key_combo: Key combination (e.g., "key_combo:control+s", "key_combo:alt+f4", "key_combo:meta+r")
 - click: Mouse click (e.g., "click:left", "click:right")
+### File Operations
+- read_file: Read file contents (e.g., "read_file:E:/test/index.html")
+- write_file: Write content to file (e.g., "write_file:E:/test/output.txt|Hello World")
+- list_files: List files in directory (e.g., "list_files:E:/test")
+### AI Coding
+- generate_code: AI generates code based on description (e.g., "generate_code:E:/test/index.html|create an HTML page with input on left, output on right")
+- edit_code: AI modifies existing code (e.g., "edit_code:E:/test/app.js|add error handling to the fetch calls")
+### Web Browsing
+- open_url: Open URL in default browser (e.g., "open_url:https://perplexity.ai")
+- browse_and_ask: Open AI website, type question, wait for response (e.g., "browse_and_ask:perplexity|What is the capital of France?")
+- browse_and_ask: Supports: perplexity, chatgpt, claude, google
+### Utility
 - wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
-- focus_window: Focus by title (e.g., "focus_window:Notepad")
 - screenshot: Capture and describe screen
+- shell: Run shell command (e.g., "shell:npm install")
 ${learnedExamples}
 ## EXAMPLES WITH REASONING
@@ -235,6 +259,59 @@ Output:
   { "description": "Close active window with Alt+F4", "action": "key_combo:alt+f4" }
 ]
+### Example 4: "open folder E:/Test in vscode and create an HTML editor"
+Thinking:
+- Goal: Open VS Code with folder, then create/edit HTML file to be an editor
+- How: Use open_folder to launch VS Code with the folder, then use AI to generate code
+- Sequence: Open folder -> List files to see what exists -> Generate/edit the HTML
+- Edge case: File might not exist yet
+Output:
+[
+  { "description": "Open VS Code with the Test folder", "action": "open_folder:E:/Test" },
+  { "description": "Wait for VS Code to load", "action": "wait:3" },
+  { "description": "List files in the folder", "action": "list_files:E:/Test" },
+  { "description": "Generate HTML editor code", "action": "generate_code:E:/Test/editor.html|Create an HTML page with a code editor layout: textarea input on the left side, live preview output on the right side. Include basic CSS for split layout and JavaScript to update preview on input." }
+]
+### Example 5: "read the config.json and add a new setting"
+Thinking:
+- Goal: Read existing file, understand it, modify it
+- How: read_file to get contents, then edit_code to modify
+- Sequence: Read first, then edit
+Output:
+[
+  { "description": "Read the config file", "action": "read_file:config.json" },
+  { "description": "Add new setting to config", "action": "edit_code:config.json|add a new setting called 'darkMode' with value true" }
+]
+### Example 6: "ask perplexity what is the best programming language"
+Thinking:
+- Goal: Open Perplexity AI in browser and ask a question
+- How: Use browse_and_ask with perplexity target
+- Sequence: Open site -> type question -> wait for response -> screenshot result
+Output:
+[
+  { "description": "Ask Perplexity the question", "action": "browse_and_ask:perplexity|what is the best programming language" },
+  { "description": "Wait for response to generate", "action": "wait:5" },
+  { "description": "Capture the response", "action": "screenshot" }
+]
+### Example 7: "search google for weather today"
+Thinking:
+- Goal: Open Google and search for something
+- How: Use browse_and_ask with google target
+- Sequence: Open Google, search, capture results
+Output:
+[
+  { "description": "Search Google", "action": "browse_and_ask:google|weather today" },
+  { "description": "Wait for results", "action": "wait:2" },
+  { "description": "Capture search results", "action": "screenshot" }
+]
 ## YOUR TASK
 Now parse this request: "${input}"
@@ -349,6 +426,225 @@ async function executeStep(step: TaskStep): Promise<void> {
       step.result = `Focused window: ${params}`;
       break;
+    case 'open_folder':
+      // Open VS Code with a specific folder
+      await runCommand(`code "${params}"`, 10000);
+      step.result = `Opened VS Code with folder: ${params}`;
+      break;
+    case 'read_file': {
+      const readResult = await filesystem.readFile(params);
+      if (readResult.success) {
+        step.result = readResult.output;
+      } else {
+        throw new Error(readResult.error || 'Failed to read file');
+      }
+      break;
+    }
+    case 'write_file': {
+      // Format: write_file:path|content
+      const [filePath, ...contentParts] = params.split('|');
+      const content = contentParts.join('|');
+      const writeResult = await filesystem.writeFile(filePath, content);
+      if (writeResult.success) {
+        step.result = `Written to ${filePath}`;
+      } else {
+        throw new Error(writeResult.error || 'Failed to write file');
+      }
+      break;
+    }
+    case 'list_files': {
+      const listResult = await filesystem.listDir(params, false);
+      if (listResult.success) {
+        step.result = listResult.output;
+      } else {
+        throw new Error(listResult.error || 'Failed to list files');
+      }
+      break;
+    }
+    case 'generate_code': {
+      // Format: generate_code:path|description
+      const [codePath, ...descParts] = params.split('|');
+      const codeDescription = descParts.join('|');
+      // Ask AI to generate the code
+      const codePrompt = `Generate complete, working code for this request. Output ONLY the code, no explanations or markdown:
+Request: ${codeDescription}
+File: ${codePath}`;
+      const codeResponse = await chat([{ role: 'user', content: codePrompt }]);
+      let generatedCode = codeResponse.content;
+      // Strip markdown code blocks if present
+      generatedCode = generatedCode.replace(/^```[\w]*\n?/gm, '').replace(/\n?```$/gm, '').trim();
+      // Write the generated code to file
+      const genResult = await filesystem.writeFile(codePath, generatedCode);
+      if (genResult.success) {
+        step.result = `Generated and saved code to ${codePath}`;
+      } else {
+        throw new Error(genResult.error || 'Failed to write generated code');
+      }
+      break;
+    }
+    case 'edit_code': {
+      // Format: edit_code:path|instructions
+      const [editPath, ...instrParts] = params.split('|');
+      const instructions = instrParts.join('|');
+      // Read existing file
+      const existingResult = await filesystem.readFile(editPath);
+      if (!existingResult.success) {
+        throw new Error(`Cannot read file: ${existingResult.error}`);
+      }
+      // Ask AI to edit the code
+      const editPrompt = `Edit this code according to the instructions. Output ONLY the complete modified code, no explanations or markdown:
+Instructions: ${instructions}
+Current code:
+${existingResult.output}`;
+      const editResponse = await chat([{ role: 'user', content: editPrompt }]);
+      let editedCode = editResponse.content;
+      // Strip markdown code blocks if present
+      editedCode = editedCode.replace(/^```[\w]*\n?/gm, '').replace(/\n?```$/gm, '').trim();
+      // Write the edited code back
+      const editWriteResult = await filesystem.writeFile(editPath, editedCode);
+      if (editWriteResult.success) {
+        step.result = `Edited and saved ${editPath}`;
+      } else {
+        throw new Error(editWriteResult.error || 'Failed to write edited code');
+      }
+      break;
+    }
+    case 'shell': {
+      const shellResult = await runCommand(params, 30000);
+      if (shellResult.success) {
+        step.result = shellResult.output || 'Command completed';
+      } else {
+        throw new Error(shellResult.error || 'Command failed');
+      }
+      break;
+    }
+    case 'open_url': {
+      // Open URL in default browser
+      const url = params.startsWith('http') ? params : `https://${params}`;
+      if (process.platform === 'win32') {
+        await runCommand(`start "" "${url}"`, 5000);
+      } else if (process.platform === 'darwin') {
+        await runCommand(`open "${url}"`, 5000);
+      } else {
+        await runCommand(`xdg-open "${url}"`, 5000);
+      }
+      step.result = `Opened ${url} in browser`;
+      break;
+    }
+    case 'browse_and_ask': {
+      // Format: browse_and_ask:site|question
+      const [site, ...questionParts] = params.split('|');
+      const question = questionParts.join('|');
+      // Site-specific URLs and response wait times
+      const sites: Record<string, { url: string; loadTime: number; responseTime: number }> = {
+        perplexity: { url: 'https://www.perplexity.ai', loadTime: 3, responseTime: 10 },
+        chatgpt: { url: 'https://chat.openai.com', loadTime: 4, responseTime: 15 },
+        claude: { url: 'https://claude.ai', loadTime: 4, responseTime: 15 },
+        google: { url: 'https://www.google.com', loadTime: 2, responseTime: 3 },
+        bing: { url: 'https://www.bing.com', loadTime: 2, responseTime: 3 },
+        bard: { url: 'https://bard.google.com', loadTime: 3, responseTime: 12 },
+        copilot: { url: 'https://copilot.microsoft.com', loadTime: 3, responseTime: 12 },
+      };
+      const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`, loadTime: 3, responseTime: 10 };
+      // Open the site
+      if (process.platform === 'win32') {
+        await runCommand(`start "" "${siteConfig.url}"`, 5000);
+      } else if (process.platform === 'darwin') {
+        await runCommand(`open "${siteConfig.url}"`, 5000);
+      } else {
+        await runCommand(`xdg-open "${siteConfig.url}"`, 5000);
+      }
+      // Wait for page to load
+      await sleep(siteConfig.loadTime * 1000);
+      // Type the question (most sites have autofocus on search/input)
+      await computer.typeText(question);
+      await sleep(300);
+      // Press Enter to submit
+      await computer.pressKey('Return');
+      // Wait for AI to generate response
+      await sleep(siteConfig.responseTime * 1000);
+      // Capture multiple screenshots by scrolling to get full response
+      const extractedParts: string[] = [];
+      const maxScrolls = 5; // Maximum number of scroll captures
+      for (let scrollIndex = 0; scrollIndex < maxScrolls; scrollIndex++) {
+        // Capture current view
+        const screenResult = await describeScreen();
+        // Ask AI to extract just the response text from what it sees
+        const extractPrompt = `You are looking at screenshot ${scrollIndex + 1} of ${site}. The user asked: "${question}"
+Extract ONLY the AI's response/answer text visible on screen. Do NOT include:
+- The user's question
+- Any UI elements, buttons, navigation, or headers
+- Any disclaimers, suggestions, or "related questions"
+- Any "Sources" or citation links
+- Any text you already extracted (avoid duplicates)
+${scrollIndex > 0 ? `Previous parts already extracted:\n${extractedParts.join('\n---\n')}\n\nOnly extract NEW text that continues from where we left off.` : ''}
+Just give me the actual answer text, word for word as it appears. If there's no more response text visible, respond with exactly: "END_OF_RESPONSE"`;
+        const extractResponse = await chat([{ role: 'user', content: extractPrompt }]);
+        const extracted = extractResponse.content.trim();
+        // Check if we've reached the end
+        if (extracted === 'END_OF_RESPONSE' || extracted.includes('END_OF_RESPONSE')) {
+          break;
+        }
+        // Check for "no response" indicators
+        if (extracted.toLowerCase().includes('response not ready') ||
+            extracted.toLowerCase().includes('no response visible') ||
+            extracted.toLowerCase().includes('no additional text')) {
+          if (scrollIndex === 0) {
+            extractedParts.push('Response not ready yet or page still loading.');
+          }
+          break;
+        }
+        extractedParts.push(extracted);
+        // Scroll down to see more content
+        await computer.scrollMouse(-5); // Scroll down
+        await sleep(1000); // Wait for scroll animation
+      }
+      // Combine all extracted parts
+      const fullResponse = extractedParts.join('\n\n');
+      step.result = `📝 ${site.charAt(0).toUpperCase() + site.slice(1)} says:\n\n${fullResponse}`;
+      break;
+    }
     case 'screenshot':
       const vision = await describeScreen();
       step.result = vision.description;

package/src/services/telegram.ts CHANGED

@@ -23,6 +23,101 @@ export interface TelegramBotEvents {
   stopped: () => void;
 }
/**
 * Convert common Markdown into Telegram's MarkdownV2 dialect.
 *
 * Recognised constructs are rewritten and protected from escaping:
 *   - fenced code blocks (```code```) and inline code (`code`)
 *   - bold (**text**)  -> *text*
 *   - italic (*text*)  -> _text_
 *   - links [text](url)
 * Every other MarkdownV2 special character (including backslash and
 * underscore) is escaped so that it is rendered literally. Underscore-based
 * emphasis (`_text_`, `__text__`) is intentionally NOT interpreted, because
 * doing so would mangle identifiers such as `snake_case_name`.
 *
 * @param text - Raw message text, possibly containing Markdown.
 * @returns The text to send together with the parse mode to use. `parseMode`
 *   is `undefined` (and `text` is returned untouched) when the input contains
 *   no Markdown-significant characters or when conversion fails.
 */
function formatForTelegram(text: string): { text: string; parseMode: 'MarkdownV2' | undefined } {
  // Nothing that could be rendered or that needs escaping: send as plain text.
  const hasMarkdown = /[*_`\[\]()]/.test(text);
  if (!hasMarkdown) {
    return { text, parseMode: undefined };
  }
  try {
    // Placeholders are delimited by NUL and contain only digits, so the
    // escaping pass below can never alter them (the previous `__NAME_0__`
    // placeholders were themselves escaped and therefore never restored).
    const SENTINEL = '\u0000';
    const fragments: string[] = [];
    const stash = (rendered: string): string => {
      fragments.push(rendered);
      return `${SENTINEL}${fragments.length - 1}${SENTINEL}`;
    };

    // MarkdownV2 escaping rules differ per context:
    //   plain text / entity labels: every special character and backslash
    //   code / pre entities:        only backtick and backslash
    //   link URL part:              only ')' and backslash
    const escapeText = (s: string): string => s.replace(/[\\_*\[\]()~`>#+\-=|{}.!]/g, '\\$&');
    const escapeCode = (s: string): string => s.replace(/[\\`]/g, '\\$&');
    const escapeUrl = (s: string): string => s.replace(/[\\)]/g, '\\$&');

    // Drop any NUL already present so user text cannot forge a placeholder.
    let formatted = text.split(SENTINEL).join('');

    // Protect code blocks (```code```)
    formatted = formatted.replace(/```([\s\S]*?)```/g, (_match: string, code: string) =>
      stash('```' + escapeCode(code) + '```'));

    // Protect inline code (`code`)
    formatted = formatted.replace(/`([^`]+)`/g, (_match: string, code: string) =>
      stash('`' + escapeCode(code) + '`'));

    // Bold: **text** -> *text*
    formatted = formatted.replace(/\*\*(.+?)\*\*/g, (_match: string, inner: string) =>
      stash('*' + escapeText(inner) + '*'));

    // Italic: *text* -> _text_. The delimiters must hug the text so that
    // bullet markers ("* item") and arithmetic ("2 * 3") are left alone.
    formatted = formatted.replace(
      /(?<!\*)\*(?![\s*])(.+?)(?<![\s*])\*(?!\*)/g,
      (_match: string, inner: string) => stash('_' + escapeText(inner) + '_'),
    );

    // Links: [text](url)
    formatted = formatted.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match: string, label: string, url: string) =>
      stash('[' + escapeText(label) + '](' + escapeUrl(url) + ')'));

    // Escape everything that is still outside a protected fragment.
    formatted = escapeText(formatted);

    // Restore fragments. A fragment may embed earlier placeholders (e.g. inline
    // code inside bold), hence the recursion; it terminates because fragment N
    // can only reference fragments with a smaller index. A function replacer is
    // used so `$&`, `$1`, ... in user content are never treated as patterns.
    const restore = (s: string): string =>
      s.replace(/\u0000(\d+)\u0000/g, (_match: string, index: string) =>
        restore(fragments[Number(index)] ?? ''));

    return { text: restore(formatted), parseMode: 'MarkdownV2' };
  } catch {
    // If formatting fails, return plain text
    return { text, parseMode: undefined };
  }
}
/**
 * Reply on `ctx` with `text`, using the parse mode chosen by
 * `formatForTelegram` when there is one.
 *
 * Any failure of the first attempt (for example Telegram rejecting the
 * formatted markup) triggers a single retry that sends the original,
 * unformatted text. An error from that retry propagates to the caller.
 *
 * @param ctx - Bot context; only its `reply` method is used here.
 * @param text - Message to send.
 */
async function sendFormattedMessage(ctx: any, text: string): Promise<void> {
  const rendered = formatForTelegram(text);
  // Pick the first attempt up front: formatted when a parse mode was chosen,
  // otherwise the raw text.
  const firstAttempt = rendered.parseMode
    ? () => ctx.reply(rendered.text, { parse_mode: rendered.parseMode })
    : () => ctx.reply(text);
  try {
    await firstAttempt();
  } catch {
    // Fall back to plain text if the first attempt was rejected.
    await ctx.reply(text);
  }
}
 export class TelegramBotService extends EventEmitter {
   private bot: any = null;
   private isRunning = false;
@@ -252,7 +347,7 @@ export class TelegramBotService extends EventEmitter {
         // Check if this looks like a computer control request
         const computerControlResult = await this.tryComputerControl(userText);
         if (computerControlResult) {
-          await ctx.reply(computerControlResult);
+          await sendFormattedMessage(ctx, computerControlResult);
           history.push({ role: 'assistant', content: computerControlResult });
           return;
         }
@@ -276,16 +371,16 @@ export class TelegramBotService extends EventEmitter {
         // Add assistant response to history
         history.push({ role: 'assistant', content: response.content });
-        // Send response (split if too long for Telegram)
+        // Send response with proper formatting (split if too long for Telegram)
         const responseText = response.content || '(no response)';
         if (responseText.length > 4000) {
           // Split into chunks
           const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
           for (const chunk of chunks) {
-            await ctx.reply(chunk);
+            await sendFormattedMessage(ctx, chunk);
           }
         } else {
-          await ctx.reply(responseText);
+          await sendFormattedMessage(ctx, responseText);
         }
       } catch (error) {
         const errorMsg = error instanceof Error ? error.message : 'Unknown error';