npm - @projectservan8n/cnapse - Versions diffs - 0.6.0 → 0.6.2 - Mend

@projectservan8n/cnapse 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.js +710 -16
package/package.json +1 -1
package/src/components/App.tsx +20 -1
package/src/lib/tasks.ts +243 -1
package/src/services/telegram.ts +218 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@projectservan8n/cnapse",
-  "version": "0.6.0",
+  "version": "0.6.2",
   "description": "Autonomous PC intelligence - AI assistant for desktop automation",
   "type": "module",
   "main": "dist/index.js",

package/src/components/App.tsx CHANGED Viewed

@@ -202,6 +202,21 @@ export function App() {
     }
   }, [chat, tasks]);
+  // Heuristic: does the message read like a direct computer-control command?
+  // Verbs are matched as whole words so that ordinary chat such as
+  // "typescript question" or "pressure test" is not mistaken for "type"/"press".
+  const isComputerControlRequest = useCallback((text: string): boolean => {
+    // Leading whitespace would otherwise defeat the ^-anchored patterns below.
+    const lower = text.trim().toLowerCase();
+    const patterns = [
+      // Imperative verb at the start of the message, optionally after "can you" / "please".
+      /^(?:can you )?(?:please )?(?:open|close|minimize|maximize|restore|focus|click|type|press|scroll|move|drag)\b/,
+      /^(?:can you )?(?:please )?(?:start|launch|run) [a-z]/,
+      // Commands that may appear later in the sentence.
+      /\b(?:open|close|minimize|maximize) (?:the |my )?[a-z]/,
+      /\bclick /,
+      /\btype ["'].+["']/,
+      /\bpress (?:enter|escape|tab|ctrl|alt|shift|space|backspace|delete|f\d+)\b/,
+    ];
+    return patterns.some(p => p.test(lower));
+  }, []);
   // Submit handler
   const handleSubmit = useCallback(async (value: string) => {
     if (!value.trim()) return;
@@ -209,12 +224,16 @@ export function App() {
     if (value.startsWith('/')) {
       await handleCommand(value);
+    } else if (isComputerControlRequest(value)) {
+      // Auto-route to task system for computer control
+      chat.addSystemMessage(`🤖 Executing: ${value}`);
+      await handleTaskCommand(value);
     } else {
       setStatus('Thinking...');
       await chat.sendMessage(value);
       setStatus('Ready');
     }
-  }, [chat, handleCommand]);
+  }, [chat, handleCommand, handleTaskCommand, isComputerControlRequest]);
   // Provider selection callback
   const handleProviderSelect = useCallback((provider: string, model: string) => {

package/src/lib/tasks.ts CHANGED Viewed

@@ -7,6 +7,8 @@
 import { chat, Message } from './api.js';
 import * as computer from '../tools/computer.js';
 import { describeScreen } from './vision.js';
+import * as filesystem from '../tools/filesystem.js';
+import { runCommand } from '../tools/shell.js';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
@@ -185,14 +187,36 @@ Before outputting steps, THINK through these questions:
    - Typing too fast -> add small waits
 ## AVAILABLE ACTIONS
+### App Control
 - open_app: Open app via Run dialog (e.g., "open_app:notepad", "open_app:code", "open_app:chrome")
+- open_folder: Open VS Code with folder (e.g., "open_folder:E:/MyProject")
+- focus_window: Focus by title (e.g., "focus_window:Notepad")
+### Input
 - type_text: Type text string (e.g., "type_text:Hello World")
 - press_key: Single key (e.g., "press_key:enter", "press_key:escape", "press_key:tab")
 - key_combo: Key combination (e.g., "key_combo:control+s", "key_combo:alt+f4", "key_combo:meta+r")
 - click: Mouse click (e.g., "click:left", "click:right")
+### File Operations
+- read_file: Read file contents (e.g., "read_file:E:/test/index.html")
+- write_file: Write content to file (e.g., "write_file:E:/test/output.txt|Hello World")
+- list_files: List files in directory (e.g., "list_files:E:/test")
+### AI Coding
+- generate_code: AI generates code based on description (e.g., "generate_code:E:/test/index.html|create an HTML page with input on left, output on right")
+- edit_code: AI modifies existing code (e.g., "edit_code:E:/test/app.js|add error handling to the fetch calls")
+### Web Browsing
+- open_url: Open URL in default browser (e.g., "open_url:https://perplexity.ai")
+- browse_and_ask: Open AI website, type question, wait for response (e.g., "browse_and_ask:perplexity|What is the capital of France?")
+- browse_and_ask: Supports: perplexity, chatgpt, claude, google
+### Utility
 - wait: Wait N seconds (e.g., "wait:2" - use 1-3s for app loads)
-- focus_window: Focus by title (e.g., "focus_window:Notepad")
 - screenshot: Capture and describe screen
+- shell: Run shell command (e.g., "shell:npm install")
 ${learnedExamples}
 ## EXAMPLES WITH REASONING
@@ -235,6 +259,59 @@ Output:
   { "description": "Close active window with Alt+F4", "action": "key_combo:alt+f4" }
 ]
+### Example 4: "open folder E:/Test in vscode and create an HTML editor"
+Thinking:
+- Goal: Open VS Code with folder, then create/edit HTML file to be an editor
+- How: Use open_folder to launch VS Code with the folder, then use AI to generate code
+- Sequence: Open folder -> List files to see what exists -> Generate/edit the HTML
+- Edge case: File might not exist yet
+Output:
+[
+  { "description": "Open VS Code with the Test folder", "action": "open_folder:E:/Test" },
+  { "description": "Wait for VS Code to load", "action": "wait:3" },
+  { "description": "List files in the folder", "action": "list_files:E:/Test" },
+  { "description": "Generate HTML editor code", "action": "generate_code:E:/Test/editor.html|Create an HTML page with a code editor layout: textarea input on the left side, live preview output on the right side. Include basic CSS for split layout and JavaScript to update preview on input." }
+]
+### Example 5: "read the config.json and add a new setting"
+Thinking:
+- Goal: Read existing file, understand it, modify it
+- How: read_file to get contents, then edit_code to modify
+- Sequence: Read first, then edit
+Output:
+[
+  { "description": "Read the config file", "action": "read_file:config.json" },
+  { "description": "Add new setting to config", "action": "edit_code:config.json|add a new setting called 'darkMode' with value true" }
+]
+### Example 6: "ask perplexity what is the best programming language"
+Thinking:
+- Goal: Open Perplexity AI in browser and ask a question
+- How: Use browse_and_ask with perplexity target
+- Sequence: Open site -> type question -> wait for response -> screenshot result
+Output:
+[
+  { "description": "Ask Perplexity the question", "action": "browse_and_ask:perplexity|what is the best programming language" },
+  { "description": "Wait for response to generate", "action": "wait:5" },
+  { "description": "Capture the response", "action": "screenshot" }
+]
+### Example 7: "search google for weather today"
+Thinking:
+- Goal: Open Google and search for something
+- How: Use browse_and_ask with google target
+- Sequence: Open Google, search, capture results
+Output:
+[
+  { "description": "Search Google", "action": "browse_and_ask:google|weather today" },
+  { "description": "Wait for results", "action": "wait:2" },
+  { "description": "Capture search results", "action": "screenshot" }
+]
 ## YOUR TASK
 Now parse this request: "${input}"
@@ -349,6 +426,171 @@ async function executeStep(step: TaskStep): Promise<void> {
       step.result = `Focused window: ${params}`;
       break;
+    case 'open_folder':
+      // Open VS Code with a specific folder
+      await runCommand(`code "${params}"`, 10000);
+      step.result = `Opened VS Code with folder: ${params}`;
+      break;
+    case 'read_file': {
+      const readResult = await filesystem.readFile(params);
+      if (readResult.success) {
+        step.result = readResult.output;
+      } else {
+        throw new Error(readResult.error || 'Failed to read file');
+      }
+      break;
+    }
+    case 'write_file': {
+      // Format: write_file:path|content
+      const [filePath, ...contentParts] = params.split('|');
+      const content = contentParts.join('|');
+      const writeResult = await filesystem.writeFile(filePath, content);
+      if (writeResult.success) {
+        step.result = `Written to ${filePath}`;
+      } else {
+        throw new Error(writeResult.error || 'Failed to write file');
+      }
+      break;
+    }
+    case 'list_files': {
+      const listResult = await filesystem.listDir(params, false);
+      if (listResult.success) {
+        step.result = listResult.output;
+      } else {
+        throw new Error(listResult.error || 'Failed to list files');
+      }
+      break;
+    }
+    case 'generate_code': {
+      // Format: generate_code:path|description
+      const [codePath, ...descParts] = params.split('|');
+      const codeDescription = descParts.join('|');
+      // Ask AI to generate the code
+      const codePrompt = `Generate complete, working code for this request. Output ONLY the code, no explanations or markdown:
+Request: ${codeDescription}
+File: ${codePath}`;
+      const codeResponse = await chat([{ role: 'user', content: codePrompt }]);
+      let generatedCode = codeResponse.content;
+      // Strip markdown code blocks if present
+      generatedCode = generatedCode.replace(/^```[\w]*\n?/gm, '').replace(/\n?```$/gm, '').trim();
+      // Write the generated code to file
+      const genResult = await filesystem.writeFile(codePath, generatedCode);
+      if (genResult.success) {
+        step.result = `Generated and saved code to ${codePath}`;
+      } else {
+        throw new Error(genResult.error || 'Failed to write generated code');
+      }
+      break;
+    }
+    case 'edit_code': {
+      // Format: edit_code:path|instructions
+      const [editPath, ...instrParts] = params.split('|');
+      const instructions = instrParts.join('|');
+      // Read existing file
+      const existingResult = await filesystem.readFile(editPath);
+      if (!existingResult.success) {
+        throw new Error(`Cannot read file: ${existingResult.error}`);
+      }
+      // Ask AI to edit the code
+      const editPrompt = `Edit this code according to the instructions. Output ONLY the complete modified code, no explanations or markdown:
+Instructions: ${instructions}
+Current code:
+${existingResult.output}`;
+      const editResponse = await chat([{ role: 'user', content: editPrompt }]);
+      let editedCode = editResponse.content;
+      // Strip markdown code blocks if present
+      editedCode = editedCode.replace(/^```[\w]*\n?/gm, '').replace(/\n?```$/gm, '').trim();
+      // Write the edited code back
+      const editWriteResult = await filesystem.writeFile(editPath, editedCode);
+      if (editWriteResult.success) {
+        step.result = `Edited and saved ${editPath}`;
+      } else {
+        throw new Error(editWriteResult.error || 'Failed to write edited code');
+      }
+      break;
+    }
+    case 'shell': {
+      const shellResult = await runCommand(params, 30000);
+      if (shellResult.success) {
+        step.result = shellResult.output || 'Command completed';
+      } else {
+        throw new Error(shellResult.error || 'Command failed');
+      }
+      break;
+    }
+    case 'open_url': {
+      // Open URL in default browser
+      const url = params.startsWith('http') ? params : `https://${params}`;
+      if (process.platform === 'win32') {
+        await runCommand(`start "" "${url}"`, 5000);
+      } else if (process.platform === 'darwin') {
+        await runCommand(`open "${url}"`, 5000);
+      } else {
+        await runCommand(`xdg-open "${url}"`, 5000);
+      }
+      step.result = `Opened ${url} in browser`;
+      break;
+    }
+    case 'browse_and_ask': {
+      // Format: browse_and_ask:site|question
+      const [site, ...questionParts] = params.split('|');
+      const question = questionParts.join('|');
+      // Site-specific URLs and input selectors
+      const sites: Record<string, { url: string; waitTime: number; searchSelector?: string }> = {
+        perplexity: { url: 'https://www.perplexity.ai', waitTime: 3 },
+        chatgpt: { url: 'https://chat.openai.com', waitTime: 4 },
+        claude: { url: 'https://claude.ai', waitTime: 4 },
+        google: { url: 'https://www.google.com', waitTime: 2 },
+        bing: { url: 'https://www.bing.com', waitTime: 2 },
+      };
+      const siteConfig = sites[site.toLowerCase()] || { url: `https://${site}`, waitTime: 3 };
+      // Open the site
+      if (process.platform === 'win32') {
+        await runCommand(`start "" "${siteConfig.url}"`, 5000);
+      } else if (process.platform === 'darwin') {
+        await runCommand(`open "${siteConfig.url}"`, 5000);
+      } else {
+        await runCommand(`xdg-open "${siteConfig.url}"`, 5000);
+      }
+      // Wait for page to load
+      await sleep(siteConfig.waitTime * 1000);
+      // Type the question (most sites have autofocus on search/input)
+      await computer.typeText(question);
+      await sleep(300);
+      // Press Enter to submit
+      await computer.pressKey('Return');
+      step.result = `Asked ${site}: "${question}"`;
+      break;
+    }
     case 'screenshot':
       const vision = await describeScreen();
       step.result = vision.description;

package/src/services/telegram.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { getConfig, getApiKey } from '../lib/config.js';
 import { describeScreen, captureScreenshot } from '../lib/vision.js';
 import { runCommand } from '../tools/shell.js';
 import { chat as chatWithAI, chatWithVision, Message } from '../lib/api.js';
+import * as computer from '../tools/computer.js';
 export interface TelegramMessage {
   chatId: number;
@@ -22,6 +23,101 @@ export interface TelegramBotEvents {
   stopped: () => void;
 }
+/**
+ * Convert common Markdown into Telegram MarkdownV2.
+ *
+ * Supported syntax: fenced code blocks, inline code, [text](url) links,
+ * **bold** (emitted as *bold*) and *italic* (emitted as _italic_).
+ * Underscore-based emphasis (__text__, _text_) is deliberately NOT converted,
+ * so identifiers such as __init__ or snake_case stay literal.
+ *
+ * Everything outside a recognised entity is backslash-escaped so Telegram
+ * renders it literally. Text without any Markdown-looking character is
+ * returned untouched with parseMode undefined (send as plain text).
+ *
+ * @param text Raw message text (typically model output).
+ * @returns The text to send and the parse mode to send it with.
+ */
+function formatForTelegram(text: string): { text: string; parseMode: 'MarkdownV2' | undefined } {
+  // Check if text has markdown that could be rendered
+  const hasMarkdown = /[*_`\[\]()]/.test(text);
+  if (!hasMarkdown) {
+    return { text, parseMode: undefined };
+  }
+  try {
+    // Placeholder delimiters: private-use code points, which are not among the
+    // characters MarkdownV2 requires escaping, so the escape pass below leaves
+    // placeholders intact (underscore-based markers were mangled by it).
+    const OPEN = '\uE000';
+    const CLOSE = '\uE001';
+    // Characters that must be escaped in ordinary MarkdownV2 text, plus the backslash itself.
+    const escapeText = (s: string): string => s.replace(/[\\_*\[\]()~`>#+\-=|{}.!]/g, '\\$&');
+    // Inside code entities only backtick and backslash are escaped.
+    const escapeCode = (s: string): string => s.replace(/[\\`]/g, '\\$&');
+    // Inside the (...) part of a link only ')' and backslash are escaped.
+    const escapeUrl = (s: string): string => s.replace(/[\\)]/g, '\\$&');
+    // Finished entities, indexed by the number embedded in their placeholder.
+    const rendered: string[] = [];
+    const protect = (entity: string): string => {
+      rendered.push(entity);
+      return `${OPEN}${rendered.length - 1}${CLOSE}`;
+    };
+    // Bold first, then single-asterisk italic; delimiters must hug non-space text
+    // so that arithmetic like "2 * 3 * 4" is not treated as emphasis.
+    const applyEmphasis = (s: string): string =>
+      s
+        .replace(/\*\*(.+?)\*\*/g, (_m: string, inner: string) => protect('*' + escapeText(inner) + '*'))
+        .replace(/(?<!\*)\*(?![\s*])(.+?)(?<![\s*])\*(?!\*)/g, (_m: string, inner: string) =>
+          protect('_' + escapeText(inner) + '_'));
+    // Drop any delimiter characters already present so input cannot forge a placeholder.
+    let formatted = text.replace(/[\uE000\uE001]/g, '');
+    // Protect code blocks (```code```)
+    formatted = formatted.replace(/```([\s\S]*?)```/g, (_m: string, code: string) =>
+      protect('```' + escapeCode(code) + '```'));
+    // Protect inline code (`code`)
+    formatted = formatted.replace(/`([^`]+)`/g, (_m: string, code: string) =>
+      protect('`' + escapeCode(code) + '`'));
+    // Protect links [text](url) before emphasis so asterisks inside a URL are left alone;
+    // emphasis inside the link label is still converted.
+    formatted = formatted.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_m: string, label: string, url: string) =>
+      protect('[' + escapeText(applyEmphasis(label)) + '](' + escapeUrl(url) + ')'));
+    // Protect bold / italic in the remaining text
+    formatted = applyEmphasis(formatted);
+    // Now escape remaining special characters
+    formatted = escapeText(formatted);
+    // Restore placeholders. An entity may contain placeholders of entities created
+    // before it, so repeat until none remain; nesting depth is bounded by the
+    // number of entities. A replacer function is used so '$' in code is literal.
+    for (let pass = 0; pass <= rendered.length && formatted.includes(OPEN); pass++) {
+      formatted = formatted.replace(/\uE000(\d+)\uE001/g, (_m: string, index: string) =>
+        rendered[Number(index)] ?? '');
+    }
+    return { text: formatted, parseMode: 'MarkdownV2' };
+  } catch {
+    // If formatting fails, return plain text
+    return { text, parseMode: undefined };
+  }
+}
+/**
+ * Reply on `ctx` with `text`, using the parse mode chosen by formatForTelegram.
+ * If the first attempt throws for any reason, the raw `text` is sent once more
+ * without a parse mode; an error from that second attempt propagates to the caller.
+ */
+async function sendFormattedMessage(ctx: any, text: string): Promise<void> {
+  const prepared = formatForTelegram(text);
+  // Pick the first attempt up front: formatted when a parse mode applies, raw otherwise.
+  const firstAttempt = prepared.parseMode
+    ? () => ctx.reply(prepared.text, { parse_mode: prepared.parseMode })
+    : () => ctx.reply(text);
+  try {
+    await firstAttempt();
+  } catch {
+    // Fallback: resend the unformatted text.
+    await ctx.reply(text);
+  }
+}
 export class TelegramBotService extends EventEmitter {
   private bot: any = null;
   private isRunning = false;
@@ -248,6 +344,14 @@ export class TelegramBotService extends EventEmitter {
         // Send typing indicator
         await ctx.sendChatAction('typing');
+        // Check if this looks like a computer control request
+        const computerControlResult = await this.tryComputerControl(userText);
+        if (computerControlResult) {
+          await sendFormattedMessage(ctx, computerControlResult);
+          history.push({ role: 'assistant', content: computerControlResult });
+          return;
+        }
         // Check if this looks like a screen/vision request
         const isVisionRequest = /screen|see|look|what('?s| is) (on|visible)|show me|screenshot/i.test(userText);
@@ -267,16 +371,16 @@ export class TelegramBotService extends EventEmitter {
         // Add assistant response to history
         history.push({ role: 'assistant', content: response.content });
-        // Send response (split if too long for Telegram)
+        // Send response with proper formatting (split if too long for Telegram)
         const responseText = response.content || '(no response)';
         if (responseText.length > 4000) {
           // Split into chunks
           const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
           for (const chunk of chunks) {
-            await ctx.reply(chunk);
+            await sendFormattedMessage(ctx, chunk);
           }
         } else {
-          await ctx.reply(responseText);
+          await sendFormattedMessage(ctx, responseText);
         }
       } catch (error) {
         const errorMsg = error instanceof Error ? error.message : 'Unknown error';
@@ -302,6 +406,117 @@ export class TelegramBotService extends EventEmitter {
     return this.allowedChatIds.has(chatId);
   }
+  /**
+   * Try to execute computer control commands directly.
+   *
+   * Action commands (window control, typing, key presses, clicks, mouse moves,
+   * scrolling) are recognised only when the verb starts the message, optionally
+   * after "can/could/would/will you" and/or "please". This keeps ordinary
+   * questions that merely contain words like "open" or "close" flowing to the
+   * AI chat instead of triggering an action. Read-only queries (list windows,
+   * active window, mouse position) may appear anywhere in the message.
+   *
+   * @param text Raw user message.
+   * @returns Response string if handled, null if not a computer command.
+   */
+  private async tryComputerControl(text: string): Promise<string | null> {
+    // Strip the optional politeness prefix once so every pattern can anchor on the verb.
+    const request = text.trim().replace(/^(?:(?:can|could|would|will)\s+you\s+)?(?:please\s+)?/i, '');
+    const lower = request.toLowerCase();
+    // Window names: drop trailing sentence punctuation and a trailing "please".
+    const windowTarget = (raw: string): string =>
+      raw.replace(/[\s.!?]+$/, '').replace(/\s+please$/, '').trim();
+    // Minimize window
+    let match = lower.match(/^minimize\s+(?:the\s+)?(.+)/);
+    if (match) {
+      const result = await computer.minimizeWindow(windowTarget(match[1]));
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Maximize window
+    match = lower.match(/^maximize\s+(?:the\s+)?(.+)/);
+    if (match) {
+      const result = await computer.maximizeWindow(windowTarget(match[1]));
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Close window
+    match = lower.match(/^close\s+(?:the\s+)?(.+)/);
+    if (match) {
+      const result = await computer.closeWindow(windowTarget(match[1]));
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Restore window
+    match = lower.match(/^restore\s+(?:the\s+)?(.+)/);
+    if (match) {
+      const result = await computer.restoreWindow(windowTarget(match[1]));
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Focus/open window
+    match = lower.match(/^(?:focus|open|switch to)\s+(?:the\s+)?(.+)/);
+    if (match) {
+      const result = await computer.focusWindow(windowTarget(match[1]));
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Type text (matched against the original casing so the typed text is preserved)
+    match = request.match(/^type\s+["'](.+)["']/i);
+    if (match) {
+      const result = await computer.typeText(match[1]);
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Press key
+    match = lower.match(/^press\s+(?:the\s+)?(\w+)/);
+    if (match) {
+      const result = await computer.pressKey(match[1]);
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Right click (checked before the plain click so "right click the mouse" is not a left click)
+    if (/^right[\s-]*click\b/.test(lower)) {
+      const result = await computer.clickMouse('right');
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Double click
+    if (/^double[\s-]*click\b/.test(lower)) {
+      const result = await computer.doubleClick();
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Click
+    if (/^click[.!]*$/.test(lower) || /^click\s+(?:the\s+)?mouse\b/.test(lower)) {
+      const result = await computer.clickMouse('left');
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Move mouse to coordinates
+    match = lower.match(/^move\s+(?:the\s+)?mouse\s+(?:to\s+)?(\d+)[,\s]+(\d+)/);
+    if (match) {
+      const result = await computer.moveMouse(parseInt(match[1], 10), parseInt(match[2], 10));
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Scroll (positive amount for "up", negative for "down"; 3 when no count or a count of 0 is given)
+    match = lower.match(/^scroll\s+(up|down)(?:\s+(\d+))?/);
+    if (match) {
+      const steps = parseInt(match[2] ?? '', 10) || 3;
+      const amount = match[1] === 'up' ? steps : -steps;
+      const result = await computer.scrollMouse(amount);
+      return result.success ? `✅ ${result.output}` : `❌ ${result.error}`;
+    }
+    // List windows
+    if (/list\s+(?:all\s+)?windows/i.test(lower) || /what\s+windows/i.test(lower)) {
+      const result = await computer.listWindows();
+      return result.success ? `📋 Open Windows:\n${result.output}` : `❌ ${result.error}`;
+    }
+    // Get active window
+    if (/(?:active|current|focused)\s+window/i.test(lower) || /what\s+(?:window|app)/i.test(lower)) {
+      const result = await computer.getActiveWindow();
+      return result.success ? `🪟 Active: ${result.output}` : `❌ ${result.error}`;
+    }
+    // Mouse position
+    if (/mouse\s+position/i.test(lower) || /where.*mouse/i.test(lower)) {
+      const result = await computer.getMousePosition();
+      return result.success ? `🖱️ ${result.output}` : `❌ ${result.error}`;
+    }
+    // Not a computer control command
+    return null;
+  }
   /**
    * Send a message to a specific chat
    */