@projectservan8n/cnapse 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -586,10 +586,10 @@ async function captureScreenFallback() {
586
586
  const { exec: exec5 } = await import("child_process");
587
587
  const { promisify: promisify5 } = await import("util");
588
588
  const { tmpdir } = await import("os");
589
- const { join: join3 } = await import("path");
589
+ const { join: join4 } = await import("path");
590
590
  const { readFile: readFile2, unlink } = await import("fs/promises");
591
591
  const execAsync5 = promisify5(exec5);
592
- const tempFile = join3(tmpdir(), `cnapse-screen-${Date.now()}.png`);
592
+ const tempFile = join4(tmpdir(), `cnapse-screen-${Date.now()}.png`);
593
593
  try {
594
594
  const platform = process.platform;
595
595
  if (platform === "win32") {
@@ -902,29 +902,29 @@ import { promisify as promisify4 } from "util";
902
902
  // src/tools/filesystem.ts
903
903
  import { promises as fs } from "fs";
904
904
  import { join, dirname } from "path";
905
- async function readFile(path2) {
905
+ async function readFile(path3) {
906
906
  try {
907
- const content = await fs.readFile(path2, "utf-8");
907
+ const content = await fs.readFile(path3, "utf-8");
908
908
  return ok(content);
909
909
  } catch (error) {
910
910
  return err(`Failed to read file: ${error.message}`);
911
911
  }
912
912
  }
913
- async function writeFile(path2, content) {
913
+ async function writeFile(path3, content) {
914
914
  try {
915
- const dir = dirname(path2);
915
+ const dir = dirname(path3);
916
916
  await fs.mkdir(dir, { recursive: true });
917
- await fs.writeFile(path2, content, "utf-8");
918
- return ok(`Written ${content.length} bytes to ${path2}`);
917
+ await fs.writeFile(path3, content, "utf-8");
918
+ return ok(`Written ${content.length} bytes to ${path3}`);
919
919
  } catch (error) {
920
920
  return err(`Failed to write file: ${error.message}`);
921
921
  }
922
922
  }
923
- async function listDir(path2, recursive = false) {
923
+ async function listDir(path3, recursive = false) {
924
924
  try {
925
- const stat = await fs.stat(path2);
925
+ const stat = await fs.stat(path3);
926
926
  if (!stat.isDirectory()) {
927
- return err(`Not a directory: ${path2}`);
927
+ return err(`Not a directory: ${path3}`);
928
928
  }
929
929
  const entries = [];
930
930
  async function walkDir(dir, prefix) {
@@ -941,7 +941,7 @@ async function listDir(path2, recursive = false) {
941
941
  }
942
942
  }
943
943
  }
944
- await walkDir(path2, "");
944
+ await walkDir(path3, "");
945
945
  entries.sort();
946
946
  return ok(entries.join("\n"));
947
947
  } catch (error) {
@@ -1458,648 +1458,235 @@ ${stderr}`
1458
1458
  }
1459
1459
  }
1460
1460
 
1461
- // src/services/telegram.ts
1462
- function formatForTelegram(text) {
1463
- const hasMarkdown = /[*_`\[\]()]/.test(text);
1464
- if (!hasMarkdown) {
1465
- return { text, parseMode: void 0 };
1466
- }
1467
- try {
1468
- let formatted = text;
1469
- const escapeChars = ["\\", "_", "*", "[", "]", "(", ")", "~", "`", ">", "#", "+", "-", "=", "|", "{", "}", ".", "!"];
1470
- const placeholders = [];
1471
- let placeholderIndex = 0;
1472
- formatted = formatted.replace(/```([\s\S]*?)```/g, (match, code) => {
1473
- const placeholder = `__CODEBLOCK_${placeholderIndex++}__`;
1474
- placeholders.push({ placeholder, original: "```" + code.replace(/\\/g, "\\\\") + "```" });
1475
- return placeholder;
1476
- });
1477
- formatted = formatted.replace(/`([^`]+)`/g, (match, code) => {
1478
- const placeholder = `__INLINECODE_${placeholderIndex++}__`;
1479
- placeholders.push({ placeholder, original: "`" + code.replace(/\\/g, "\\\\") + "`" });
1480
- return placeholder;
1481
- });
1482
- formatted = formatted.replace(/\*\*(.+?)\*\*/g, (match, text2) => {
1483
- const placeholder = `__BOLD_${placeholderIndex++}__`;
1484
- placeholders.push({ placeholder, original: "*" + text2 + "*" });
1485
- return placeholder;
1486
- });
1487
- formatted = formatted.replace(/(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g, (match, text2) => {
1488
- const placeholder = `__ITALIC_${placeholderIndex++}__`;
1489
- placeholders.push({ placeholder, original: "_" + text2 + "_" });
1490
- return placeholder;
1491
- });
1492
- formatted = formatted.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (match, text2, url) => {
1493
- const placeholder = `__LINK_${placeholderIndex++}__`;
1494
- placeholders.push({ placeholder, original: "[" + text2 + "](" + url + ")" });
1495
- return placeholder;
1496
- });
1497
- for (const char of escapeChars) {
1498
- if (char === "\\") continue;
1499
- formatted = formatted.split(char).join("\\" + char);
1461
+ // src/services/browser.ts
1462
+ import { chromium } from "playwright";
1463
+ import * as path from "path";
1464
+ import * as os2 from "os";
1465
+ import * as fs2 from "fs";
1466
+ var context = null;
1467
+ var activePage = null;
1468
+ var defaultConfig = {
1469
+ headless: false,
1470
+ // Show browser so user can see what's happening
1471
+ slowMo: 50,
1472
+ // Slight delay for visibility
1473
+ viewport: { width: 1280, height: 800 },
1474
+ useSystemBrowser: true
1475
+ // Default to using system Chrome
1476
+ };
1477
+ function findSystemBrowser() {
1478
+ const possiblePaths = [
1479
+ // Chrome paths
1480
+ path.join(process.env["PROGRAMFILES"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1481
+ path.join(process.env["PROGRAMFILES(X86)"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1482
+ path.join(process.env["LOCALAPPDATA"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1483
+ // Edge paths (fallback)
1484
+ path.join(process.env["PROGRAMFILES"] || "", "Microsoft", "Edge", "Application", "msedge.exe"),
1485
+ path.join(process.env["PROGRAMFILES(X86)"] || "", "Microsoft", "Edge", "Application", "msedge.exe")
1486
+ ];
1487
+ for (const browserPath of possiblePaths) {
1488
+ if (fs2.existsSync(browserPath)) {
1489
+ return browserPath;
1500
1490
  }
1501
- for (const { placeholder, original } of placeholders) {
1502
- formatted = formatted.replace(placeholder, original);
1491
+ }
1492
+ return null;
1493
+ }
1494
+ function getChromeUserDataDir() {
1495
+ const cnapseProfile = path.join(os2.homedir(), ".cnapse", "chrome-profile");
1496
+ if (!fs2.existsSync(cnapseProfile)) {
1497
+ fs2.mkdirSync(cnapseProfile, { recursive: true });
1498
+ }
1499
+ return cnapseProfile;
1500
+ }
1501
+ async function initBrowser(config = {}) {
1502
+ const cfg = { ...defaultConfig, ...config };
1503
+ if (!context) {
1504
+ const browserPath = cfg.useSystemBrowser ? findSystemBrowser() : null;
1505
+ const userDataDir = getChromeUserDataDir();
1506
+ if (browserPath && cfg.useSystemBrowser) {
1507
+ context = await chromium.launchPersistentContext(userDataDir, {
1508
+ headless: cfg.headless,
1509
+ slowMo: cfg.slowMo,
1510
+ viewport: cfg.viewport,
1511
+ executablePath: browserPath,
1512
+ channel: void 0,
1513
+ // Don't use channel when specifying executablePath
1514
+ args: [
1515
+ "--disable-blink-features=AutomationControlled",
1516
+ // Less bot detection
1517
+ "--no-first-run",
1518
+ "--no-default-browser-check"
1519
+ ]
1520
+ });
1521
+ } else {
1522
+ context = await chromium.launchPersistentContext(userDataDir, {
1523
+ headless: cfg.headless,
1524
+ slowMo: cfg.slowMo,
1525
+ viewport: cfg.viewport,
1526
+ args: [
1527
+ "--disable-blink-features=AutomationControlled"
1528
+ ]
1529
+ });
1503
1530
  }
1504
- return { text: formatted, parseMode: "MarkdownV2" };
1531
+ }
1532
+ const pages = context.pages();
1533
+ if (pages.length > 0) {
1534
+ activePage = pages[0];
1535
+ } else {
1536
+ activePage = await context.newPage();
1537
+ }
1538
+ return activePage;
1539
+ }
1540
+ async function getPage() {
1541
+ if (!activePage) {
1542
+ return initBrowser();
1543
+ }
1544
+ return activePage;
1545
+ }
1546
+ async function navigateTo(url) {
1547
+ const page = await getPage();
1548
+ await page.goto(url, { waitUntil: "domcontentloaded" });
1549
+ }
1550
+ async function takeScreenshot() {
1551
+ const page = await getPage();
1552
+ const buffer = await page.screenshot({ type: "png" });
1553
+ return buffer.toString("base64");
1554
+ }
1555
+ async function clickElement(selector, timeout = 1e4) {
1556
+ const page = await getPage();
1557
+ try {
1558
+ await page.click(selector, { timeout });
1559
+ return true;
1505
1560
  } catch {
1506
- return { text, parseMode: void 0 };
1561
+ return false;
1507
1562
  }
1508
1563
  }
1509
- async function sendFormattedMessage(ctx, text) {
1510
- const { text: formatted, parseMode } = formatForTelegram(text);
1564
+ async function typeInElement(selector, text, timeout = 1e4) {
1565
+ const page = await getPage();
1511
1566
  try {
1512
- if (parseMode) {
1513
- await ctx.reply(formatted, { parse_mode: parseMode });
1514
- } else {
1515
- await ctx.reply(text);
1516
- }
1567
+ await page.fill(selector, text, { timeout });
1568
+ return true;
1517
1569
  } catch {
1518
- await ctx.reply(text);
1570
+ return false;
1519
1571
  }
1520
1572
  }
1521
- var TelegramBotService = class extends EventEmitter {
1522
- bot = null;
1523
- isRunning = false;
1524
- allowedChatIds = /* @__PURE__ */ new Set();
1525
- chatHistory = /* @__PURE__ */ new Map();
1526
- constructor() {
1527
- super();
1573
+ async function pressKey2(key) {
1574
+ const page = await getPage();
1575
+ await page.keyboard.press(key);
1576
+ }
1577
+ async function scroll(direction, amount = 500) {
1578
+ const page = await getPage();
1579
+ await page.mouse.wheel(0, direction === "down" ? amount : -amount);
1580
+ }
1581
+ async function getPageText() {
1582
+ const page = await getPage();
1583
+ return await page.evaluate(() => document.body.innerText);
1584
+ }
1585
+ async function elementExists(selector) {
1586
+ const page = await getPage();
1587
+ try {
1588
+ const element = await page.$(selector);
1589
+ return element !== null;
1590
+ } catch {
1591
+ return false;
1528
1592
  }
1529
- /**
1530
- * Start the Telegram bot
1531
- */
1532
- async start() {
1533
- if (this.isRunning) {
1534
- return;
1535
- }
1536
- const botToken = getApiKey("telegram");
1537
- if (!botToken) {
1538
- throw new Error("Telegram bot token not configured. Use: cnapse auth telegram YOUR_BOT_TOKEN");
1593
+ }
1594
+ var aiChatConfigs = {
1595
+ perplexity: {
1596
+ url: "https://www.perplexity.ai",
1597
+ inputSelector: 'textarea[placeholder*="Ask"]',
1598
+ submitKey: "Enter",
1599
+ responseSelector: '.prose, [class*="answer"], [class*="response"]',
1600
+ waitForResponse: 15e3
1601
+ },
1602
+ chatgpt: {
1603
+ url: "https://chat.openai.com",
1604
+ inputSelector: 'textarea[id="prompt-textarea"], textarea[data-id="root"]',
1605
+ submitSelector: 'button[data-testid="send-button"]',
1606
+ responseSelector: '[data-message-author-role="assistant"]',
1607
+ waitForResponse: 2e4
1608
+ },
1609
+ claude: {
1610
+ url: "https://claude.ai",
1611
+ inputSelector: '[contenteditable="true"], textarea',
1612
+ submitKey: "Enter",
1613
+ responseSelector: '[data-testid="message-content"]',
1614
+ waitForResponse: 2e4
1615
+ },
1616
+ copilot: {
1617
+ url: "https://copilot.microsoft.com",
1618
+ inputSelector: 'textarea, [contenteditable="true"]',
1619
+ submitKey: "Enter",
1620
+ responseSelector: '[class*="response"], [class*="message"]',
1621
+ waitForResponse: 15e3
1622
+ },
1623
+ google: {
1624
+ url: "https://www.google.com",
1625
+ inputSelector: 'textarea[name="q"], input[name="q"]',
1626
+ submitKey: "Enter",
1627
+ responseSelector: "#search",
1628
+ waitForResponse: 5e3
1629
+ }
1630
+ };
1631
+ async function askAI(site, question, includeScreenshot = false) {
1632
+ const config = aiChatConfigs[site];
1633
+ if (!config) {
1634
+ throw new Error(`Unknown AI site: ${site}`);
1635
+ }
1636
+ const page = await getPage();
1637
+ await page.goto(config.url, { waitUntil: "domcontentloaded" });
1638
+ await page.waitForTimeout(2e3);
1639
+ try {
1640
+ await page.waitForSelector(config.inputSelector, { timeout: 1e4 });
1641
+ await page.fill(config.inputSelector, question);
1642
+ } catch {
1643
+ await page.click(config.inputSelector);
1644
+ await page.type(config.inputSelector, question, { delay: 30 });
1645
+ }
1646
+ if (config.submitSelector) {
1647
+ await page.click(config.submitSelector);
1648
+ } else if (config.submitKey) {
1649
+ await page.keyboard.press(config.submitKey);
1650
+ }
1651
+ await page.waitForTimeout(config.waitForResponse);
1652
+ let response = "";
1653
+ try {
1654
+ const elements = await page.$$(config.responseSelector);
1655
+ if (elements.length > 0) {
1656
+ const lastElement = elements[elements.length - 1];
1657
+ response = await lastElement.textContent() || "";
1539
1658
  }
1659
+ } catch {
1660
+ response = await getPageText();
1661
+ }
1662
+ let screenshot;
1663
+ if (includeScreenshot) {
1664
+ screenshot = await takeScreenshot();
1665
+ }
1666
+ return { response: response.trim(), screenshot };
1667
+ }
1668
+ async function getFullAIResponse(site, maxScrolls = 5) {
1669
+ const config = aiChatConfigs[site];
1670
+ const page = await getPage();
1671
+ const responseParts = [];
1672
+ for (let i = 0; i < maxScrolls; i++) {
1540
1673
  try {
1541
- const { Telegraf } = await import("telegraf");
1542
- this.bot = new Telegraf(botToken);
1543
- const config = getConfig();
1544
- if (config.telegram?.chatId) {
1545
- this.allowedChatIds.add(config.telegram.chatId);
1674
+ const elements = await page.$$(config.responseSelector);
1675
+ if (elements.length > 0) {
1676
+ const lastElement = elements[elements.length - 1];
1677
+ const text = await lastElement.textContent();
1678
+ if (text) {
1679
+ responseParts.push(text.trim());
1680
+ }
1546
1681
  }
1547
- this.setupHandlers();
1548
- await this.bot.launch();
1549
- this.isRunning = true;
1550
- this.emit("started");
1551
- } catch (error) {
1552
- throw new Error(`Failed to start Telegram bot: ${error instanceof Error ? error.message : "Unknown error"}`);
1553
- }
1554
- }
1555
- /**
1556
- * Stop the Telegram bot
1557
- */
1558
- async stop() {
1559
- if (!this.isRunning || !this.bot) {
1560
- return;
1561
- }
1562
- this.bot.stop("SIGTERM");
1563
- this.isRunning = false;
1564
- this.bot = null;
1565
- this.emit("stopped");
1566
- }
1567
- /**
1568
- * Check if bot is running
1569
- */
1570
- get running() {
1571
- return this.isRunning;
1572
- }
1573
- /**
1574
- * Setup message and command handlers
1575
- */
1576
- setupHandlers() {
1577
- if (!this.bot) return;
1578
- this.bot.command("start", async (ctx) => {
1579
- const chatId = ctx.chat.id;
1580
- this.allowedChatIds.add(chatId);
1581
- await ctx.reply(
1582
- `\u{1F916} C-napse connected!
1583
-
1584
- Commands:
1585
- /screen - Take screenshot
1586
- /describe - Screenshot + AI description
1587
- /run <cmd> - Execute command
1588
- /status - System status
1589
-
1590
- Your chat ID: ${chatId}`
1591
- );
1592
- });
1593
- this.bot.command("screen", async (ctx) => {
1594
- if (!this.isAllowed(ctx.chat.id)) {
1595
- await ctx.reply("\u26D4 Not authorized. Send /start first.");
1596
- return;
1597
- }
1598
- await ctx.reply("\u{1F4F8} Taking screenshot...");
1599
- try {
1600
- const screenshot = await captureScreenshot();
1601
- if (!screenshot) {
1602
- await ctx.reply("\u274C Failed to capture screenshot");
1603
- return;
1604
- }
1605
- const buffer = Buffer.from(screenshot, "base64");
1606
- await ctx.replyWithPhoto({ source: buffer }, { caption: "\u{1F4F8} Current screen" });
1607
- } catch (error) {
1608
- await ctx.reply(`\u274C Error: ${error instanceof Error ? error.message : "Unknown error"}`);
1609
- }
1610
- });
1611
- this.bot.command("describe", async (ctx) => {
1612
- if (!this.isAllowed(ctx.chat.id)) {
1613
- await ctx.reply("\u26D4 Not authorized. Send /start first.");
1614
- return;
1615
- }
1616
- await ctx.reply("\u{1F50D} Analyzing screen...");
1617
- try {
1618
- const result = await describeScreen();
1619
- const buffer = Buffer.from(result.screenshot, "base64");
1620
- const caption = `\u{1F5A5}\uFE0F Screen Analysis:
1621
-
1622
- ${result.description}`.slice(0, 1024);
1623
- await ctx.replyWithPhoto({ source: buffer }, { caption });
1624
- if (result.description.length > 900) {
1625
- await ctx.reply(result.description);
1626
- }
1627
- } catch (error) {
1628
- await ctx.reply(`\u274C Error: ${error instanceof Error ? error.message : "Unknown error"}`);
1629
- }
1630
- });
1631
- this.bot.command("run", async (ctx) => {
1632
- if (!this.isAllowed(ctx.chat.id)) {
1633
- await ctx.reply("\u26D4 Not authorized. Send /start first.");
1634
- return;
1635
- }
1636
- const cmd = ctx.message.text.replace("/run ", "").trim();
1637
- if (!cmd) {
1638
- await ctx.reply("Usage: /run <command>\nExample: /run dir");
1639
- return;
1640
- }
1641
- await ctx.reply(`\u2699\uFE0F Running: ${cmd}`);
1642
- try {
1643
- const result = await runCommand(cmd, 3e4);
1644
- if (result.success) {
1645
- const output = result.output.slice(0, 4e3) || "(no output)";
1646
- await ctx.reply(`\u2705 Output:
1647
- \`\`\`
1648
- ${output}
1649
- \`\`\``, { parse_mode: "Markdown" });
1650
- } else {
1651
- await ctx.reply(`\u274C Error:
1652
- \`\`\`
1653
- ${result.error}
1654
- \`\`\``, { parse_mode: "Markdown" });
1655
- }
1656
- } catch (error) {
1657
- await ctx.reply(`\u274C Error: ${error instanceof Error ? error.message : "Unknown error"}`);
1658
- }
1659
- });
1660
- this.bot.command("status", async (ctx) => {
1661
- if (!this.isAllowed(ctx.chat.id)) {
1662
- await ctx.reply("\u26D4 Not authorized. Send /start first.");
1663
- return;
1664
- }
1665
- const config = getConfig();
1666
- const status = [
1667
- "\u{1F4CA} C-napse Status",
1668
- "",
1669
- `Provider: ${config.provider}`,
1670
- `Model: ${config.model}`,
1671
- `Platform: ${process.platform}`,
1672
- `Node: ${process.version}`
1673
- ].join("\n");
1674
- await ctx.reply(status);
1675
- });
1676
- this.bot.on("text", async (ctx) => {
1677
- if (!this.isAllowed(ctx.chat.id)) {
1678
- return;
1679
- }
1680
- if (ctx.message.text.startsWith("/")) {
1681
- return;
1682
- }
1683
- const chatId = ctx.chat.id;
1684
- const userText = ctx.message.text;
1685
- const from = ctx.from.username || ctx.from.first_name || "User";
1686
- const message = {
1687
- chatId,
1688
- text: userText,
1689
- from
1690
- };
1691
- this.emit("message", message);
1692
- if (!this.chatHistory.has(chatId)) {
1693
- this.chatHistory.set(chatId, []);
1694
- }
1695
- const history = this.chatHistory.get(chatId);
1696
- history.push({ role: "user", content: userText });
1697
- if (history.length > 10) {
1698
- history.splice(0, history.length - 10);
1699
- }
1700
- try {
1701
- await ctx.sendChatAction("typing");
1702
- const computerControlResult = await this.tryComputerControl(userText);
1703
- if (computerControlResult) {
1704
- await sendFormattedMessage(ctx, computerControlResult);
1705
- history.push({ role: "assistant", content: computerControlResult });
1706
- return;
1707
- }
1708
- const isVisionRequest = /screen|see|look|what('?s| is) (on|visible)|show me|screenshot/i.test(userText);
1709
- let response;
1710
- if (isVisionRequest) {
1711
- const screenshot = await captureScreenshot();
1712
- if (screenshot) {
1713
- response = await chatWithVision(history, screenshot);
1714
- } else {
1715
- response = await chat(history);
1716
- }
1717
- } else {
1718
- response = await chat(history);
1719
- }
1720
- history.push({ role: "assistant", content: response.content });
1721
- const responseText = response.content || "(no response)";
1722
- if (responseText.length > 4e3) {
1723
- const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
1724
- for (const chunk of chunks) {
1725
- await sendFormattedMessage(ctx, chunk);
1726
- }
1727
- } else {
1728
- await sendFormattedMessage(ctx, responseText);
1729
- }
1730
- } catch (error) {
1731
- const errorMsg = error instanceof Error ? error.message : "Unknown error";
1732
- await ctx.reply(`\u274C Error: ${errorMsg}`);
1733
- this.emit("error", new Error(errorMsg));
1734
- }
1735
- });
1736
- this.bot.catch((err2) => {
1737
- this.emit("error", err2);
1738
- });
1739
- }
1740
- /**
1741
- * Check if chat is authorized
1742
- */
1743
- isAllowed(chatId) {
1744
- if (this.allowedChatIds.size === 0) {
1745
- return true;
1746
- }
1747
- return this.allowedChatIds.has(chatId);
1748
- }
1749
- /**
1750
- * Try to execute computer control commands directly
1751
- * Returns response string if handled, null if not a computer command
1752
- */
1753
- async tryComputerControl(text) {
1754
- const lower = text.toLowerCase();
1755
- let match = lower.match(/minimize\s+(?:the\s+)?(.+)/i);
1756
- if (match) {
1757
- const result = await minimizeWindow(match[1].trim());
1758
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1759
- }
1760
- match = lower.match(/maximize\s+(?:the\s+)?(.+)/i);
1761
- if (match) {
1762
- const result = await maximizeWindow(match[1].trim());
1763
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1764
- }
1765
- match = lower.match(/close\s+(?:the\s+)?(.+)/i);
1766
- if (match) {
1767
- const result = await closeWindow(match[1].trim());
1768
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1769
- }
1770
- match = lower.match(/restore\s+(?:the\s+)?(.+)/i);
1771
- if (match) {
1772
- const result = await restoreWindow(match[1].trim());
1773
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1774
- }
1775
- match = lower.match(/(?:focus|open|switch to)\s+(?:the\s+)?(.+)/i);
1776
- if (match) {
1777
- const result = await focusWindow(match[1].trim());
1778
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1779
- }
1780
- match = text.match(/type\s+["'](.+)["']/i);
1781
- if (match) {
1782
- const result = await typeText(match[1]);
1783
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1784
- }
1785
- match = lower.match(/press\s+(?:the\s+)?(\w+)/i);
1786
- if (match) {
1787
- const result = await pressKey(match[1]);
1788
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1789
- }
1790
- if (/^click$/i.test(lower) || /click\s+(?:the\s+)?mouse/i.test(lower)) {
1791
- const result = await clickMouse("left");
1792
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1793
- }
1794
- if (/right\s*click/i.test(lower)) {
1795
- const result = await clickMouse("right");
1796
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1797
- }
1798
- if (/double\s*click/i.test(lower)) {
1799
- const result = await doubleClick();
1800
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1801
- }
1802
- match = lower.match(/move\s+(?:the\s+)?mouse\s+(?:to\s+)?(\d+)[,\s]+(\d+)/i);
1803
- if (match) {
1804
- const result = await moveMouse(parseInt(match[1]), parseInt(match[2]));
1805
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1806
- }
1807
- match = lower.match(/scroll\s+(up|down)(?:\s+(\d+))?/i);
1808
- if (match) {
1809
- const amount = match[1] === "up" ? parseInt(match[2]) || 3 : -(parseInt(match[2]) || 3);
1810
- const result = await scrollMouse(amount);
1811
- return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
1812
- }
1813
- if (/list\s+(?:all\s+)?windows/i.test(lower) || /what\s+windows/i.test(lower)) {
1814
- const result = await listWindows();
1815
- return result.success ? `\u{1F4CB} Open Windows:
1816
- ${result.output}` : `\u274C ${result.error}`;
1817
- }
1818
- if (/(?:active|current|focused)\s+window/i.test(lower) || /what\s+(?:window|app)/i.test(lower)) {
1819
- const result = await getActiveWindow();
1820
- return result.success ? `\u{1FA9F} Active: ${result.output}` : `\u274C ${result.error}`;
1821
- }
1822
- if (/mouse\s+position/i.test(lower) || /where.*mouse/i.test(lower)) {
1823
- const result = await getMousePosition();
1824
- return result.success ? `\u{1F5B1}\uFE0F ${result.output}` : `\u274C ${result.error}`;
1825
- }
1826
- return null;
1827
- }
1828
- /**
1829
- * Send a message to a specific chat
1830
- */
1831
- async sendMessage(chatId, text) {
1832
- if (!this.bot || !this.isRunning) {
1833
- throw new Error("Telegram bot is not running");
1834
- }
1835
- await this.bot.telegram.sendMessage(chatId, text);
1836
- }
1837
- /**
1838
- * Send a photo to a specific chat
1839
- */
1840
- async sendPhoto(chatId, base64Image, caption) {
1841
- if (!this.bot || !this.isRunning) {
1842
- throw new Error("Telegram bot is not running");
1843
- }
1844
- const buffer = Buffer.from(base64Image, "base64");
1845
- await this.bot.telegram.sendPhoto(chatId, { source: buffer }, { caption });
1846
- }
1847
- };
1848
- var instance = null;
1849
- function getTelegramBot() {
1850
- if (!instance) {
1851
- instance = new TelegramBotService();
1852
- }
1853
- return instance;
1854
- }
1855
-
1856
- // src/hooks/useTelegram.ts
1857
- function useTelegram(onMessage) {
1858
- const [isEnabled, setIsEnabled] = useState4(false);
1859
- const [isStarting, setIsStarting] = useState4(false);
1860
- const [error, setError] = useState4(null);
1861
- const [lastMessage, setLastMessage] = useState4(null);
1862
- const onMessageRef = useRef2(onMessage);
1863
- useEffect2(() => {
1864
- onMessageRef.current = onMessage;
1865
- }, [onMessage]);
1866
- const start = useCallback3(async () => {
1867
- if (isEnabled) return;
1868
- setIsStarting(true);
1869
- setError(null);
1870
- try {
1871
- const bot = getTelegramBot();
1872
- bot.on("message", (msg) => {
1873
- setLastMessage(msg);
1874
- onMessageRef.current?.(msg);
1875
- });
1876
- bot.on("error", (err2) => {
1877
- setError(err2.message);
1878
- });
1879
- await bot.start();
1880
- setIsEnabled(true);
1881
- } catch (err2) {
1882
- const errorMsg = err2 instanceof Error ? err2.message : "Failed to start Telegram bot";
1883
- setError(errorMsg);
1884
- throw err2;
1885
- } finally {
1886
- setIsStarting(false);
1887
- }
1888
- }, [isEnabled]);
1889
- const stop = useCallback3(async () => {
1890
- if (!isEnabled) return;
1891
- try {
1892
- const bot = getTelegramBot();
1893
- await bot.stop();
1894
- setIsEnabled(false);
1895
- } catch (err2) {
1896
- const errorMsg = err2 instanceof Error ? err2.message : "Failed to stop Telegram bot";
1897
- setError(errorMsg);
1898
- throw err2;
1899
- }
1900
- }, [isEnabled]);
1901
- const toggle = useCallback3(async () => {
1902
- if (isEnabled) {
1903
- await stop();
1904
- } else {
1905
- await start();
1906
- }
1907
- }, [isEnabled, start, stop]);
1908
- return {
1909
- isEnabled,
1910
- isStarting,
1911
- error,
1912
- lastMessage,
1913
- toggle,
1914
- start,
1915
- stop
1916
- };
1917
- }
1918
-
1919
- // src/hooks/useTasks.ts
1920
- import { useState as useState5, useCallback as useCallback4 } from "react";
1921
-
1922
- // src/services/browser.ts
1923
- import { chromium } from "playwright";
1924
- var browser = null;
1925
- var context = null;
1926
- var activePage = null;
1927
- var defaultConfig = {
1928
- headless: false,
1929
- // Show browser so user can see what's happening
1930
- slowMo: 50,
1931
- // Slight delay for visibility
1932
- viewport: { width: 1280, height: 800 }
1933
- };
1934
- async function initBrowser(config = {}) {
1935
- const cfg = { ...defaultConfig, ...config };
1936
- if (!browser) {
1937
- browser = await chromium.launch({
1938
- headless: cfg.headless,
1939
- slowMo: cfg.slowMo
1940
- });
1941
- }
1942
- if (!context) {
1943
- context = await browser.newContext({
1944
- viewport: cfg.viewport,
1945
- userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
1946
- });
1947
- }
1948
- if (!activePage) {
1949
- activePage = await context.newPage();
1950
- }
1951
- return activePage;
1952
- }
1953
- async function getPage() {
1954
- if (!activePage) {
1955
- return initBrowser();
1956
- }
1957
- return activePage;
1958
- }
1959
- async function navigateTo(url) {
1960
- const page = await getPage();
1961
- await page.goto(url, { waitUntil: "domcontentloaded" });
1962
- }
1963
- async function takeScreenshot() {
1964
- const page = await getPage();
1965
- const buffer = await page.screenshot({ type: "png" });
1966
- return buffer.toString("base64");
1967
- }
1968
- async function clickElement(selector, timeout = 1e4) {
1969
- const page = await getPage();
1970
- try {
1971
- await page.click(selector, { timeout });
1972
- return true;
1973
- } catch {
1974
- return false;
1975
- }
1976
- }
1977
- async function typeInElement(selector, text, timeout = 1e4) {
1978
- const page = await getPage();
1979
- try {
1980
- await page.fill(selector, text, { timeout });
1981
- return true;
1982
- } catch {
1983
- return false;
1984
- }
1985
- }
1986
- async function pressKey2(key) {
1987
- const page = await getPage();
1988
- await page.keyboard.press(key);
1989
- }
1990
- async function scroll(direction, amount = 500) {
1991
- const page = await getPage();
1992
- await page.mouse.wheel(0, direction === "down" ? amount : -amount);
1993
- }
1994
- async function getPageText() {
1995
- const page = await getPage();
1996
- return await page.evaluate(() => document.body.innerText);
1997
- }
1998
- async function elementExists(selector) {
1999
- const page = await getPage();
2000
- try {
2001
- const element = await page.$(selector);
2002
- return element !== null;
2003
- } catch {
2004
- return false;
2005
- }
2006
- }
2007
- var aiChatConfigs = {
2008
- perplexity: {
2009
- url: "https://www.perplexity.ai",
2010
- inputSelector: 'textarea[placeholder*="Ask"]',
2011
- submitKey: "Enter",
2012
- responseSelector: '.prose, [class*="answer"], [class*="response"]',
2013
- waitForResponse: 15e3
2014
- },
2015
- chatgpt: {
2016
- url: "https://chat.openai.com",
2017
- inputSelector: 'textarea[id="prompt-textarea"], textarea[data-id="root"]',
2018
- submitSelector: 'button[data-testid="send-button"]',
2019
- responseSelector: '[data-message-author-role="assistant"]',
2020
- waitForResponse: 2e4
2021
- },
2022
- claude: {
2023
- url: "https://claude.ai",
2024
- inputSelector: '[contenteditable="true"], textarea',
2025
- submitKey: "Enter",
2026
- responseSelector: '[data-testid="message-content"]',
2027
- waitForResponse: 2e4
2028
- },
2029
- copilot: {
2030
- url: "https://copilot.microsoft.com",
2031
- inputSelector: 'textarea, [contenteditable="true"]',
2032
- submitKey: "Enter",
2033
- responseSelector: '[class*="response"], [class*="message"]',
2034
- waitForResponse: 15e3
2035
- },
2036
- google: {
2037
- url: "https://www.google.com",
2038
- inputSelector: 'textarea[name="q"], input[name="q"]',
2039
- submitKey: "Enter",
2040
- responseSelector: "#search",
2041
- waitForResponse: 5e3
2042
- }
2043
- };
2044
- async function askAI(site, question, includeScreenshot = false) {
2045
- const config = aiChatConfigs[site];
2046
- if (!config) {
2047
- throw new Error(`Unknown AI site: ${site}`);
2048
- }
2049
- const page = await getPage();
2050
- await page.goto(config.url, { waitUntil: "domcontentloaded" });
2051
- await page.waitForTimeout(2e3);
2052
- try {
2053
- await page.waitForSelector(config.inputSelector, { timeout: 1e4 });
2054
- await page.fill(config.inputSelector, question);
2055
- } catch {
2056
- await page.click(config.inputSelector);
2057
- await page.type(config.inputSelector, question, { delay: 30 });
2058
- }
2059
- if (config.submitSelector) {
2060
- await page.click(config.submitSelector);
2061
- } else if (config.submitKey) {
2062
- await page.keyboard.press(config.submitKey);
2063
- }
2064
- await page.waitForTimeout(config.waitForResponse);
2065
- let response = "";
2066
- try {
2067
- const elements = await page.$$(config.responseSelector);
2068
- if (elements.length > 0) {
2069
- const lastElement = elements[elements.length - 1];
2070
- response = await lastElement.textContent() || "";
2071
- }
2072
- } catch {
2073
- response = await getPageText();
2074
- }
2075
- let screenshot;
2076
- if (includeScreenshot) {
2077
- screenshot = await takeScreenshot();
2078
- }
2079
- return { response: response.trim(), screenshot };
2080
- }
2081
- async function getFullAIResponse(site, maxScrolls = 5) {
2082
- const config = aiChatConfigs[site];
2083
- const page = await getPage();
2084
- const responseParts = [];
2085
- for (let i = 0; i < maxScrolls; i++) {
2086
- try {
2087
- const elements = await page.$$(config.responseSelector);
2088
- if (elements.length > 0) {
2089
- const lastElement = elements[elements.length - 1];
2090
- const text = await lastElement.textContent();
2091
- if (text) {
2092
- responseParts.push(text.trim());
2093
- }
2094
- }
2095
- await page.mouse.wheel(0, 500);
2096
- await page.waitForTimeout(1e3);
2097
- const atBottom = await page.evaluate(() => {
2098
- return window.innerHeight + window.scrollY >= document.body.scrollHeight - 100;
2099
- });
2100
- if (atBottom) break;
2101
- } catch {
2102
- break;
1682
+ await page.mouse.wheel(0, 500);
1683
+ await page.waitForTimeout(1e3);
1684
+ const atBottom = await page.evaluate(() => {
1685
+ return window.innerHeight + window.scrollY >= document.body.scrollHeight - 100;
1686
+ });
1687
+ if (atBottom) break;
1688
+ } catch {
1689
+ break;
2103
1690
  }
2104
1691
  }
2105
1692
  return responseParts;
@@ -2237,14 +1824,14 @@ async function research(topic, maxSources = 3) {
2237
1824
  }
2238
1825
 
2239
1826
  // src/lib/tasks.ts
2240
- import * as fs2 from "fs";
2241
- import * as path from "path";
2242
- import * as os2 from "os";
2243
- var TASK_MEMORY_FILE = path.join(os2.homedir(), ".cnapse", "task-memory.json");
1827
+ import * as fs3 from "fs";
1828
+ import * as path2 from "path";
1829
+ import * as os3 from "os";
1830
+ var TASK_MEMORY_FILE = path2.join(os3.homedir(), ".cnapse", "task-memory.json");
2244
1831
  function loadTaskMemory() {
2245
1832
  try {
2246
- if (fs2.existsSync(TASK_MEMORY_FILE)) {
2247
- const data = fs2.readFileSync(TASK_MEMORY_FILE, "utf-8");
1833
+ if (fs3.existsSync(TASK_MEMORY_FILE)) {
1834
+ const data = fs3.readFileSync(TASK_MEMORY_FILE, "utf-8");
2248
1835
  return JSON.parse(data);
2249
1836
  }
2250
1837
  } catch {
@@ -2270,11 +1857,11 @@ function saveTaskPattern(input, steps) {
2270
1857
  });
2271
1858
  }
2272
1859
  memory.patterns = memory.patterns.sort((a, b) => b.successCount - a.successCount).slice(0, 100);
2273
- const dir = path.dirname(TASK_MEMORY_FILE);
2274
- if (!fs2.existsSync(dir)) {
2275
- fs2.mkdirSync(dir, { recursive: true });
1860
+ const dir = path2.dirname(TASK_MEMORY_FILE);
1861
+ if (!fs3.existsSync(dir)) {
1862
+ fs3.mkdirSync(dir, { recursive: true });
2276
1863
  }
2277
- fs2.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
1864
+ fs3.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
2278
1865
  } catch {
2279
1866
  }
2280
1867
  }
@@ -2902,256 +2489,787 @@ Create a well-organized summary with:
2902
2489
  3. Any notable facts or statistics
2903
2490
  4. Conclusion
2904
2491
 
2905
- Be thorough but concise.`
2906
- }]);
2907
- step.result = `\u{1F52C} Research Summary: ${researchQuery}
2492
+ Be thorough but concise.`
2493
+ }]);
2494
+ step.result = `\u{1F52C} Research Summary: ${researchQuery}
2495
+
2496
+ ${synthesis.content}`;
2497
+ break;
2498
+ }
2499
+ case "ask_llm": {
2500
+ const [llmName, ...questionParts] = params.split("|");
2501
+ const question = questionParts.join("|");
2502
+ const currentScreen = await describeScreen();
2503
+ const fullQuestion = `I'm looking at my screen and I need help. ${question}
2504
+
2505
+ Here's what I see on my screen: ${currentScreen.description}`;
2506
+ const supportedLLMs = ["perplexity", "chatgpt", "claude", "copilot"];
2507
+ const llmLower = llmName.toLowerCase();
2508
+ if (!supportedLLMs.includes(llmLower)) {
2509
+ throw new Error(`Unknown LLM: ${llmName}. Supported: ${supportedLLMs.join(", ")}`);
2510
+ }
2511
+ const result = await askAI(llmLower, fullQuestion, false);
2512
+ const fullParts = await getFullAIResponse(llmLower, 3);
2513
+ const finalResponse = fullParts.length > 0 ? fullParts.join("\n\n") : result.response;
2514
+ step.result = `\u{1F916} ${llmName} says:
2515
+
2516
+ ${finalResponse}`;
2517
+ break;
2518
+ }
2519
+ case "learn_ui": {
2520
+ const uiScreen = await describeScreen();
2521
+ const uiAnalysis = await chat([{
2522
+ role: "user",
2523
+ content: `Analyze this screenshot and identify all interactive UI elements. List:
2524
+ 1. All clickable buttons and their likely functions
2525
+ 2. Text input fields
2526
+ 3. Menus and dropdowns
2527
+ 4. Links
2528
+ 5. Any keyboard shortcuts visible
2529
+ 6. The main actions available in this interface
2530
+
2531
+ Question: ${params}
2532
+
2533
+ Be specific about locations (top-left, center, etc.) and what each element does.`
2534
+ }]);
2535
+ step.result = `\u{1F50D} UI Analysis:
2536
+
2537
+ ${uiAnalysis.content}`;
2538
+ break;
2539
+ }
2540
+ case "adaptive_do": {
2541
+ const goal = params;
2542
+ const maxAttempts = 5;
2543
+ const actionHistory = [];
2544
+ let accomplished = false;
2545
+ const page = await getPage();
2546
+ for (let attempt = 0; attempt < maxAttempts && !accomplished; attempt++) {
2547
+ const screenshot = await takeScreenshot();
2548
+ const currentState = await chat([{
2549
+ role: "user",
2550
+ content: `Describe what you see on this screen. What app/website is it? What elements are visible?`
2551
+ }]);
2552
+ const nextAction = await chat([{
2553
+ role: "user",
2554
+ content: `GOAL: ${goal}
2555
+
2556
+ CURRENT SCREEN: ${currentState.content}
2557
+
2558
+ PREVIOUS ACTIONS TAKEN:
2559
+ ${actionHistory.length > 0 ? actionHistory.join("\n") : "None yet"}
2560
+
2561
+ Based on what you see, what's the SINGLE next action to take?
2562
+ Options:
2563
+ - click: Click element (describe CSS selector or visible text)
2564
+ - type: Type something (specify selector and text)
2565
+ - press: Press a key (specify key)
2566
+ - scroll: Scroll up/down
2567
+ - navigate: Go to URL
2568
+ - done: Goal is accomplished
2569
+ - stuck: Can't figure out what to do
2570
+
2571
+ Respond in format:
2572
+ ACTION: <action_type>
2573
+ SELECTOR: <css selector or text to find>
2574
+ VALUE: <text to type or URL>
2575
+ REASONING: <why>`
2576
+ }]);
2577
+ const actionContent = nextAction.content;
2578
+ const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
2579
+ const selectorMatch = actionContent.match(/SELECTOR:\s*(.+?)(?:\n|$)/i);
2580
+ const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
2581
+ if (!actionMatch) {
2582
+ actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
2583
+ continue;
2584
+ }
2585
+ const action = actionMatch[1].toLowerCase();
2586
+ const selector = selectorMatch?.[1]?.trim() || "";
2587
+ const value = valueMatch?.[1]?.trim() || "";
2588
+ if (action === "done") {
2589
+ accomplished = true;
2590
+ actionHistory.push(`Attempt ${attempt + 1}: Goal accomplished!`);
2591
+ break;
2592
+ }
2593
+ if (action === "stuck") {
2594
+ actionHistory.push(`Attempt ${attempt + 1}: Got stuck, asking Perplexity for help...`);
2595
+ const helpRequest = `I'm trying to: ${goal}
2596
+
2597
+ I'm stuck. What should I do next? Be specific about what to click or type.`;
2598
+ const advice = await askAI("perplexity", helpRequest, false);
2599
+ actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
2600
+ await navigateTo(page.url());
2601
+ continue;
2602
+ }
2603
+ try {
2604
+ switch (action) {
2605
+ case "click":
2606
+ if (selector) {
2607
+ const clicked = await clickElement(selector);
2608
+ if (!clicked) {
2609
+ await page.getByText(selector).first().click({ timeout: 5e3 });
2610
+ }
2611
+ }
2612
+ actionHistory.push(`Attempt ${attempt + 1}: Clicked "${selector}"`);
2613
+ break;
2614
+ case "type":
2615
+ if (selector && value) {
2616
+ const typed = await typeInElement(selector, value);
2617
+ if (!typed) {
2618
+ await page.getByPlaceholder(selector).first().fill(value);
2619
+ }
2620
+ }
2621
+ actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}" in "${selector}"`);
2622
+ break;
2623
+ case "press":
2624
+ await pressKey2(value || selector);
2625
+ actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || selector}`);
2626
+ break;
2627
+ case "scroll":
2628
+ await scroll(value.toLowerCase().includes("up") ? "up" : "down");
2629
+ actionHistory.push(`Attempt ${attempt + 1}: Scrolled ${value || "down"}`);
2630
+ break;
2631
+ case "navigate":
2632
+ const url = value.startsWith("http") ? value : `https://${value}`;
2633
+ await navigateTo(url);
2634
+ actionHistory.push(`Attempt ${attempt + 1}: Navigated to ${url}`);
2635
+ break;
2636
+ default:
2637
+ actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
2638
+ }
2639
+ } catch (e) {
2640
+ actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
2641
+ }
2642
+ await sleep(2e3);
2643
+ }
2644
+ step.result = `\u{1F3AF} Adaptive Agent Result:
2645
+
2646
+ Goal: ${goal}
2647
+ Accomplished: ${accomplished ? "Yes \u2705" : "Partial/No \u274C"}
2908
2648
 
2909
- ${synthesis.content}`;
2649
+ Action Log:
2650
+ ${actionHistory.join("\n")}`;
2910
2651
  break;
2911
2652
  }
2912
- case "ask_llm": {
2913
- const [llmName, ...questionParts] = params.split("|");
2914
- const question = questionParts.join("|");
2915
- const currentScreen = await describeScreen();
2916
- const fullQuestion = `I'm looking at my screen and I need help. ${question}
2917
-
2918
- Here's what I see on my screen: ${currentScreen.description}`;
2919
- const supportedLLMs = ["perplexity", "chatgpt", "claude", "copilot"];
2920
- const llmLower = llmName.toLowerCase();
2921
- if (!supportedLLMs.includes(llmLower)) {
2922
- throw new Error(`Unknown LLM: ${llmName}. Supported: ${supportedLLMs.join(", ")}`);
2923
- }
2924
- const result = await askAI(llmLower, fullQuestion, false);
2925
- const fullParts = await getFullAIResponse(llmLower, 3);
2926
- const finalResponse = fullParts.length > 0 ? fullParts.join("\n\n") : result.response;
2927
- step.result = `\u{1F916} ${llmName} says:
2928
-
2929
- ${finalResponse}`;
2653
+ case "chat":
2654
+ step.result = `Task noted: ${params}`;
2930
2655
  break;
2656
+ default:
2657
+ throw new Error(`Unknown action: ${actionType}`);
2658
+ }
2659
+ }
2660
/**
 * Run the steps of `task` sequentially, recording per-step status on the
 * step objects themselves and overall status on the task.
 *
 * Once a step fails, the task is marked "failed" and every remaining step is
 * marked "skipped" without being executed. A task that finishes without a
 * failure is marked "completed" and its step list is handed to
 * `saveTaskPattern`.
 *
 * @param {object} task - Task with `description` and a `steps` array; mutated in place.
 * @param {Function} [onProgress] - Called as `(task, step)` right before and
 *   right after each executed step. Its return value is not awaited.
 * @returns {Promise<object>} The same `task` object, with `completedAt` set.
 */
async function executeTask(task, onProgress) {
  task.status = "running";
  for (const current of task.steps) {
    if (task.status !== "failed") {
      current.status = "running";
      onProgress?.(task, current);
      try {
        await executeStep(current);
        current.status = "completed";
      } catch (failure) {
        current.status = "failed";
        current.error = failure instanceof Error ? failure.message : "Unknown error";
        // Flip the task state so the remaining iterations take the skip branch.
        task.status = "failed";
      }
      onProgress?.(task, current);
    } else {
      current.status = "skipped";
    }
  }
  if (task.status !== "failed") {
    task.status = "completed";
    // Only the description/action pair of each step is remembered.
    const rememberedSteps = task.steps.map(({ description, action }) => ({
      description,
      action
    }));
    saveTaskPattern(task.description, rememberedSteps);
  }
  task.completedAt = new Date();
  return task;
}
2690
/**
 * Return a promise that fulfils (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed straight to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
2693
/**
 * Summarise the stored task patterns.
 *
 * @returns {{patternCount: number, totalUses: number, topPatterns: string[]}}
 *   `totalUses` is the sum of every pattern's `successCount`; `topPatterns`
 *   holds up to five `"input" (Nx)` labels, most-used first.
 */
function getTaskMemoryStats() {
  const { patterns } = loadTaskMemory();
  let totalUses = 0;
  patterns.forEach((pattern) => {
    totalUses += pattern.successCount;
  });
  // Sorts the loaded array in place, most successful pattern first.
  patterns.sort((left, right) => right.successCount - left.successCount);
  const topPatterns = patterns
    .slice(0, 5)
    .map(({ input, successCount }) => `"${input}" (${successCount}x)`);
  return {
    patternCount: patterns.length,
    totalUses,
    topPatterns
  };
}
2703
/**
 * Delete the task-memory file if it exists.
 *
 * Best effort: any filesystem error is ignored and nothing is returned.
 */
function clearTaskMemory() {
  try {
    const memoryFileExists = fs3.existsSync(TASK_MEMORY_FILE);
    if (!memoryFileExists) {
      return;
    }
    fs3.unlinkSync(TASK_MEMORY_FILE);
  } catch {
    // Intentionally ignored: clearing the memory is best effort.
  }
}
2711
/**
 * Render a task and its steps as a multi-line, human-readable report.
 *
 * The first line shows the task status glyph and description, followed by a
 * blank line and one line per step. A step line carries the step glyph, the
 * description, and - when present - the result and/or error text.
 *
 * @param {object} task - Object with `status`, `description` and `steps`.
 * @returns {string} The formatted report; every step line ends with "\n".
 */
function formatTask(task) {
  const taskGlyphs = {
    pending: "\u23F3",
    running: "\u{1F504}",
    completed: "\u2705",
    failed: "\u274C"
  };
  const stepGlyphs = {
    pending: "\u25CB",
    running: "\u25D0",
    completed: "\u25CF",
    failed: "\u2717",
    skipped: "\u25CC"
  };
  const header = `${taskGlyphs[task.status]} Task: ${task.description}\n\n`;
  const stepLines = task.steps.map((step) => {
    const resultPart = step.result ? ` \u2192 ${step.result}` : "";
    const errorPart = step.error ? ` (Error: ${step.error})` : "";
    return ` ${stepGlyphs[step.status]} ${step.description}${resultPart}${errorPart}\n`;
  });
  return header + stepLines.join("");
}
2949
2740
 
2950
- ${uiAnalysis.content}`;
2951
- break;
2741
+ // src/services/telegram.ts
2742
/**
 * Convert loosely Markdown-formatted text into a Telegram MarkdownV2 payload.
 *
 * Recognised constructs are code fences, inline code, `**bold**`, `*italic*`
 * and `[label](url)` links. Each one is rendered to MarkdownV2, parked behind
 * a placeholder, and restored after the remaining plain text has been
 * escaped. Text without any Markdown-looking character is returned untouched
 * with no parse mode.
 *
 * @param {string} text - Raw message text.
 * @returns {{text: string, parseMode: ("MarkdownV2"|undefined)}} The text to
 *   send and the parse mode to send it with. On any internal error the
 *   original text is returned with `parseMode` undefined.
 */
function formatForTelegram(text) {
  const hasMarkdown = /[*_`\[\]()]/.test(text);
  if (!hasMarkdown) {
    return { text, parseMode: void 0 };
  }
  try {
    // MarkdownV2 reserves these characters in ordinary text (backslash included).
    const escapePlain = (value) => value.replace(/[\\_*[\]()~`>#+\-=|{}.!]/g, "\\$&");
    // Inside code spans/blocks only the backtick and the backslash are special.
    const escapeCode = (value) => value.replace(/[\\`]/g, "\\$&");
    // Inside the (...) part of a link only ")" and the backslash are special.
    const escapeUrl = (value) => value.replace(/[\\)]/g, "\\$&");

    // Placeholders use private-use code points so the escaping pass cannot
    // alter them (the previous "__NAME_n__" tokens contained "_" and were
    // mangled before they could be restored).
    const rendered = [];
    const tokenFor = (index) => `\uE000${index}\uE001`;
    const park = (markup) => {
      rendered.push(markup);
      return tokenFor(rendered.length - 1);
    };

    let formatted = text;
    formatted = formatted.replace(/```([\s\S]*?)```/g, (match, code) => park("```" + escapeCode(code) + "```"));
    formatted = formatted.replace(/`([^`]+)`/g, (match, code) => park("`" + escapeCode(code) + "`"));
    formatted = formatted.replace(/\*\*(.+?)\*\*/g, (match, inner) => park("*" + escapePlain(inner) + "*"));
    formatted = formatted.replace(
      /(?<!\*)\*(?!\*)(.+?)(?<!\*)\*(?!\*)/g,
      (match, inner) => park("_" + escapePlain(inner) + "_")
    );
    formatted = formatted.replace(
      /\[([^\]]+)\]\(([^)]+)\)/g,
      (match, label, url) => park("[" + escapePlain(label) + "](" + escapeUrl(url) + ")")
    );

    formatted = escapePlain(formatted);

    // Restore newest-first: a later entry may itself contain an earlier
    // placeholder (e.g. bold text wrapping inline code). split/join is used
    // so "$" sequences in user content are never treated as replacement patterns.
    for (let index = rendered.length - 1; index >= 0; index--) {
      formatted = formatted.split(tokenFor(index)).join(rendered[index]);
    }
    return { text: formatted, parseMode: "MarkdownV2" };
  } catch {
    return { text, parseMode: void 0 };
  }
}
2789
/**
 * Reply to a Telegram context with `text`, using the parse mode chosen by
 * `formatForTelegram` when there is one.
 *
 * If the first reply attempt throws for any reason, the raw `text` is sent
 * once more without a parse mode; an error from that second attempt
 * propagates to the caller.
 *
 * @param {object} ctx - Object exposing an async `reply(text, extra?)` method.
 * @param {string} text - Message to send.
 * @returns {Promise<void>}
 */
async function sendFormattedMessage(ctx, text) {
  const rendered = formatForTelegram(text);
  try {
    if (!rendered.parseMode) {
      await ctx.reply(text);
      return;
    }
    await ctx.reply(rendered.text, { parse_mode: rendered.parseMode });
  } catch {
    // Fall back to the unformatted text.
    await ctx.reply(text);
  }
}
2801
+ var TelegramBotService = class extends EventEmitter {
2802
+ bot = null;
2803
+ isRunning = false;
2804
+ allowedChatIds = /* @__PURE__ */ new Set();
2805
+ chatHistory = /* @__PURE__ */ new Map();
2806
+ constructor() {
2807
+ super();
2808
+ }
2809
+ /**
2810
+ * Start the Telegram bot
2811
+ */
2812
+ async start() {
2813
+ if (this.isRunning) {
2814
+ return;
2815
+ }
2816
+ const botToken = getApiKey("telegram");
2817
+ if (!botToken) {
2818
+ throw new Error("Telegram bot token not configured. Use: cnapse auth telegram YOUR_BOT_TOKEN");
2819
+ }
2820
+ try {
2821
+ const { Telegraf } = await import("telegraf");
2822
+ this.bot = new Telegraf(botToken);
2823
+ const config = getConfig();
2824
+ if (config.telegram?.chatId) {
2825
+ this.allowedChatIds.add(config.telegram.chatId);
2826
+ }
2827
+ this.setupHandlers();
2828
+ await this.bot.launch();
2829
+ this.isRunning = true;
2830
+ this.emit("started");
2831
+ } catch (error) {
2832
+ throw new Error(`Failed to start Telegram bot: ${error instanceof Error ? error.message : "Unknown error"}`);
2833
+ }
2834
+ }
2835
+ /**
2836
+ * Stop the Telegram bot
2837
+ */
2838
+ async stop() {
2839
+ if (!this.isRunning || !this.bot) {
2840
+ return;
2841
+ }
2842
+ this.bot.stop("SIGTERM");
2843
+ this.isRunning = false;
2844
+ this.bot = null;
2845
+ this.emit("stopped");
2846
+ }
2847
+ /**
2848
+ * Check if bot is running
2849
+ */
2850
+ get running() {
2851
+ return this.isRunning;
2852
+ }
2853
+ /**
2854
+ * Setup message and command handlers
2855
+ */
2856
+ setupHandlers() {
2857
+ if (!this.bot) return;
2858
+ this.bot.command("start", async (ctx) => {
2859
+ const chatId = ctx.chat.id;
2860
+ this.allowedChatIds.add(chatId);
2861
+ await ctx.reply(
2862
+ `\u{1F916} C-napse connected!
2968
2863
 
2969
- CURRENT SCREEN: ${currentState.content}
2864
+ Commands:
2865
+ /screen - Take screenshot
2866
+ /describe - Screenshot + AI description
2867
+ /task <desc> - Multi-step automation
2868
+ /run <cmd> - Execute shell command
2869
+ /status - System status
2970
2870
 
2971
- PREVIOUS ACTIONS TAKEN:
2972
- ${actionHistory.length > 0 ? actionHistory.join("\n") : "None yet"}
2871
+ Examples:
2872
+ \u2022 /task open folder E:/Test and list files
2873
+ \u2022 /task open notepad and type hello
2874
+ \u2022 minimize chrome
2875
+ \u2022 what windows are open?
2973
2876
 
2974
- Based on what you see, what's the SINGLE next action to take?
2975
- Options:
2976
- - click: Click element (describe CSS selector or visible text)
2977
- - type: Type something (specify selector and text)
2978
- - press: Press a key (specify key)
2979
- - scroll: Scroll up/down
2980
- - navigate: Go to URL
2981
- - done: Goal is accomplished
2982
- - stuck: Can't figure out what to do
2877
+ Your chat ID: ${chatId}`
2878
+ );
2879
+ });
2880
+ this.bot.command("screen", async (ctx) => {
2881
+ if (!this.isAllowed(ctx.chat.id)) {
2882
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
2883
+ return;
2884
+ }
2885
+ await ctx.reply("\u{1F4F8} Taking screenshot...");
2886
+ try {
2887
+ const screenshot = await captureScreenshot();
2888
+ if (!screenshot) {
2889
+ await ctx.reply("\u274C Failed to capture screenshot");
2890
+ return;
2891
+ }
2892
+ const buffer = Buffer.from(screenshot, "base64");
2893
+ await ctx.replyWithPhoto({ source: buffer }, { caption: "\u{1F4F8} Current screen" });
2894
+ } catch (error) {
2895
+ await ctx.reply(`\u274C Error: ${error instanceof Error ? error.message : "Unknown error"}`);
2896
+ }
2897
+ });
2898
+ this.bot.command("describe", async (ctx) => {
2899
+ if (!this.isAllowed(ctx.chat.id)) {
2900
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
2901
+ return;
2902
+ }
2903
+ await ctx.reply("\u{1F50D} Analyzing screen...");
2904
+ try {
2905
+ const result = await describeScreen();
2906
+ const buffer = Buffer.from(result.screenshot, "base64");
2907
+ const caption = `\u{1F5A5}\uFE0F Screen Analysis:
2983
2908
 
2984
- Respond in format:
2985
- ACTION: <action_type>
2986
- SELECTOR: <css selector or text to find>
2987
- VALUE: <text to type or URL>
2988
- REASONING: <why>`
2989
- }]);
2990
- const actionContent = nextAction.content;
2991
- const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
2992
- const selectorMatch = actionContent.match(/SELECTOR:\s*(.+?)(?:\n|$)/i);
2993
- const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
2994
- if (!actionMatch) {
2995
- actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
2996
- continue;
2909
+ ${result.description}`.slice(0, 1024);
2910
+ await ctx.replyWithPhoto({ source: buffer }, { caption });
2911
+ if (result.description.length > 900) {
2912
+ await ctx.reply(result.description);
2997
2913
  }
2998
- const action = actionMatch[1].toLowerCase();
2999
- const selector = selectorMatch?.[1]?.trim() || "";
3000
- const value = valueMatch?.[1]?.trim() || "";
3001
- if (action === "done") {
3002
- accomplished = true;
3003
- actionHistory.push(`Attempt ${attempt + 1}: Goal accomplished!`);
3004
- break;
2914
+ } catch (error) {
2915
+ await ctx.reply(`\u274C Error: ${error instanceof Error ? error.message : "Unknown error"}`);
2916
+ }
2917
+ });
2918
+ this.bot.command("run", async (ctx) => {
2919
+ if (!this.isAllowed(ctx.chat.id)) {
2920
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
2921
+ return;
2922
+ }
2923
+ const cmd = ctx.message.text.replace("/run ", "").trim();
2924
+ if (!cmd) {
2925
+ await ctx.reply("Usage: /run <command>\nExample: /run dir");
2926
+ return;
2927
+ }
2928
+ await ctx.reply(`\u2699\uFE0F Running: ${cmd}`);
2929
+ try {
2930
+ const result = await runCommand(cmd, 3e4);
2931
+ if (result.success) {
2932
+ const output = result.output.slice(0, 4e3) || "(no output)";
2933
+ await ctx.reply(`\u2705 Output:
2934
+ \`\`\`
2935
+ ${output}
2936
+ \`\`\``, { parse_mode: "Markdown" });
2937
+ } else {
2938
+ await ctx.reply(`\u274C Error:
2939
+ \`\`\`
2940
+ ${result.error}
2941
+ \`\`\``, { parse_mode: "Markdown" });
3005
2942
  }
3006
- if (action === "stuck") {
3007
- actionHistory.push(`Attempt ${attempt + 1}: Got stuck, asking Perplexity for help...`);
3008
- const helpRequest = `I'm trying to: ${goal}
2943
+ } catch (error) {
2944
+ await ctx.reply(`\u274C Error: ${error instanceof Error ? error.message : "Unknown error"}`);
2945
+ }
2946
+ });
2947
+ this.bot.command("status", async (ctx) => {
2948
+ if (!this.isAllowed(ctx.chat.id)) {
2949
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
2950
+ return;
2951
+ }
2952
+ const config = getConfig();
2953
+ const status = [
2954
+ "\u{1F4CA} C-napse Status",
2955
+ "",
2956
+ `Provider: ${config.provider}`,
2957
+ `Model: ${config.model}`,
2958
+ `Platform: ${process.platform}`,
2959
+ `Node: ${process.version}`
2960
+ ].join("\n");
2961
+ await ctx.reply(status);
2962
+ });
2963
+ this.bot.command("task", async (ctx) => {
2964
+ if (!this.isAllowed(ctx.chat.id)) {
2965
+ await ctx.reply("\u26D4 Not authorized. Send /start first.");
2966
+ return;
2967
+ }
2968
+ const taskDescription = ctx.message.text.replace("/task", "").trim();
2969
+ if (!taskDescription) {
2970
+ await ctx.reply(
2971
+ "\u{1F4CB} Usage: /task <description>\n\nExamples:\n\u2022 /task open notepad and type hello\n\u2022 /task open folder E:/Test and list files\n\u2022 /task search google for weather today\n\u2022 /task open chrome and go to github.com"
2972
+ );
2973
+ return;
2974
+ }
2975
+ await ctx.reply(`\u{1F3AF} Parsing task: "${taskDescription}"`);
2976
+ try {
2977
+ const task = await parseTask(taskDescription);
2978
+ let stepsPreview = `\u{1F4CB} Task broken into ${task.steps.length} steps:
3009
2979
 
3010
- I'm stuck. What should I do next? Be specific about what to click or type.`;
3011
- const advice = await askAI("perplexity", helpRequest, false);
3012
- actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
3013
- await navigateTo(page.url());
3014
- continue;
2980
+ `;
2981
+ task.steps.forEach((step, i) => {
2982
+ stepsPreview += `${i + 1}. ${step.description}
2983
+ `;
2984
+ });
2985
+ stepsPreview += "\n\u23F3 Executing...";
2986
+ await ctx.reply(stepsPreview);
2987
+ let lastUpdate = Date.now();
2988
+ const updatedTask = await executeTask(task, async (t, step) => {
2989
+ const now = Date.now();
2990
+ if (now - lastUpdate > 2e3) {
2991
+ lastUpdate = now;
2992
+ const stepNum = t.steps.indexOf(step) + 1;
2993
+ const status = step.status === "running" ? "\u{1F504}" : step.status === "completed" ? "\u2705" : "\u274C";
2994
+ await ctx.sendChatAction("typing");
2995
+ }
2996
+ });
2997
+ const result = formatTask(updatedTask);
2998
+ if (result.length > 4e3) {
2999
+ const chunks = result.match(/.{1,4000}/gs) || [result];
3000
+ for (const chunk of chunks) {
3001
+ await sendFormattedMessage(ctx, chunk);
3002
+ }
3003
+ } else {
3004
+ await sendFormattedMessage(ctx, result);
3015
3005
  }
3016
- try {
3017
- switch (action) {
3018
- case "click":
3019
- if (selector) {
3020
- const clicked = await clickElement(selector);
3021
- if (!clicked) {
3022
- await page.getByText(selector).first().click({ timeout: 5e3 });
3023
- }
3024
- }
3025
- actionHistory.push(`Attempt ${attempt + 1}: Clicked "${selector}"`);
3026
- break;
3027
- case "type":
3028
- if (selector && value) {
3029
- const typed = await typeInElement(selector, value);
3030
- if (!typed) {
3031
- await page.getByPlaceholder(selector).first().fill(value);
3032
- }
3033
- }
3034
- actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}" in "${selector}"`);
3035
- break;
3036
- case "press":
3037
- await pressKey2(value || selector);
3038
- actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || selector}`);
3039
- break;
3040
- case "scroll":
3041
- await scroll(value.toLowerCase().includes("up") ? "up" : "down");
3042
- actionHistory.push(`Attempt ${attempt + 1}: Scrolled ${value || "down"}`);
3043
- break;
3044
- case "navigate":
3045
- const url = value.startsWith("http") ? value : `https://${value}`;
3046
- await navigateTo(url);
3047
- actionHistory.push(`Attempt ${attempt + 1}: Navigated to ${url}`);
3048
- break;
3049
- default:
3050
- actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
3006
+ const hasScreenStep = updatedTask.steps.some(
3007
+ (s) => s.action.includes("screenshot") || s.action.includes("describe")
3008
+ );
3009
+ if (hasScreenStep || updatedTask.status === "completed") {
3010
+ try {
3011
+ const screenshot = await captureScreenshot();
3012
+ if (screenshot) {
3013
+ const buffer = Buffer.from(screenshot, "base64");
3014
+ await ctx.replyWithPhoto({ source: buffer }, {
3015
+ caption: updatedTask.status === "completed" ? "\u2705 Task completed - current screen" : "\u{1F4F8} Final screen state"
3016
+ });
3017
+ }
3018
+ } catch {
3051
3019
  }
3052
- } catch (e) {
3053
- actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
3054
3020
  }
3055
- await sleep(2e3);
3021
+ } catch (error) {
3022
+ await ctx.reply(`\u274C Task failed: ${error instanceof Error ? error.message : "Unknown error"}`);
3023
+ }
3024
+ });
3025
+ this.bot.on("text", async (ctx) => {
3026
+ if (!this.isAllowed(ctx.chat.id)) {
3027
+ return;
3028
+ }
3029
+ if (ctx.message.text.startsWith("/")) {
3030
+ return;
3031
+ }
3032
+ const chatId = ctx.chat.id;
3033
+ const userText = ctx.message.text;
3034
+ const from = ctx.from.username || ctx.from.first_name || "User";
3035
+ const message = {
3036
+ chatId,
3037
+ text: userText,
3038
+ from
3039
+ };
3040
+ this.emit("message", message);
3041
+ if (!this.chatHistory.has(chatId)) {
3042
+ this.chatHistory.set(chatId, []);
3043
+ }
3044
+ const history = this.chatHistory.get(chatId);
3045
+ history.push({ role: "user", content: userText });
3046
+ if (history.length > 10) {
3047
+ history.splice(0, history.length - 10);
3048
+ }
3049
+ try {
3050
+ await ctx.sendChatAction("typing");
3051
+ const computerControlResult = await this.tryComputerControl(userText);
3052
+ if (computerControlResult) {
3053
+ await sendFormattedMessage(ctx, computerControlResult);
3054
+ history.push({ role: "assistant", content: computerControlResult });
3055
+ return;
3056
+ }
3057
+ const isVisionRequest = /screen|see|look|what('?s| is) (on|visible)|show me|screenshot/i.test(userText);
3058
+ let response;
3059
+ if (isVisionRequest) {
3060
+ const screenshot = await captureScreenshot();
3061
+ if (screenshot) {
3062
+ response = await chatWithVision(history, screenshot);
3063
+ } else {
3064
+ response = await chat(history);
3065
+ }
3066
+ } else {
3067
+ response = await chat(history);
3068
+ }
3069
+ history.push({ role: "assistant", content: response.content });
3070
+ const responseText = response.content || "(no response)";
3071
+ if (responseText.length > 4e3) {
3072
+ const chunks = responseText.match(/.{1,4000}/gs) || [responseText];
3073
+ for (const chunk of chunks) {
3074
+ await sendFormattedMessage(ctx, chunk);
3075
+ }
3076
+ } else {
3077
+ await sendFormattedMessage(ctx, responseText);
3078
+ }
3079
+ } catch (error) {
3080
+ const errorMsg = error instanceof Error ? error.message : "Unknown error";
3081
+ await ctx.reply(`\u274C Error: ${errorMsg}`);
3082
+ this.emit("error", new Error(errorMsg));
3056
3083
  }
3057
- step.result = `\u{1F3AF} Adaptive Agent Result:
3058
-
3059
- Goal: ${goal}
3060
- Accomplished: ${accomplished ? "Yes \u2705" : "Partial/No \u274C"}
3061
-
3062
- Action Log:
3063
- ${actionHistory.join("\n")}`;
3064
- break;
3084
+ });
3085
+ this.bot.catch((err2) => {
3086
+ this.emit("error", err2);
3087
+ });
3088
+ }
3089
+ /**
3090
+ * Check if chat is authorized
3091
+ */
3092
+ isAllowed(chatId) {
3093
+ if (this.allowedChatIds.size === 0) {
3094
+ return true;
3065
3095
  }
3066
- case "chat":
3067
- step.result = `Task noted: ${params}`;
3068
- break;
3069
- default:
3070
- throw new Error(`Unknown action: ${actionType}`);
3096
+ return this.allowedChatIds.has(chatId);
3071
3097
  }
3072
- }
3073
- async function executeTask(task, onProgress) {
3074
- task.status = "running";
3075
- for (const step of task.steps) {
3076
- if (task.status === "failed") {
3077
- step.status = "skipped";
3078
- continue;
3098
+ /**
3099
+ * Try to execute computer control commands directly
3100
+ * Returns response string if handled, null if not a computer command
3101
+ */
3102
+ async tryComputerControl(text) {
3103
+ const lower = text.toLowerCase();
3104
+ let match = lower.match(/minimize\s+(?:the\s+)?(.+)/i);
3105
+ if (match) {
3106
+ const result = await minimizeWindow(match[1].trim());
3107
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3079
3108
  }
3080
- step.status = "running";
3081
- onProgress?.(task, step);
3082
- try {
3083
- await executeStep(step);
3084
- step.status = "completed";
3085
- } catch (error) {
3086
- step.status = "failed";
3087
- step.error = error instanceof Error ? error.message : "Unknown error";
3088
- task.status = "failed";
3109
+ match = lower.match(/maximize\s+(?:the\s+)?(.+)/i);
3110
+ if (match) {
3111
+ const result = await maximizeWindow(match[1].trim());
3112
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3089
3113
  }
3090
- onProgress?.(task, step);
3114
+ match = lower.match(/close\s+(?:the\s+)?(.+)/i);
3115
+ if (match) {
3116
+ const result = await closeWindow(match[1].trim());
3117
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3118
+ }
3119
+ match = lower.match(/restore\s+(?:the\s+)?(.+)/i);
3120
+ if (match) {
3121
+ const result = await restoreWindow(match[1].trim());
3122
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3123
+ }
3124
+ const looksLikeTask = /\b(and|then|after|tell me|list|what|show|describe|check|find|search|create|write|type\s+.+\s+in)\b/i.test(text);
3125
+ if (!looksLikeTask) {
3126
+ match = lower.match(/(?:focus|open|switch to)\s+(?:the\s+)?(\w+(?:\s+\w+)?)/i);
3127
+ if (match) {
3128
+ const result = await focusWindow(match[1].trim());
3129
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3130
+ }
3131
+ }
3132
+ match = text.match(/type\s+["'](.+)["']/i);
3133
+ if (match) {
3134
+ const result = await typeText(match[1]);
3135
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3136
+ }
3137
+ match = lower.match(/press\s+(?:the\s+)?(\w+)/i);
3138
+ if (match) {
3139
+ const result = await pressKey(match[1]);
3140
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3141
+ }
3142
+ if (/^click$/i.test(lower) || /click\s+(?:the\s+)?mouse/i.test(lower)) {
3143
+ const result = await clickMouse("left");
3144
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3145
+ }
3146
+ if (/right\s*click/i.test(lower)) {
3147
+ const result = await clickMouse("right");
3148
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3149
+ }
3150
+ if (/double\s*click/i.test(lower)) {
3151
+ const result = await doubleClick();
3152
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3153
+ }
3154
+ match = lower.match(/move\s+(?:the\s+)?mouse\s+(?:to\s+)?(\d+)[,\s]+(\d+)/i);
3155
+ if (match) {
3156
+ const result = await moveMouse(parseInt(match[1]), parseInt(match[2]));
3157
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3158
+ }
3159
+ match = lower.match(/scroll\s+(up|down)(?:\s+(\d+))?/i);
3160
+ if (match) {
3161
+ const amount = match[1] === "up" ? parseInt(match[2]) || 3 : -(parseInt(match[2]) || 3);
3162
+ const result = await scrollMouse(amount);
3163
+ return result.success ? `\u2705 ${result.output}` : `\u274C ${result.error}`;
3164
+ }
3165
+ if (/list\s+(?:all\s+)?windows/i.test(lower) || /what\s+windows/i.test(lower)) {
3166
+ const result = await listWindows();
3167
+ return result.success ? `\u{1F4CB} Open Windows:
3168
+ ${result.output}` : `\u274C ${result.error}`;
3169
+ }
3170
+ if (/(?:active|current|focused)\s+window/i.test(lower) || /what\s+(?:window|app)/i.test(lower)) {
3171
+ const result = await getActiveWindow();
3172
+ return result.success ? `\u{1FA9F} Active: ${result.output}` : `\u274C ${result.error}`;
3173
+ }
3174
+ if (/mouse\s+position/i.test(lower) || /where.*mouse/i.test(lower)) {
3175
+ const result = await getMousePosition();
3176
+ return result.success ? `\u{1F5B1}\uFE0F ${result.output}` : `\u274C ${result.error}`;
3177
+ }
3178
+ return null;
3091
3179
  }
3092
- if (task.status !== "failed") {
3093
- task.status = "completed";
3094
- const steps = task.steps.map((s) => ({
3095
- description: s.description,
3096
- action: s.action
3097
- }));
3098
- saveTaskPattern(task.description, steps);
3180
+ /**
3181
+ * Send a message to a specific chat
3182
+ */
3183
+ async sendMessage(chatId, text) {
3184
+ if (!this.bot || !this.isRunning) {
3185
+ throw new Error("Telegram bot is not running");
3186
+ }
3187
+ await this.bot.telegram.sendMessage(chatId, text);
3099
3188
  }
3100
- task.completedAt = /* @__PURE__ */ new Date();
3101
- return task;
3102
- }
3103
- function sleep(ms) {
3104
- return new Promise((resolve) => setTimeout(resolve, ms));
3105
- }
3106
- function getTaskMemoryStats() {
3107
- const memory = loadTaskMemory();
3108
- const totalUses = memory.patterns.reduce((sum, p) => sum + p.successCount, 0);
3109
- const topPatterns = memory.patterns.sort((a, b) => b.successCount - a.successCount).slice(0, 5).map((p) => `"${p.input}" (${p.successCount}x)`);
3110
- return {
3111
- patternCount: memory.patterns.length,
3112
- totalUses,
3113
- topPatterns
3114
- };
3115
- }
3116
- function clearTaskMemory() {
3117
- try {
3118
- if (fs2.existsSync(TASK_MEMORY_FILE)) {
3119
- fs2.unlinkSync(TASK_MEMORY_FILE);
3189
+ /**
3190
+ * Send a photo to a specific chat
3191
+ */
3192
+ async sendPhoto(chatId, base64Image, caption) {
3193
+ if (!this.bot || !this.isRunning) {
3194
+ throw new Error("Telegram bot is not running");
3120
3195
  }
3121
- } catch {
3196
+ const buffer = Buffer.from(base64Image, "base64");
3197
+ await this.bot.telegram.sendPhoto(chatId, { source: buffer }, { caption });
3198
+ }
3199
+ };
3200
+ var instance = null;
3201
/**
 * Return the module-wide TelegramBotService, constructing it on first use.
 *
 * Every call after the first returns the same object held in `instance`.
 *
 * @returns the shared TelegramBotService instance
 */
function getTelegramBot() {
  // Lazily create the service; later calls short-circuit on the cached object.
  instance = instance || new TelegramBotService();
  return instance;
}
3124
- function formatTask(task) {
3125
- const statusEmoji = {
3126
- pending: "\u23F3",
3127
- running: "\u{1F504}",
3128
- completed: "\u2705",
3129
- failed: "\u274C"
3130
- };
3131
- const stepStatusEmoji = {
3132
- pending: "\u25CB",
3133
- running: "\u25D0",
3134
- completed: "\u25CF",
3135
- failed: "\u2717",
3136
- skipped: "\u25CC"
3137
- };
3138
- let output = `${statusEmoji[task.status]} Task: ${task.description}
3139
3207
 
3140
- `;
3141
- for (const step of task.steps) {
3142
- output += ` ${stepStatusEmoji[step.status]} ${step.description}`;
3143
- if (step.result) {
3144
- output += ` \u2192 ${step.result}`;
3208
+ // src/hooks/useTelegram.ts
3209
/**
 * React hook that starts and stops the shared Telegram bot and mirrors its
 * state into component state.
 *
 * @param onMessage - optional callback invoked with each message emitted by
 *   the bot; the latest callback is always used, without re-subscribing
 * @returns an object with:
 *   - `isEnabled`: true once `start()` has resolved and until `stop()` resolves
 *   - `isStarting`: true while `start()` is in flight
 *   - `error`: last error message (string) or null
 *   - `lastMessage`: last message emitted by the bot or null
 *   - `start` / `stop` / `toggle`: async controls; `start` and `stop` re-throw
 *     the underlying failure after recording it in `error`
 */
function useTelegram(onMessage) {
  const [isEnabled, setIsEnabled] = useState4(false);
  const [isStarting, setIsStarting] = useState4(false);
  const [error, setError] = useState4(null);
  const [lastMessage, setLastMessage] = useState4(null);

  // Keep the newest callback reachable from the long-lived bot listener.
  const onMessageRef = useRef2(onMessage);
  useEffect2(() => {
    onMessageRef.current = onMessage;
  }, [onMessage]);

  // Stable handler identities so the very same function can be detached later.
  const handleMessage = useCallback3((msg) => {
    setLastMessage(msg);
    onMessageRef.current?.(msg);
  }, []);
  const handleError = useCallback3((err2) => {
    setError(err2.message);
  }, []);

  const start = useCallback3(async () => {
    if (isEnabled) return;
    setIsStarting(true);
    setError(null);
    try {
      const bot = getTelegramBot();
      // getTelegramBot() returns a shared singleton, so listeners registered by
      // an earlier start() (including one that failed) are still attached.
      // Detach before attaching so each event is delivered exactly once.
      // NOTE(review): assumes the service exposes EventEmitter-style
      // removeListener alongside on() - confirm against TelegramBotService.
      bot.removeListener("message", handleMessage);
      bot.removeListener("error", handleError);
      bot.on("message", handleMessage);
      bot.on("error", handleError);
      await bot.start();
      setIsEnabled(true);
    } catch (err2) {
      const errorMsg = err2 instanceof Error ? err2.message : "Failed to start Telegram bot";
      setError(errorMsg);
      throw err2;
    } finally {
      setIsStarting(false);
    }
  }, [isEnabled, handleMessage, handleError]);

  const stop = useCallback3(async () => {
    if (!isEnabled) return;
    try {
      const bot = getTelegramBot();
      await bot.stop();
      setIsEnabled(false);
    } catch (err2) {
      const errorMsg = err2 instanceof Error ? err2.message : "Failed to stop Telegram bot";
      setError(errorMsg);
      throw err2;
    }
  }, [isEnabled]);

  const toggle = useCallback3(async () => {
    if (isEnabled) {
      await stop();
    } else {
      await start();
    }
  }, [isEnabled, start, stop]);

  return {
    isEnabled,
    isStarting,
    error,
    lastMessage,
    toggle,
    start,
    stop
  };
}
3153
3270
 
3154
3271
  // src/hooks/useTasks.ts
3272
+ import { useState as useState5, useCallback as useCallback4 } from "react";
3155
3273
  function useTasks(onProgress) {
3156
3274
  const [isRunning, setIsRunning] = useState5(false);
3157
3275
  const [currentTask, setCurrentTask] = useState5(null);