@projectservan8n/cnapse 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -586,10 +586,10 @@ async function captureScreenFallback() {
586
586
  const { exec: exec5 } = await import("child_process");
587
587
  const { promisify: promisify5 } = await import("util");
588
588
  const { tmpdir } = await import("os");
589
- const { join: join4 } = await import("path");
589
+ const { join: join3 } = await import("path");
590
590
  const { readFile: readFile2, unlink } = await import("fs/promises");
591
591
  const execAsync5 = promisify5(exec5);
592
- const tempFile = join4(tmpdir(), `cnapse-screen-${Date.now()}.png`);
592
+ const tempFile = join3(tmpdir(), `cnapse-screen-${Date.now()}.png`);
593
593
  try {
594
594
  const platform = process.platform;
595
595
  if (platform === "win32") {
@@ -902,29 +902,29 @@ import { promisify as promisify4 } from "util";
902
902
  // src/tools/filesystem.ts
903
903
  import { promises as fs } from "fs";
904
904
  import { join, dirname } from "path";
905
- async function readFile(path3) {
905
+ async function readFile(path2) {
906
906
  try {
907
- const content = await fs.readFile(path3, "utf-8");
907
+ const content = await fs.readFile(path2, "utf-8");
908
908
  return ok(content);
909
909
  } catch (error) {
910
910
  return err(`Failed to read file: ${error.message}`);
911
911
  }
912
912
  }
913
- async function writeFile(path3, content) {
913
+ async function writeFile(path2, content) {
914
914
  try {
915
- const dir = dirname(path3);
915
+ const dir = dirname(path2);
916
916
  await fs.mkdir(dir, { recursive: true });
917
- await fs.writeFile(path3, content, "utf-8");
918
- return ok(`Written ${content.length} bytes to ${path3}`);
917
+ await fs.writeFile(path2, content, "utf-8");
918
+ return ok(`Written ${content.length} bytes to ${path2}`);
919
919
  } catch (error) {
920
920
  return err(`Failed to write file: ${error.message}`);
921
921
  }
922
922
  }
923
- async function listDir(path3, recursive = false) {
923
+ async function listDir(path2, recursive = false) {
924
924
  try {
925
- const stat = await fs.stat(path3);
925
+ const stat = await fs.stat(path2);
926
926
  if (!stat.isDirectory()) {
927
- return err(`Not a directory: ${path3}`);
927
+ return err(`Not a directory: ${path2}`);
928
928
  }
929
929
  const entries = [];
930
930
  async function walkDir(dir, prefix) {
@@ -941,7 +941,7 @@ async function listDir(path3, recursive = false) {
941
941
  }
942
942
  }
943
943
  }
944
- await walkDir(path3, "");
944
+ await walkDir(path2, "");
945
945
  entries.sort();
946
946
  return ok(entries.join("\n"));
947
947
  } catch (error) {
@@ -1459,288 +1459,146 @@ ${stderr}`
1459
1459
  }
1460
1460
 
1461
1461
  // src/services/browser.ts
1462
- import { chromium } from "playwright";
1463
- import * as path from "path";
1464
- import * as os2 from "os";
1465
- import * as fs2 from "fs";
1466
- var context = null;
1467
- var activePage = null;
1468
- var defaultConfig = {
1469
- headless: false,
1470
- // Show browser so user can see what's happening
1471
- slowMo: 50,
1472
- // Slight delay for visibility
1473
- viewport: { width: 1280, height: 800 },
1474
- useSystemBrowser: true
1475
- // Default to using system Chrome
1476
- };
1477
- function findSystemBrowser() {
1478
- const possiblePaths = [
1479
- // Chrome paths
1480
- path.join(process.env["PROGRAMFILES"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1481
- path.join(process.env["PROGRAMFILES(X86)"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1482
- path.join(process.env["LOCALAPPDATA"] || "", "Google", "Chrome", "Application", "chrome.exe"),
1483
- // Edge paths (fallback)
1484
- path.join(process.env["PROGRAMFILES"] || "", "Microsoft", "Edge", "Application", "msedge.exe"),
1485
- path.join(process.env["PROGRAMFILES(X86)"] || "", "Microsoft", "Edge", "Application", "msedge.exe")
1486
- ];
1487
- for (const browserPath of possiblePaths) {
1488
- if (fs2.existsSync(browserPath)) {
1489
- return browserPath;
1490
- }
1491
- }
1492
- return null;
1493
- }
1494
- function getChromeUserDataDir() {
1495
- const cnapseProfile = path.join(os2.homedir(), ".cnapse", "chrome-profile");
1496
- if (!fs2.existsSync(cnapseProfile)) {
1497
- fs2.mkdirSync(cnapseProfile, { recursive: true });
1498
- }
1499
- return cnapseProfile;
1500
- }
1501
- async function initBrowser(config = {}) {
1502
- const cfg = { ...defaultConfig, ...config };
1503
- if (!context) {
1504
- const browserPath = cfg.useSystemBrowser ? findSystemBrowser() : null;
1505
- const userDataDir = getChromeUserDataDir();
1506
- if (browserPath && cfg.useSystemBrowser) {
1507
- context = await chromium.launchPersistentContext(userDataDir, {
1508
- headless: cfg.headless,
1509
- slowMo: cfg.slowMo,
1510
- viewport: cfg.viewport,
1511
- executablePath: browserPath,
1512
- channel: void 0,
1513
- // Don't use channel when specifying executablePath
1514
- args: [
1515
- "--disable-blink-features=AutomationControlled",
1516
- // Less bot detection
1517
- "--no-first-run",
1518
- "--no-default-browser-check"
1519
- ]
1520
- });
1462
+ function sleep(ms) {
1463
+ return new Promise((resolve) => setTimeout(resolve, ms));
1464
+ }
1465
+ async function openUrl(url) {
1466
+ const fullUrl = url.startsWith("http") ? url : `https://${url}`;
1467
+ try {
1468
+ if (process.platform === "win32") {
1469
+ await runCommand(`start "" "${fullUrl}"`, 5e3);
1470
+ } else if (process.platform === "darwin") {
1471
+ await runCommand(`open "${fullUrl}"`, 5e3);
1521
1472
  } else {
1522
- context = await chromium.launchPersistentContext(userDataDir, {
1523
- headless: cfg.headless,
1524
- slowMo: cfg.slowMo,
1525
- viewport: cfg.viewport,
1526
- args: [
1527
- "--disable-blink-features=AutomationControlled"
1528
- ]
1529
- });
1473
+ await runCommand(`xdg-open "${fullUrl}"`, 5e3);
1530
1474
  }
1475
+ return { success: true };
1476
+ } catch (error) {
1477
+ return {
1478
+ success: false,
1479
+ error: error instanceof Error ? error.message : "Failed to open URL"
1480
+ };
1531
1481
  }
1532
- const pages = context.pages();
1533
- if (pages.length > 0) {
1534
- activePage = pages[0];
1535
- } else {
1536
- activePage = await context.newPage();
1537
- }
1538
- return activePage;
1539
1482
  }
1540
- async function getPage() {
1541
- if (!activePage) {
1542
- return initBrowser();
1543
- }
1544
- return activePage;
1483
+ async function searchGoogle(query) {
1484
+ const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}`;
1485
+ return openUrl(searchUrl);
1545
1486
  }
1546
- async function navigateTo(url) {
1547
- const page = await getPage();
1548
- await page.goto(url, { waitUntil: "domcontentloaded" });
1487
+ async function webSearch(query, engine = "google") {
1488
+ const urls = {
1489
+ google: `https://www.google.com/search?q=${encodeURIComponent(query)}`,
1490
+ bing: `https://www.bing.com/search?q=${encodeURIComponent(query)}`,
1491
+ duckduckgo: `https://duckduckgo.com/?q=${encodeURIComponent(query)}`
1492
+ };
1493
+ await openUrl(urls[engine]);
1494
+ await sleep(3e3);
1495
+ const vision = await describeScreen();
1496
+ return `\u{1F50D} Search results for "${query}":
1497
+
1498
+ ${vision.description}`;
1549
1499
  }
1550
- async function takeScreenshot() {
1551
- const page = await getPage();
1552
- const buffer = await page.screenshot({ type: "png" });
1553
- return buffer.toString("base64");
1500
+ async function askAI(site, question) {
1501
+ const urls = {
1502
+ perplexity: "https://www.perplexity.ai",
1503
+ chatgpt: "https://chat.openai.com",
1504
+ claude: "https://claude.ai",
1505
+ copilot: "https://copilot.microsoft.com",
1506
+ google: "https://www.google.com"
1507
+ };
1508
+ await openUrl(urls[site]);
1509
+ await sleep(4e3);
1510
+ await typeText(question);
1511
+ await sleep(500);
1512
+ await pressKey("Return");
1513
+ await sleep(site === "google" ? 3e3 : 1e4);
1514
+ const vision = await describeScreen();
1515
+ return {
1516
+ response: vision.description,
1517
+ screenshot: vision.screenshot
1518
+ };
1554
1519
  }
1555
- async function clickElement(selector, timeout = 1e4) {
1556
- const page = await getPage();
1557
- try {
1558
- await page.click(selector, { timeout });
1559
- return true;
1560
- } catch {
1561
- return false;
1562
- }
1520
+ async function openGmailCompose(to, subject, body) {
1521
+ let url = "https://mail.google.com/mail/u/0/?fs=1&tf=cm";
1522
+ if (to) url += `&to=${encodeURIComponent(to)}`;
1523
+ if (subject) url += `&su=${encodeURIComponent(subject)}`;
1524
+ if (body) url += `&body=${encodeURIComponent(body)}`;
1525
+ const result = await openUrl(url);
1526
+ return result.success;
1563
1527
  }
1564
- async function typeInElement(selector, text, timeout = 1e4) {
1565
- const page = await getPage();
1528
+ async function sendGmail(email) {
1566
1529
  try {
1567
- await page.fill(selector, text, { timeout });
1530
+ await openGmailCompose(email.to, email.subject, email.body);
1531
+ await sleep(5e3);
1532
+ await keyCombo(["control", "Return"]);
1533
+ await sleep(2e3);
1568
1534
  return true;
1569
1535
  } catch {
1570
1536
  return false;
1571
1537
  }
1572
1538
  }
1573
- async function pressKey2(key) {
1574
- const page = await getPage();
1575
- await page.keyboard.press(key);
1539
+ async function openOutlookCompose(to, subject, body) {
1540
+ let url = "https://outlook.office.com/mail/deeplink/compose?";
1541
+ if (to) url += `to=${encodeURIComponent(to)}&`;
1542
+ if (subject) url += `subject=${encodeURIComponent(subject)}&`;
1543
+ if (body) url += `body=${encodeURIComponent(body)}&`;
1544
+ const result = await openUrl(url);
1545
+ return result.success;
1576
1546
  }
1577
- async function scroll(direction, amount = 500) {
1578
- const page = await getPage();
1579
- await page.mouse.wheel(0, direction === "down" ? amount : -amount);
1580
- }
1581
- async function getPageText() {
1582
- const page = await getPage();
1583
- return await page.evaluate(() => document.body.innerText);
1584
- }
1585
- async function elementExists(selector) {
1586
- const page = await getPage();
1547
+ async function sendOutlook(email) {
1587
1548
  try {
1588
- const element = await page.$(selector);
1589
- return element !== null;
1549
+ await openOutlookCompose(email.to, email.subject, email.body);
1550
+ await sleep(5e3);
1551
+ await keyCombo(["control", "Return"]);
1552
+ await sleep(2e3);
1553
+ return true;
1590
1554
  } catch {
1591
1555
  return false;
1592
1556
  }
1593
1557
  }
1594
- var aiChatConfigs = {
1595
- perplexity: {
1596
- url: "https://www.perplexity.ai",
1597
- inputSelector: 'textarea[placeholder*="Ask"]',
1598
- submitKey: "Enter",
1599
- responseSelector: '.prose, [class*="answer"], [class*="response"]',
1600
- waitForResponse: 15e3
1601
- },
1602
- chatgpt: {
1603
- url: "https://chat.openai.com",
1604
- inputSelector: 'textarea[id="prompt-textarea"], textarea[data-id="root"]',
1605
- submitSelector: 'button[data-testid="send-button"]',
1606
- responseSelector: '[data-message-author-role="assistant"]',
1607
- waitForResponse: 2e4
1608
- },
1609
- claude: {
1610
- url: "https://claude.ai",
1611
- inputSelector: '[contenteditable="true"], textarea',
1612
- submitKey: "Enter",
1613
- responseSelector: '[data-testid="message-content"]',
1614
- waitForResponse: 2e4
1615
- },
1616
- copilot: {
1617
- url: "https://copilot.microsoft.com",
1618
- inputSelector: 'textarea, [contenteditable="true"]',
1619
- submitKey: "Enter",
1620
- responseSelector: '[class*="response"], [class*="message"]',
1621
- waitForResponse: 15e3
1622
- },
1623
- google: {
1624
- url: "https://www.google.com",
1625
- inputSelector: 'textarea[name="q"], input[name="q"]',
1626
- submitKey: "Enter",
1627
- responseSelector: "#search",
1628
- waitForResponse: 5e3
1629
- }
1630
- };
1631
- async function askAI(site, question, includeScreenshot = false) {
1632
- const config = aiChatConfigs[site];
1633
- if (!config) {
1634
- throw new Error(`Unknown AI site: ${site}`);
1635
- }
1636
- const page = await getPage();
1637
- await page.goto(config.url, { waitUntil: "domcontentloaded" });
1638
- await page.waitForTimeout(2e3);
1639
- try {
1640
- await page.waitForSelector(config.inputSelector, { timeout: 1e4 });
1641
- await page.fill(config.inputSelector, question);
1642
- } catch {
1643
- await page.click(config.inputSelector);
1644
- await page.type(config.inputSelector, question, { delay: 30 });
1645
- }
1646
- if (config.submitSelector) {
1647
- await page.click(config.submitSelector);
1648
- } else if (config.submitKey) {
1649
- await page.keyboard.press(config.submitKey);
1650
- }
1651
- await page.waitForTimeout(config.waitForResponse);
1652
- let response = "";
1653
- try {
1654
- const elements = await page.$$(config.responseSelector);
1655
- if (elements.length > 0) {
1656
- const lastElement = elements[elements.length - 1];
1657
- response = await lastElement.textContent() || "";
1658
- }
1659
- } catch {
1660
- response = await getPageText();
1661
- }
1662
- let screenshot;
1663
- if (includeScreenshot) {
1664
- screenshot = await takeScreenshot();
1665
- }
1666
- return { response: response.trim(), screenshot };
1558
+ async function openGoogleSheet() {
1559
+ const result = await openUrl("https://docs.google.com/spreadsheets/create");
1560
+ return result.success;
1667
1561
  }
1668
- async function getFullAIResponse(site, maxScrolls = 5) {
1669
- const config = aiChatConfigs[site];
1670
- const page = await getPage();
1671
- const responseParts = [];
1672
- for (let i = 0; i < maxScrolls; i++) {
1673
- try {
1674
- const elements = await page.$$(config.responseSelector);
1675
- if (elements.length > 0) {
1676
- const lastElement = elements[elements.length - 1];
1677
- const text = await lastElement.textContent();
1678
- if (text) {
1679
- responseParts.push(text.trim());
1680
- }
1681
- }
1682
- await page.mouse.wheel(0, 500);
1683
- await page.waitForTimeout(1e3);
1684
- const atBottom = await page.evaluate(() => {
1685
- return window.innerHeight + window.scrollY >= document.body.scrollHeight - 100;
1686
- });
1687
- if (atBottom) break;
1688
- } catch {
1689
- break;
1690
- }
1691
- }
1692
- return responseParts;
1562
+ async function openGoogleDoc() {
1563
+ const result = await openUrl("https://docs.google.com/document/create");
1564
+ return result.success;
1693
1565
  }
1694
- async function sendGmail(email) {
1695
- const page = await getPage();
1696
- try {
1697
- await page.goto("https://mail.google.com/mail/u/0/#inbox?compose=new");
1698
- await page.waitForTimeout(3e3);
1699
- await page.waitForSelector('input[aria-label*="To"]', { timeout: 1e4 });
1700
- await page.fill('input[aria-label*="To"]', email.to);
1701
- await page.keyboard.press("Tab");
1702
- await page.fill('input[name="subjectbox"]', email.subject);
1703
- await page.keyboard.press("Tab");
1704
- await page.fill('[aria-label*="Message Body"], [role="textbox"]', email.body);
1705
- await page.keyboard.press("Control+Enter");
1706
- await page.waitForTimeout(2e3);
1707
- return true;
1708
- } catch {
1709
- return false;
1566
+ async function scroll(direction, amount = 3) {
1567
+ const key = direction === "down" ? "pagedown" : "pageup";
1568
+ for (let i = 0; i < amount; i++) {
1569
+ await pressKey(key);
1570
+ await sleep(200);
1710
1571
  }
1711
1572
  }
1712
- async function sendOutlook(email) {
1713
- const page = await getPage();
1714
- try {
1715
- await page.goto("https://outlook.office.com/mail/0/inbox");
1716
- await page.waitForTimeout(3e3);
1717
- await page.click('button[aria-label*="New mail"], button[title*="New mail"]');
1718
- await page.waitForTimeout(2e3);
1719
- await page.fill('input[aria-label*="To"]', email.to);
1720
- await page.keyboard.press("Tab");
1721
- await page.fill('input[aria-label*="Subject"], input[placeholder*="Subject"]', email.subject);
1722
- await page.keyboard.press("Tab");
1723
- await page.fill('[aria-label*="Message body"], [role="textbox"]', email.body);
1724
- await page.click('button[aria-label*="Send"], button[title*="Send"]');
1725
- await page.waitForTimeout(2e3);
1726
- return true;
1727
- } catch {
1728
- return false;
1729
- }
1573
+ async function getPageText() {
1574
+ const vision = await describeScreen();
1575
+ return vision.description;
1576
+ }
1577
+ async function research(topic, maxSources = 3) {
1578
+ await searchGoogle(topic);
1579
+ await sleep(3e3);
1580
+ const searchResults = await describeScreen();
1581
+ return {
1582
+ query: topic,
1583
+ sources: [{
1584
+ title: `Google search: ${topic}`,
1585
+ url: `https://www.google.com/search?q=${encodeURIComponent(topic)}`,
1586
+ content: searchResults.description
1587
+ }],
1588
+ summary: searchResults.description
1589
+ };
1730
1590
  }
1731
- async function googleSheetsType(cellData) {
1732
- const page = await getPage();
1591
+ async function googleSheetsType(cells) {
1733
1592
  try {
1734
- await page.goto("https://docs.google.com/spreadsheets/create");
1735
- await page.waitForTimeout(5e3);
1736
- for (const { cell, value } of cellData) {
1737
- await page.click("input#t-name-box");
1738
- await page.fill("input#t-name-box", cell);
1739
- await page.keyboard.press("Enter");
1740
- await page.waitForTimeout(500);
1741
- await page.keyboard.type(value);
1742
- await page.keyboard.press("Enter");
1743
- await page.waitForTimeout(300);
1593
+ for (const { cell, value } of cells) {
1594
+ await keyCombo(["control", "g"]);
1595
+ await sleep(500);
1596
+ await typeText(cell);
1597
+ await pressKey("Return");
1598
+ await sleep(300);
1599
+ await typeText(value);
1600
+ await pressKey("Return");
1601
+ await sleep(200);
1744
1602
  }
1745
1603
  return true;
1746
1604
  } catch {
@@ -1748,90 +1606,24 @@ async function googleSheetsType(cellData) {
1748
1606
  }
1749
1607
  }
1750
1608
  async function googleDocsType(text) {
1751
- const page = await getPage();
1752
1609
  try {
1753
- await page.goto("https://docs.google.com/document/create");
1754
- await page.waitForTimeout(5e3);
1755
- await page.click(".kix-appview-editor");
1756
- await page.waitForTimeout(500);
1757
- await page.keyboard.type(text, { delay: 20 });
1610
+ await sleep(1e3);
1611
+ await typeText(text);
1758
1612
  return true;
1759
1613
  } catch {
1760
1614
  return false;
1761
1615
  }
1762
1616
  }
1763
- async function webSearch(query, engine = "google") {
1764
- const page = await getPage();
1765
- const results = [];
1766
- const urls = {
1767
- google: "https://www.google.com",
1768
- bing: "https://www.bing.com",
1769
- duckduckgo: "https://duckduckgo.com"
1770
- };
1771
- const selectors = {
1772
- google: { input: 'textarea[name="q"]', results: "#search .g h3" },
1773
- bing: { input: 'input[name="q"]', results: "#b_results h2 a" },
1774
- duckduckgo: { input: 'input[name="q"]', results: "[data-result] h2" }
1775
- };
1776
- try {
1777
- await page.goto(urls[engine]);
1778
- await page.waitForTimeout(2e3);
1779
- await page.fill(selectors[engine].input, query);
1780
- await page.keyboard.press("Enter");
1781
- await page.waitForTimeout(3e3);
1782
- const elements = await page.$$(selectors[engine].results);
1783
- for (const el of elements.slice(0, 10)) {
1784
- const text = await el.textContent();
1785
- if (text) results.push(text);
1786
- }
1787
- } catch {
1788
- }
1789
- return results;
1790
- }
1791
- async function research(topic, maxSources = 3) {
1792
- const page = await getPage();
1793
- const sources = [];
1794
- await webSearch(topic);
1795
- await page.waitForTimeout(2e3);
1796
- for (let i = 0; i < maxSources; i++) {
1797
- try {
1798
- const results = await page.$$("#search .g");
1799
- if (results[i]) {
1800
- const titleEl = await results[i].$("h3");
1801
- const linkEl = await results[i].$("a");
1802
- const title = await titleEl?.textContent() || "Unknown";
1803
- const url = await linkEl?.getAttribute("href") || "";
1804
- await titleEl?.click();
1805
- await page.waitForTimeout(3e3);
1806
- const content = await page.evaluate(() => {
1807
- const article = document.querySelector("article, main, .content, #content");
1808
- return article?.textContent?.slice(0, 2e3) || document.body.innerText.slice(0, 2e3);
1809
- });
1810
- sources.push({ title, url, content: content.trim() });
1811
- await page.goBack();
1812
- await page.waitForTimeout(1500);
1813
- }
1814
- } catch {
1815
- continue;
1816
- }
1817
- }
1818
- return {
1819
- query: topic,
1820
- sources,
1821
- summary: ""
1822
- // To be filled by AI
1823
- };
1824
- }
1825
1617
 
1826
1618
  // src/lib/tasks.ts
1827
- import * as fs3 from "fs";
1828
- import * as path2 from "path";
1829
- import * as os3 from "os";
1830
- var TASK_MEMORY_FILE = path2.join(os3.homedir(), ".cnapse", "task-memory.json");
1619
+ import * as fs2 from "fs";
1620
+ import * as path from "path";
1621
+ import * as os2 from "os";
1622
+ var TASK_MEMORY_FILE = path.join(os2.homedir(), ".cnapse", "task-memory.json");
1831
1623
  function loadTaskMemory() {
1832
1624
  try {
1833
- if (fs3.existsSync(TASK_MEMORY_FILE)) {
1834
- const data = fs3.readFileSync(TASK_MEMORY_FILE, "utf-8");
1625
+ if (fs2.existsSync(TASK_MEMORY_FILE)) {
1626
+ const data = fs2.readFileSync(TASK_MEMORY_FILE, "utf-8");
1835
1627
  return JSON.parse(data);
1836
1628
  }
1837
1629
  } catch {
@@ -1857,11 +1649,11 @@ function saveTaskPattern(input, steps) {
1857
1649
  });
1858
1650
  }
1859
1651
  memory.patterns = memory.patterns.sort((a, b) => b.successCount - a.successCount).slice(0, 100);
1860
- const dir = path2.dirname(TASK_MEMORY_FILE);
1861
- if (!fs3.existsSync(dir)) {
1862
- fs3.mkdirSync(dir, { recursive: true });
1652
+ const dir = path.dirname(TASK_MEMORY_FILE);
1653
+ if (!fs2.existsSync(dir)) {
1654
+ fs2.mkdirSync(dir, { recursive: true });
1863
1655
  }
1864
- fs3.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
1656
+ fs2.writeFileSync(TASK_MEMORY_FILE, JSON.stringify(memory, null, 2));
1865
1657
  } catch {
1866
1658
  }
1867
1659
  }
@@ -2212,9 +2004,9 @@ async function executeStep(step) {
2212
2004
  switch (actionType) {
2213
2005
  case "open_app":
2214
2006
  await keyCombo(["meta", "r"]);
2215
- await sleep(500);
2007
+ await sleep2(500);
2216
2008
  await typeText(params);
2217
- await sleep(300);
2009
+ await sleep2(300);
2218
2010
  await pressKey("Return");
2219
2011
  step.result = `Opened ${params}`;
2220
2012
  break;
@@ -2238,7 +2030,7 @@ async function executeStep(step) {
2238
2030
  break;
2239
2031
  case "wait":
2240
2032
  const seconds = parseInt(params) || 1;
2241
- await sleep(seconds * 1e3);
2033
+ await sleep2(seconds * 1e3);
2242
2034
  step.result = `Waited ${seconds}s`;
2243
2035
  break;
2244
2036
  case "focus_window":
@@ -2348,32 +2140,17 @@ ${existingResult.output}`;
2348
2140
  const supportedSites = ["perplexity", "chatgpt", "claude", "copilot", "google"];
2349
2141
  const siteLower = site.toLowerCase();
2350
2142
  if (supportedSites.includes(siteLower)) {
2351
- const result = await askAI(siteLower, question, true);
2352
- if (result.response.length < 500) {
2353
- const fullParts = await getFullAIResponse(siteLower, 5);
2354
- if (fullParts.length > 0) {
2355
- step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
2356
-
2357
- ${fullParts.join("\n\n")}`;
2358
- break;
2359
- }
2360
- }
2143
+ const result = await askAI(siteLower, question);
2361
2144
  step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
2362
2145
 
2363
2146
  ${result.response}`;
2364
2147
  } else {
2365
- await navigateTo(`https://${site}`);
2366
- await sleep(2e3);
2367
- const page = await getPage();
2368
- const inputs = ["textarea", 'input[type="text"]', 'input[type="search"]', '[contenteditable="true"]'];
2369
- for (const selector of inputs) {
2370
- if (await elementExists(selector)) {
2371
- await typeInElement(selector, question);
2372
- await pressKey2("Enter");
2373
- break;
2374
- }
2375
- }
2376
- await sleep(5e3);
2148
+ await openUrl(`https://${site}`);
2149
+ await sleep2(4e3);
2150
+ await typeText(question);
2151
+ await sleep2(300);
2152
+ await pressKey("Return");
2153
+ await sleep2(5e3);
2377
2154
  const pageText = await getPageText();
2378
2155
  step.result = `\u{1F4DD} Response from ${site}:
2379
2156
 
@@ -2386,17 +2163,8 @@ ${pageText.slice(0, 3e3)}`;
2386
2163
  step.result = vision.description;
2387
2164
  break;
2388
2165
  case "web_search": {
2389
- const searchResults = await webSearch(params, "google");
2390
- if (searchResults.length > 0) {
2391
- step.result = `\u{1F50D} Search results for "${params}":
2392
-
2393
- ${searchResults.map((r, i) => `${i + 1}. ${r}`).join("\n")}`;
2394
- } else {
2395
- const pageText = await getPageText();
2396
- step.result = `\u{1F50D} Search results for "${params}":
2397
-
2398
- ${pageText.slice(0, 2e3)}`;
2399
- }
2166
+ const searchResult = await webSearch(params, "google");
2167
+ step.result = searchResult;
2400
2168
  break;
2401
2169
  }
2402
2170
  case "send_email": {
@@ -2412,9 +2180,9 @@ ${pageText.slice(0, 2e3)}`;
2412
2180
  throw new Error(`Unsupported email provider: ${provider}. Use gmail or outlook.`);
2413
2181
  }
2414
2182
  if (success) {
2415
- step.result = `\u{1F4E7} Email sent via ${provider} to ${to}`;
2183
+ step.result = `\u{1F4E7} Email composed via ${provider} to ${to} (check browser to confirm send)`;
2416
2184
  } else {
2417
- throw new Error(`Failed to send email via ${provider}. Make sure you're logged in.`);
2185
+ throw new Error(`Failed to open email via ${provider}. Make sure you're logged in.`);
2418
2186
  }
2419
2187
  break;
2420
2188
  }
@@ -2423,9 +2191,9 @@ ${pageText.slice(0, 2e3)}`;
2423
2191
  switch (sheetCmd.toLowerCase()) {
2424
2192
  case "new": {
2425
2193
  const sheetName = sheetArgs[0] || "Untitled spreadsheet";
2426
- await navigateTo("https://docs.google.com/spreadsheets/create");
2427
- await sleep(5e3);
2428
- step.result = `\u{1F4CA} Created Google Sheet: ${sheetName}`;
2194
+ await openGoogleSheet();
2195
+ await sleep2(5e3);
2196
+ step.result = `\u{1F4CA} Opened new Google Sheet: ${sheetName}`;
2429
2197
  break;
2430
2198
  }
2431
2199
  case "type": {
@@ -2436,13 +2204,9 @@ ${pageText.slice(0, 2e3)}`;
2436
2204
  break;
2437
2205
  }
2438
2206
  case "read": {
2439
- const screenshot = await takeScreenshot();
2440
- const analysis = await chat([{
2441
- role: "user",
2442
- content: "Describe the contents of this Google Sheet. List visible data in the cells."
2443
- }]);
2207
+ const vision2 = await describeScreen();
2444
2208
  step.result = `\u{1F4CA} Current sheet view:
2445
- ${analysis.content}`;
2209
+ ${vision2.description}`;
2446
2210
  break;
2447
2211
  }
2448
2212
  default:
@@ -2455,8 +2219,9 @@ ${analysis.content}`;
2455
2219
  switch (docCmd.toLowerCase()) {
2456
2220
  case "new": {
2457
2221
  const docName = docArgs[0] || "Untitled document";
2458
- const success = await googleDocsType("");
2459
- step.result = success ? `\u{1F4C4} Created Google Doc: ${docName}` : `\u{1F4C4} Could not create Google Doc`;
2222
+ await openGoogleDoc();
2223
+ await sleep2(5e3);
2224
+ step.result = `\u{1F4C4} Opened new Google Doc: ${docName}`;
2460
2225
  break;
2461
2226
  }
2462
2227
  case "type": {
@@ -2473,15 +2238,11 @@ ${analysis.content}`;
2473
2238
  case "research": {
2474
2239
  const researchQuery = params;
2475
2240
  const researchData = await research(researchQuery, 3);
2476
- const sourceSummaries = researchData.sources.map(
2477
- (s, i) => `Source ${i + 1}: ${s.title}
2478
- ${s.content.slice(0, 500)}...`
2479
- ).join("\n\n");
2480
2241
  const synthesis = await chat([{
2481
2242
  role: "user",
2482
- content: `Based on the following research gathered about "${researchQuery}", provide a comprehensive summary:
2243
+ content: `Based on the following search results about "${researchQuery}", provide a comprehensive summary:
2483
2244
 
2484
- ${sourceSummaries}
2245
+ ${researchData.summary}
2485
2246
 
2486
2247
  Create a well-organized summary with:
2487
2248
  1. Key findings
@@ -2508,12 +2269,10 @@ Here's what I see on my screen: ${currentScreen.description}`;
2508
2269
  if (!supportedLLMs.includes(llmLower)) {
2509
2270
  throw new Error(`Unknown LLM: ${llmName}. Supported: ${supportedLLMs.join(", ")}`);
2510
2271
  }
2511
- const result = await askAI(llmLower, fullQuestion, false);
2512
- const fullParts = await getFullAIResponse(llmLower, 3);
2513
- const finalResponse = fullParts.length > 0 ? fullParts.join("\n\n") : result.response;
2272
+ const result = await askAI(llmLower, fullQuestion);
2514
2273
  step.result = `\u{1F916} ${llmName} says:
2515
2274
 
2516
- ${finalResponse}`;
2275
+ ${result.response}`;
2517
2276
  break;
2518
2277
  }
2519
2278
  case "learn_ui": {
@@ -2542,48 +2301,40 @@ ${uiAnalysis.content}`;
2542
2301
  const maxAttempts = 5;
2543
2302
  const actionHistory = [];
2544
2303
  let accomplished = false;
2545
- const page = await getPage();
2546
2304
  for (let attempt = 0; attempt < maxAttempts && !accomplished; attempt++) {
2547
- const screenshot = await takeScreenshot();
2548
- const currentState = await chat([{
2549
- role: "user",
2550
- content: `Describe what you see on this screen. What app/website is it? What elements are visible?`
2551
- }]);
2305
+ const currentScreen = await describeScreen();
2552
2306
  const nextAction = await chat([{
2553
2307
  role: "user",
2554
2308
  content: `GOAL: ${goal}
2555
2309
 
2556
- CURRENT SCREEN: ${currentState.content}
2310
+ CURRENT SCREEN: ${currentScreen.description}
2557
2311
 
2558
2312
  PREVIOUS ACTIONS TAKEN:
2559
2313
  ${actionHistory.length > 0 ? actionHistory.join("\n") : "None yet"}
2560
2314
 
2561
2315
  Based on what you see, what's the SINGLE next action to take?
2562
2316
  Options:
2563
- - click: Click element (describe CSS selector or visible text)
2564
- - type: Type something (specify selector and text)
2565
- - press: Press a key (specify key)
2317
+ - click: Click (will click at current mouse position)
2318
+ - type: Type something (specify text)
2319
+ - press: Press a key (specify key like Enter, Tab, Escape)
2566
2320
  - scroll: Scroll up/down
2567
- - navigate: Go to URL
2321
+ - navigate: Go to URL (opens in browser)
2568
2322
  - done: Goal is accomplished
2569
2323
  - stuck: Can't figure out what to do
2570
2324
 
2571
2325
  Respond in format:
2572
2326
  ACTION: <action_type>
2573
- SELECTOR: <css selector or text to find>
2574
- VALUE: <text to type or URL>
2327
+ VALUE: <text to type, URL to navigate, or key to press>
2575
2328
  REASONING: <why>`
2576
2329
  }]);
2577
2330
  const actionContent = nextAction.content;
2578
2331
  const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
2579
- const selectorMatch = actionContent.match(/SELECTOR:\s*(.+?)(?:\n|$)/i);
2580
2332
  const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
2581
2333
  if (!actionMatch) {
2582
2334
  actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
2583
2335
  continue;
2584
2336
  }
2585
2337
  const action = actionMatch[1].toLowerCase();
2586
- const selector = selectorMatch?.[1]?.trim() || "";
2587
2338
  const value = valueMatch?.[1]?.trim() || "";
2588
2339
  if (action === "done") {
2589
2340
  accomplished = true;
@@ -2595,34 +2346,25 @@ REASONING: <why>`
2595
2346
  const helpRequest = `I'm trying to: ${goal}
2596
2347
 
2597
2348
  I'm stuck. What should I do next? Be specific about what to click or type.`;
2598
- const advice = await askAI("perplexity", helpRequest, false);
2349
+ const advice = await askAI("perplexity", helpRequest);
2599
2350
  actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
2600
- await navigateTo(page.url());
2601
2351
  continue;
2602
2352
  }
2603
2353
  try {
2604
2354
  switch (action) {
2605
2355
  case "click":
2606
- if (selector) {
2607
- const clicked = await clickElement(selector);
2608
- if (!clicked) {
2609
- await page.getByText(selector).first().click({ timeout: 5e3 });
2610
- }
2611
- }
2612
- actionHistory.push(`Attempt ${attempt + 1}: Clicked "${selector}"`);
2356
+ await clickMouse("left");
2357
+ actionHistory.push(`Attempt ${attempt + 1}: Clicked`);
2613
2358
  break;
2614
2359
  case "type":
2615
- if (selector && value) {
2616
- const typed = await typeInElement(selector, value);
2617
- if (!typed) {
2618
- await page.getByPlaceholder(selector).first().fill(value);
2619
- }
2360
+ if (value) {
2361
+ await typeText(value);
2620
2362
  }
2621
- actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}" in "${selector}"`);
2363
+ actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}"`);
2622
2364
  break;
2623
2365
  case "press":
2624
- await pressKey2(value || selector);
2625
- actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || selector}`);
2366
+ await pressKey(value || "Return");
2367
+ actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || "Enter"}`);
2626
2368
  break;
2627
2369
  case "scroll":
2628
2370
  await scroll(value.toLowerCase().includes("up") ? "up" : "down");
@@ -2630,8 +2372,8 @@ I'm stuck. What should I do next? Be specific about what to click or type.`;
2630
2372
  break;
2631
2373
  case "navigate":
2632
2374
  const url = value.startsWith("http") ? value : `https://${value}`;
2633
- await navigateTo(url);
2634
- actionHistory.push(`Attempt ${attempt + 1}: Navigated to ${url}`);
2375
+ await openUrl(url);
2376
+ actionHistory.push(`Attempt ${attempt + 1}: Opened ${url}`);
2635
2377
  break;
2636
2378
  default:
2637
2379
  actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
@@ -2639,7 +2381,7 @@ I'm stuck. What should I do next? Be specific about what to click or type.`;
2639
2381
  } catch (e) {
2640
2382
  actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
2641
2383
  }
2642
- await sleep(2e3);
2384
+ await sleep2(2e3);
2643
2385
  }
2644
2386
  step.result = `\u{1F3AF} Adaptive Agent Result:
2645
2387
 
@@ -2687,7 +2429,7 @@ async function executeTask(task, onProgress) {
2687
2429
  task.completedAt = /* @__PURE__ */ new Date();
2688
2430
  return task;
2689
2431
  }
2690
- function sleep(ms) {
2432
+ function sleep2(ms) {
2691
2433
  return new Promise((resolve) => setTimeout(resolve, ms));
2692
2434
  }
2693
2435
  function getTaskMemoryStats() {
@@ -2702,8 +2444,8 @@ function getTaskMemoryStats() {
2702
2444
  }
2703
2445
  function clearTaskMemory() {
2704
2446
  try {
2705
- if (fs3.existsSync(TASK_MEMORY_FILE)) {
2706
- fs3.unlinkSync(TASK_MEMORY_FILE);
2447
+ if (fs2.existsSync(TASK_MEMORY_FILE)) {
2448
+ fs2.unlinkSync(TASK_MEMORY_FILE);
2707
2449
  }
2708
2450
  } catch {
2709
2451
  }