@projectservan8n/cnapse 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1459,240 +1459,146 @@ ${stderr}`
1459
1459
  }
1460
1460
 
1461
1461
  // src/services/browser.ts
1462
- import { chromium } from "playwright";
1463
- var browser = null;
1464
- var context = null;
1465
- var activePage = null;
1466
- var defaultConfig = {
1467
- headless: false,
1468
- // Show browser so user can see what's happening
1469
- slowMo: 50,
1470
- // Slight delay for visibility
1471
- viewport: { width: 1280, height: 800 }
1472
- };
1473
- async function initBrowser(config = {}) {
1474
- const cfg = { ...defaultConfig, ...config };
1475
- if (!browser) {
1476
- browser = await chromium.launch({
1477
- headless: cfg.headless,
1478
- slowMo: cfg.slowMo
1479
- });
1480
- }
1481
- if (!context) {
1482
- context = await browser.newContext({
1483
- viewport: cfg.viewport,
1484
- userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
1485
- });
1486
- }
1487
- if (!activePage) {
1488
- activePage = await context.newPage();
1489
- }
1490
- return activePage;
1462
/**
 * Returns a promise that settles after a delay.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
1492
- async function getPage() {
1493
- if (!activePage) {
1494
- return initBrowser();
1465
/**
 * Opens a URL with the platform's launcher command: `start` on win32,
 * `open` on darwin, and `xdg-open` on every other platform.
 *
 * The address ends up inside a double-quoted shell command string, so it is
 * parsed with `new URL` first and the characters that could close the quoting
 * or trigger shell expansion are percent-encoded before interpolation.
 *
 * @param {string} url - Address to open. `https://` is prepended unless the
 *   value already begins with `http://` or `https://`.
 * @returns {Promise<{success: boolean, error?: string}>} `{ success: true }`
 *   when the launcher command completes; otherwise `success: false` together
 *   with the error message. Failures are reported through the result object
 *   rather than thrown.
 */
async function openUrl(url) {
  try {
    const requested = String(url ?? "").trim();
    if (requested === "") {
      throw new Error("No URL provided");
    }
    // Check for a real scheme: a bare "http" prefix test would treat hosts
    // such as "httpbin.org" as already qualified and pass them on unchanged.
    const qualified = /^https?:\/\//i.test(requested) ? requested : `https://${requested}`;
    // `new URL` rejects malformed input and normalises whitespace; the extra
    // replace covers characters the URL serializer may leave untouched but a
    // shell still interprets inside double quotes.
    const shellSafeUrl = new URL(qualified).href.replace(
      /["`$\\]/g,
      (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`
    );
    if (process.platform === "win32") {
      // The empty "" is the window-title argument of `start`.
      await runCommand(`start "" "${shellSafeUrl}"`, 5e3);
    } else if (process.platform === "darwin") {
      await runCommand(`open "${shellSafeUrl}"`, 5e3);
    } else {
      await runCommand(`xdg-open "${shellSafeUrl}"`, 5e3);
    }
    return { success: true };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : "Failed to open URL"
    };
  }
}
1498
- async function navigateTo(url) {
1499
- const page = await getPage();
1500
- await page.goto(url, { waitUntil: "domcontentloaded" });
1483
/**
 * Opens a Google results page for the given query via `openUrl`.
 *
 * @param {string} query - Search terms; URI-component-encoded before use.
 * @returns {Promise<*>} Whatever `openUrl` resolves to for the search URL.
 */
async function searchGoogle(query) {
  const encodedQuery = encodeURIComponent(query);
  return openUrl(`https://www.google.com/search?q=${encodedQuery}`);
}
1502
- async function takeScreenshot() {
1503
- const page = await getPage();
1504
- const buffer = await page.screenshot({ type: "png" });
1505
- return buffer.toString("base64");
1487
/**
 * Opens a search-engine results page for `query`, waits three seconds, and
 * returns the text produced by `describeScreen()` for whatever is then shown.
 *
 * @param {string} query - Search terms; URI-component-encoded into the URL.
 * @param {"google"|"bing"|"duckduckgo"} [engine="google"] - Engine to use.
 * @returns {Promise<string>} A header line naming the query followed by the
 *   screen description.
 * @throws {Error} When `engine` is not one of the supported engines.
 */
async function webSearch(query, engine = "google") {
  const searchPrefixes = {
    google: "https://www.google.com/search?q=",
    bing: "https://www.bing.com/search?q=",
    duckduckgo: "https://duckduckgo.com/?q="
  };
  // Own-property check so that unknown names and inherited keys such as
  // "constructor" fail with a clear message instead of reaching openUrl as
  // undefined or a function.
  if (!Object.prototype.hasOwnProperty.call(searchPrefixes, engine)) {
    throw new Error(
      `Unsupported search engine: ${engine}. Use ${Object.keys(searchPrefixes).join(", ")}.`
    );
  }
  await openUrl(`${searchPrefixes[engine]}${encodeURIComponent(query)}`);
  // Fixed pause before reading the screen; presumably gives the page time to render.
  await sleep(3e3);
  const vision = await describeScreen();
  return `\u{1F50D} Search results for "${query}":

${vision.description}`;
}
1507
- async function clickElement(selector, timeout = 1e4) {
1508
- const page = await getPage();
1509
- try {
1510
- await page.click(selector, { timeout });
1511
- return true;
1512
- } catch {
1513
- return false;
1514
- }
1500
/**
 * Opens one of the supported AI/search sites, types the question, presses
 * Return, waits, and reports what `describeScreen()` returns afterwards.
 *
 * @param {"perplexity"|"chatgpt"|"claude"|"copilot"|"google"} site - Key of
 *   the site to open.
 * @param {string} question - Text passed to `typeText`.
 * @returns {Promise<{response: *, screenshot: *}>} The `description` and
 *   `screenshot` fields of the `describeScreen()` result.
 * @throws {Error} When `site` is not a supported key, or when the site could
 *   not be opened.
 */
async function askAI(site, question) {
  const urls = {
    perplexity: "https://www.perplexity.ai",
    chatgpt: "https://chat.openai.com",
    claude: "https://claude.ai",
    copilot: "https://copilot.microsoft.com",
    google: "https://www.google.com"
  };
  if (!Object.prototype.hasOwnProperty.call(urls, site)) {
    throw new Error(`Unknown AI site: ${site}`);
  }
  const opened = await openUrl(urls[site]);
  if (!opened.success) {
    // Stop here: the typing below is not targeted at a specific window, so
    // continuing would send the question and Return to whatever has focus.
    throw new Error(`Could not open ${site}: ${opened.error ?? "Failed to open URL"}`);
  }
  await sleep(4e3);
  await typeText(question);
  await sleep(500);
  await pressKey("Return");
  // Google gets a shorter wait than the chat sites.
  await sleep(site === "google" ? 3e3 : 1e4);
  const vision = await describeScreen();
  return {
    response: vision.description,
    screenshot: vision.screenshot
  };
}
1520
/**
 * Opens Gmail's compose view with any provided fields pre-filled through
 * query parameters (`to`, `su`, `body`).
 *
 * @param {string} [to] - Recipient; omitted from the URL when falsy.
 * @param {string} [subject] - Subject line; omitted when falsy.
 * @param {string} [body] - Message body; omitted when falsy.
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openGmailCompose(to, subject, body) {
  const prefill = [
    ["to", to],
    ["su", subject],
    ["body", body]
  ]
    .filter(([, fieldValue]) => fieldValue)
    .map(([param, fieldValue]) => `&${param}=${encodeURIComponent(fieldValue)}`)
    .join("");
  const { success } = await openUrl(`https://mail.google.com/mail/u/0/?fs=1&tf=cm${prefill}`);
  return success;
}
1516
- async function typeInElement(selector, text, timeout = 1e4) {
1517
- const page = await getPage();
1528
/**
 * Opens a pre-filled Gmail compose window and then issues Control+Return,
 * presumably the compose view's send shortcut.
 *
 * @param {{to: string, subject: string, body: string}} email - Message fields
 *   forwarded to `openGmailCompose`.
 * @returns {Promise<boolean>} `true` once the key combo has been issued;
 *   `false` when the compose window could not be opened or any step threw.
 *   `true` does not confirm that the message was actually delivered.
 */
async function sendGmail(email) {
  try {
    const composeOpened = await openGmailCompose(email.to, email.subject, email.body);
    if (!composeOpened) {
      // Without the compose window, the key combo below would be delivered
      // to whichever application currently has focus.
      return false;
    }
    await sleep(5e3);
    await keyCombo(["control", "Return"]);
    await sleep(2e3);
    return true;
  } catch {
    return false;
  }
}
1525
- async function pressKey2(key) {
1526
- const page = await getPage();
1527
- await page.keyboard.press(key);
1528
- }
1529
- async function scroll(direction, amount = 500) {
1530
- const page = await getPage();
1531
- await page.mouse.wheel(0, direction === "down" ? amount : -amount);
1539
/**
 * Opens Outlook on the web's compose deeplink with any provided fields
 * pre-filled through query parameters (`to`, `subject`, `body`).
 *
 * @param {string} [to] - Recipient; omitted from the URL when falsy.
 * @param {string} [subject] - Subject line; omitted when falsy.
 * @param {string} [body] - Message body; omitted when falsy.
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openOutlookCompose(to, subject, body) {
  // Each present field contributes "name=value&"; the trailing "&" is kept.
  const prefill = [
    ["to", to],
    ["subject", subject],
    ["body", body]
  ]
    .filter(([, fieldValue]) => fieldValue)
    .map(([param, fieldValue]) => `${param}=${encodeURIComponent(fieldValue)}&`)
    .join("");
  const { success } = await openUrl(`https://outlook.office.com/mail/deeplink/compose?${prefill}`);
  return success;
}
1533
- async function getPageText() {
1534
- const page = await getPage();
1535
- return await page.evaluate(() => document.body.innerText);
1536
- }
1537
- async function elementExists(selector) {
1538
- const page = await getPage();
1547
/**
 * Opens a pre-filled Outlook compose window and then issues Control+Return,
 * presumably the compose view's send shortcut.
 *
 * @param {{to: string, subject: string, body: string}} email - Message fields
 *   forwarded to `openOutlookCompose`.
 * @returns {Promise<boolean>} `true` once the key combo has been issued;
 *   `false` when the compose window could not be opened or any step threw.
 *   `true` does not confirm that the message was actually delivered.
 */
async function sendOutlook(email) {
  try {
    const composeOpened = await openOutlookCompose(email.to, email.subject, email.body);
    if (!composeOpened) {
      // Without the compose window, the key combo below would be delivered
      // to whichever application currently has focus.
      return false;
    }
    await sleep(5e3);
    await keyCombo(["control", "Return"]);
    await sleep(2e3);
    return true;
  } catch {
    return false;
  }
}
1546
- var aiChatConfigs = {
1547
- perplexity: {
1548
- url: "https://www.perplexity.ai",
1549
- inputSelector: 'textarea[placeholder*="Ask"]',
1550
- submitKey: "Enter",
1551
- responseSelector: '.prose, [class*="answer"], [class*="response"]',
1552
- waitForResponse: 15e3
1553
- },
1554
- chatgpt: {
1555
- url: "https://chat.openai.com",
1556
- inputSelector: 'textarea[id="prompt-textarea"], textarea[data-id="root"]',
1557
- submitSelector: 'button[data-testid="send-button"]',
1558
- responseSelector: '[data-message-author-role="assistant"]',
1559
- waitForResponse: 2e4
1560
- },
1561
- claude: {
1562
- url: "https://claude.ai",
1563
- inputSelector: '[contenteditable="true"], textarea',
1564
- submitKey: "Enter",
1565
- responseSelector: '[data-testid="message-content"]',
1566
- waitForResponse: 2e4
1567
- },
1568
- copilot: {
1569
- url: "https://copilot.microsoft.com",
1570
- inputSelector: 'textarea, [contenteditable="true"]',
1571
- submitKey: "Enter",
1572
- responseSelector: '[class*="response"], [class*="message"]',
1573
- waitForResponse: 15e3
1574
- },
1575
- google: {
1576
- url: "https://www.google.com",
1577
- inputSelector: 'textarea[name="q"], input[name="q"]',
1578
- submitKey: "Enter",
1579
- responseSelector: "#search",
1580
- waitForResponse: 5e3
1581
- }
1582
- };
1583
- async function askAI(site, question, includeScreenshot = false) {
1584
- const config = aiChatConfigs[site];
1585
- if (!config) {
1586
- throw new Error(`Unknown AI site: ${site}`);
1587
- }
1588
- const page = await getPage();
1589
- await page.goto(config.url, { waitUntil: "domcontentloaded" });
1590
- await page.waitForTimeout(2e3);
1591
- try {
1592
- await page.waitForSelector(config.inputSelector, { timeout: 1e4 });
1593
- await page.fill(config.inputSelector, question);
1594
- } catch {
1595
- await page.click(config.inputSelector);
1596
- await page.type(config.inputSelector, question, { delay: 30 });
1597
- }
1598
- if (config.submitSelector) {
1599
- await page.click(config.submitSelector);
1600
- } else if (config.submitKey) {
1601
- await page.keyboard.press(config.submitKey);
1602
- }
1603
- await page.waitForTimeout(config.waitForResponse);
1604
- let response = "";
1605
- try {
1606
- const elements = await page.$$(config.responseSelector);
1607
- if (elements.length > 0) {
1608
- const lastElement = elements[elements.length - 1];
1609
- response = await lastElement.textContent() || "";
1610
- }
1611
- } catch {
1612
- response = await getPageText();
1613
- }
1614
- let screenshot;
1615
- if (includeScreenshot) {
1616
- screenshot = await takeScreenshot();
1617
- }
1618
- return { response: response.trim(), screenshot };
1558
/**
 * Opens the Google Sheets "create" URL via `openUrl`.
 *
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openGoogleSheet() {
  const { success } = await openUrl("https://docs.google.com/spreadsheets/create");
  return success;
}
1620
- async function getFullAIResponse(site, maxScrolls = 5) {
1621
- const config = aiChatConfigs[site];
1622
- const page = await getPage();
1623
- const responseParts = [];
1624
- for (let i = 0; i < maxScrolls; i++) {
1625
- try {
1626
- const elements = await page.$$(config.responseSelector);
1627
- if (elements.length > 0) {
1628
- const lastElement = elements[elements.length - 1];
1629
- const text = await lastElement.textContent();
1630
- if (text) {
1631
- responseParts.push(text.trim());
1632
- }
1633
- }
1634
- await page.mouse.wheel(0, 500);
1635
- await page.waitForTimeout(1e3);
1636
- const atBottom = await page.evaluate(() => {
1637
- return window.innerHeight + window.scrollY >= document.body.scrollHeight - 100;
1638
- });
1639
- if (atBottom) break;
1640
- } catch {
1641
- break;
1642
- }
1643
- }
1644
- return responseParts;
1562
/**
 * Opens the Google Docs "create" URL via `openUrl`.
 *
 * @returns {Promise<boolean>} The `success` flag reported by `openUrl`.
 */
async function openGoogleDoc() {
  const { success } = await openUrl("https://docs.google.com/document/create");
  return success;
}
1646
- async function sendGmail(email) {
1647
- const page = await getPage();
1648
- try {
1649
- await page.goto("https://mail.google.com/mail/u/0/#inbox?compose=new");
1650
- await page.waitForTimeout(3e3);
1651
- await page.waitForSelector('input[aria-label*="To"]', { timeout: 1e4 });
1652
- await page.fill('input[aria-label*="To"]', email.to);
1653
- await page.keyboard.press("Tab");
1654
- await page.fill('input[name="subjectbox"]', email.subject);
1655
- await page.keyboard.press("Tab");
1656
- await page.fill('[aria-label*="Message Body"], [role="textbox"]', email.body);
1657
- await page.keyboard.press("Control+Enter");
1658
- await page.waitForTimeout(2e3);
1659
- return true;
1660
- } catch {
1661
- return false;
1566
/**
 * Scrolls by pressing the page-down or page-up key repeatedly, pausing
 * 200 ms after each press.
 *
 * @param {string} direction - `"down"` presses "pagedown"; any other value
 *   presses "pageup".
 * @param {number} [amount=3] - Number of key presses.
 * @returns {Promise<void>}
 */
async function scroll(direction, amount = 3) {
  const pageKey = direction === "down" ? "pagedown" : "pageup";
  let pressed = 0;
  while (pressed < amount) {
    await pressKey(pageKey);
    await sleep(200);
    pressed += 1;
  }
}
1664
- async function sendOutlook(email) {
1665
- const page = await getPage();
1666
- try {
1667
- await page.goto("https://outlook.office.com/mail/0/inbox");
1668
- await page.waitForTimeout(3e3);
1669
- await page.click('button[aria-label*="New mail"], button[title*="New mail"]');
1670
- await page.waitForTimeout(2e3);
1671
- await page.fill('input[aria-label*="To"]', email.to);
1672
- await page.keyboard.press("Tab");
1673
- await page.fill('input[aria-label*="Subject"], input[placeholder*="Subject"]', email.subject);
1674
- await page.keyboard.press("Tab");
1675
- await page.fill('[aria-label*="Message body"], [role="textbox"]', email.body);
1676
- await page.click('button[aria-label*="Send"], button[title*="Send"]');
1677
- await page.waitForTimeout(2e3);
1678
- return true;
1679
- } catch {
1680
- return false;
1681
- }
1573
/**
 * Returns the `description` field of the current `describeScreen()` result.
 *
 * @returns {Promise<*>} The screen description.
 */
async function getPageText() {
  const { description } = await describeScreen();
  return description;
}
1577
/**
 * Runs a Google search for `topic`, waits three seconds, and packages the
 * `describeScreen()` description as a single-source research result.
 *
 * @param {string} topic - Subject to search for.
 * @param {number} [maxSources=3] - Accepted for interface compatibility; not
 *   read by this implementation.
 * @returns {Promise<{query: string, sources: Array<{title: string, url: string, content: *}>, summary: *}>}
 *   The query, one source entry for the Google results page, and the same
 *   description repeated as `summary`.
 */
async function research(topic, maxSources = 3) {
  await searchGoogle(topic);
  await sleep(3e3);
  const { description } = await describeScreen();
  const googleSource = {
    title: `Google search: ${topic}`,
    url: `https://www.google.com/search?q=${encodeURIComponent(topic)}`,
    content: description
  };
  return {
    query: topic,
    sources: [googleSource],
    summary: description
  };
}
1683
- async function googleSheetsType(cellData) {
1684
- const page = await getPage();
1591
+ async function googleSheetsType(cells) {
1685
1592
  try {
1686
- await page.goto("https://docs.google.com/spreadsheets/create");
1687
- await page.waitForTimeout(5e3);
1688
- for (const { cell, value } of cellData) {
1689
- await page.click("input#t-name-box");
1690
- await page.fill("input#t-name-box", cell);
1691
- await page.keyboard.press("Enter");
1692
- await page.waitForTimeout(500);
1693
- await page.keyboard.type(value);
1694
- await page.keyboard.press("Enter");
1695
- await page.waitForTimeout(300);
1593
+ for (const { cell, value } of cells) {
1594
+ await keyCombo(["control", "g"]);
1595
+ await sleep(500);
1596
+ await typeText(cell);
1597
+ await pressKey("Return");
1598
+ await sleep(300);
1599
+ await typeText(value);
1600
+ await pressKey("Return");
1601
+ await sleep(200);
1696
1602
  }
1697
1603
  return true;
1698
1604
  } catch {
@@ -1700,80 +1606,14 @@ async function googleSheetsType(cellData) {
1700
1606
  }
1701
1607
  }
1702
1608
  async function googleDocsType(text) {
1703
- const page = await getPage();
1704
1609
  try {
1705
- await page.goto("https://docs.google.com/document/create");
1706
- await page.waitForTimeout(5e3);
1707
- await page.click(".kix-appview-editor");
1708
- await page.waitForTimeout(500);
1709
- await page.keyboard.type(text, { delay: 20 });
1610
+ await sleep(1e3);
1611
+ await typeText(text);
1710
1612
  return true;
1711
1613
  } catch {
1712
1614
  return false;
1713
1615
  }
1714
1616
  }
1715
- async function webSearch(query, engine = "google") {
1716
- const page = await getPage();
1717
- const results = [];
1718
- const urls = {
1719
- google: "https://www.google.com",
1720
- bing: "https://www.bing.com",
1721
- duckduckgo: "https://duckduckgo.com"
1722
- };
1723
- const selectors = {
1724
- google: { input: 'textarea[name="q"]', results: "#search .g h3" },
1725
- bing: { input: 'input[name="q"]', results: "#b_results h2 a" },
1726
- duckduckgo: { input: 'input[name="q"]', results: "[data-result] h2" }
1727
- };
1728
- try {
1729
- await page.goto(urls[engine]);
1730
- await page.waitForTimeout(2e3);
1731
- await page.fill(selectors[engine].input, query);
1732
- await page.keyboard.press("Enter");
1733
- await page.waitForTimeout(3e3);
1734
- const elements = await page.$$(selectors[engine].results);
1735
- for (const el of elements.slice(0, 10)) {
1736
- const text = await el.textContent();
1737
- if (text) results.push(text);
1738
- }
1739
- } catch {
1740
- }
1741
- return results;
1742
- }
1743
- async function research(topic, maxSources = 3) {
1744
- const page = await getPage();
1745
- const sources = [];
1746
- await webSearch(topic);
1747
- await page.waitForTimeout(2e3);
1748
- for (let i = 0; i < maxSources; i++) {
1749
- try {
1750
- const results = await page.$$("#search .g");
1751
- if (results[i]) {
1752
- const titleEl = await results[i].$("h3");
1753
- const linkEl = await results[i].$("a");
1754
- const title = await titleEl?.textContent() || "Unknown";
1755
- const url = await linkEl?.getAttribute("href") || "";
1756
- await titleEl?.click();
1757
- await page.waitForTimeout(3e3);
1758
- const content = await page.evaluate(() => {
1759
- const article = document.querySelector("article, main, .content, #content");
1760
- return article?.textContent?.slice(0, 2e3) || document.body.innerText.slice(0, 2e3);
1761
- });
1762
- sources.push({ title, url, content: content.trim() });
1763
- await page.goBack();
1764
- await page.waitForTimeout(1500);
1765
- }
1766
- } catch {
1767
- continue;
1768
- }
1769
- }
1770
- return {
1771
- query: topic,
1772
- sources,
1773
- summary: ""
1774
- // To be filled by AI
1775
- };
1776
- }
1777
1617
 
1778
1618
  // src/lib/tasks.ts
1779
1619
  import * as fs2 from "fs";
@@ -2164,9 +2004,9 @@ async function executeStep(step) {
2164
2004
  switch (actionType) {
2165
2005
  case "open_app":
2166
2006
  await keyCombo(["meta", "r"]);
2167
- await sleep(500);
2007
+ await sleep2(500);
2168
2008
  await typeText(params);
2169
- await sleep(300);
2009
+ await sleep2(300);
2170
2010
  await pressKey("Return");
2171
2011
  step.result = `Opened ${params}`;
2172
2012
  break;
@@ -2190,7 +2030,7 @@ async function executeStep(step) {
2190
2030
  break;
2191
2031
  case "wait":
2192
2032
  const seconds = parseInt(params) || 1;
2193
- await sleep(seconds * 1e3);
2033
+ await sleep2(seconds * 1e3);
2194
2034
  step.result = `Waited ${seconds}s`;
2195
2035
  break;
2196
2036
  case "focus_window":
@@ -2300,32 +2140,17 @@ ${existingResult.output}`;
2300
2140
  const supportedSites = ["perplexity", "chatgpt", "claude", "copilot", "google"];
2301
2141
  const siteLower = site.toLowerCase();
2302
2142
  if (supportedSites.includes(siteLower)) {
2303
- const result = await askAI(siteLower, question, true);
2304
- if (result.response.length < 500) {
2305
- const fullParts = await getFullAIResponse(siteLower, 5);
2306
- if (fullParts.length > 0) {
2307
- step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
2308
-
2309
- ${fullParts.join("\n\n")}`;
2310
- break;
2311
- }
2312
- }
2143
+ const result = await askAI(siteLower, question);
2313
2144
  step.result = `\u{1F4DD} ${site.charAt(0).toUpperCase() + site.slice(1)} says:
2314
2145
 
2315
2146
  ${result.response}`;
2316
2147
  } else {
2317
- await navigateTo(`https://${site}`);
2318
- await sleep(2e3);
2319
- const page = await getPage();
2320
- const inputs = ["textarea", 'input[type="text"]', 'input[type="search"]', '[contenteditable="true"]'];
2321
- for (const selector of inputs) {
2322
- if (await elementExists(selector)) {
2323
- await typeInElement(selector, question);
2324
- await pressKey2("Enter");
2325
- break;
2326
- }
2327
- }
2328
- await sleep(5e3);
2148
+ await openUrl(`https://${site}`);
2149
+ await sleep2(4e3);
2150
+ await typeText(question);
2151
+ await sleep2(300);
2152
+ await pressKey("Return");
2153
+ await sleep2(5e3);
2329
2154
  const pageText = await getPageText();
2330
2155
  step.result = `\u{1F4DD} Response from ${site}:
2331
2156
 
@@ -2338,17 +2163,8 @@ ${pageText.slice(0, 3e3)}`;
2338
2163
  step.result = vision.description;
2339
2164
  break;
2340
2165
  case "web_search": {
2341
- const searchResults = await webSearch(params, "google");
2342
- if (searchResults.length > 0) {
2343
- step.result = `\u{1F50D} Search results for "${params}":
2344
-
2345
- ${searchResults.map((r, i) => `${i + 1}. ${r}`).join("\n")}`;
2346
- } else {
2347
- const pageText = await getPageText();
2348
- step.result = `\u{1F50D} Search results for "${params}":
2349
-
2350
- ${pageText.slice(0, 2e3)}`;
2351
- }
2166
+ const searchResult = await webSearch(params, "google");
2167
+ step.result = searchResult;
2352
2168
  break;
2353
2169
  }
2354
2170
  case "send_email": {
@@ -2364,9 +2180,9 @@ ${pageText.slice(0, 2e3)}`;
2364
2180
  throw new Error(`Unsupported email provider: ${provider}. Use gmail or outlook.`);
2365
2181
  }
2366
2182
  if (success) {
2367
- step.result = `\u{1F4E7} Email sent via ${provider} to ${to}`;
2183
+ step.result = `\u{1F4E7} Email composed via ${provider} to ${to} (check browser to confirm send)`;
2368
2184
  } else {
2369
- throw new Error(`Failed to send email via ${provider}. Make sure you're logged in.`);
2185
+ throw new Error(`Failed to open email via ${provider}. Make sure you're logged in.`);
2370
2186
  }
2371
2187
  break;
2372
2188
  }
@@ -2375,9 +2191,9 @@ ${pageText.slice(0, 2e3)}`;
2375
2191
  switch (sheetCmd.toLowerCase()) {
2376
2192
  case "new": {
2377
2193
  const sheetName = sheetArgs[0] || "Untitled spreadsheet";
2378
- await navigateTo("https://docs.google.com/spreadsheets/create");
2379
- await sleep(5e3);
2380
- step.result = `\u{1F4CA} Created Google Sheet: ${sheetName}`;
2194
+ await openGoogleSheet();
2195
+ await sleep2(5e3);
2196
+ step.result = `\u{1F4CA} Opened new Google Sheet: ${sheetName}`;
2381
2197
  break;
2382
2198
  }
2383
2199
  case "type": {
@@ -2388,13 +2204,9 @@ ${pageText.slice(0, 2e3)}`;
2388
2204
  break;
2389
2205
  }
2390
2206
  case "read": {
2391
- const screenshot = await takeScreenshot();
2392
- const analysis = await chat([{
2393
- role: "user",
2394
- content: "Describe the contents of this Google Sheet. List visible data in the cells."
2395
- }]);
2207
+ const vision2 = await describeScreen();
2396
2208
  step.result = `\u{1F4CA} Current sheet view:
2397
- ${analysis.content}`;
2209
+ ${vision2.description}`;
2398
2210
  break;
2399
2211
  }
2400
2212
  default:
@@ -2407,8 +2219,9 @@ ${analysis.content}`;
2407
2219
  switch (docCmd.toLowerCase()) {
2408
2220
  case "new": {
2409
2221
  const docName = docArgs[0] || "Untitled document";
2410
- const success = await googleDocsType("");
2411
- step.result = success ? `\u{1F4C4} Created Google Doc: ${docName}` : `\u{1F4C4} Could not create Google Doc`;
2222
+ await openGoogleDoc();
2223
+ await sleep2(5e3);
2224
+ step.result = `\u{1F4C4} Opened new Google Doc: ${docName}`;
2412
2225
  break;
2413
2226
  }
2414
2227
  case "type": {
@@ -2425,15 +2238,11 @@ ${analysis.content}`;
2425
2238
  case "research": {
2426
2239
  const researchQuery = params;
2427
2240
  const researchData = await research(researchQuery, 3);
2428
- const sourceSummaries = researchData.sources.map(
2429
- (s, i) => `Source ${i + 1}: ${s.title}
2430
- ${s.content.slice(0, 500)}...`
2431
- ).join("\n\n");
2432
2241
  const synthesis = await chat([{
2433
2242
  role: "user",
2434
- content: `Based on the following research gathered about "${researchQuery}", provide a comprehensive summary:
2243
+ content: `Based on the following search results about "${researchQuery}", provide a comprehensive summary:
2435
2244
 
2436
- ${sourceSummaries}
2245
+ ${researchData.summary}
2437
2246
 
2438
2247
  Create a well-organized summary with:
2439
2248
  1. Key findings
@@ -2460,12 +2269,10 @@ Here's what I see on my screen: ${currentScreen.description}`;
2460
2269
  if (!supportedLLMs.includes(llmLower)) {
2461
2270
  throw new Error(`Unknown LLM: ${llmName}. Supported: ${supportedLLMs.join(", ")}`);
2462
2271
  }
2463
- const result = await askAI(llmLower, fullQuestion, false);
2464
- const fullParts = await getFullAIResponse(llmLower, 3);
2465
- const finalResponse = fullParts.length > 0 ? fullParts.join("\n\n") : result.response;
2272
+ const result = await askAI(llmLower, fullQuestion);
2466
2273
  step.result = `\u{1F916} ${llmName} says:
2467
2274
 
2468
- ${finalResponse}`;
2275
+ ${result.response}`;
2469
2276
  break;
2470
2277
  }
2471
2278
  case "learn_ui": {
@@ -2494,48 +2301,40 @@ ${uiAnalysis.content}`;
2494
2301
  const maxAttempts = 5;
2495
2302
  const actionHistory = [];
2496
2303
  let accomplished = false;
2497
- const page = await getPage();
2498
2304
  for (let attempt = 0; attempt < maxAttempts && !accomplished; attempt++) {
2499
- const screenshot = await takeScreenshot();
2500
- const currentState = await chat([{
2501
- role: "user",
2502
- content: `Describe what you see on this screen. What app/website is it? What elements are visible?`
2503
- }]);
2305
+ const currentScreen = await describeScreen();
2504
2306
  const nextAction = await chat([{
2505
2307
  role: "user",
2506
2308
  content: `GOAL: ${goal}
2507
2309
 
2508
- CURRENT SCREEN: ${currentState.content}
2310
+ CURRENT SCREEN: ${currentScreen.description}
2509
2311
 
2510
2312
  PREVIOUS ACTIONS TAKEN:
2511
2313
  ${actionHistory.length > 0 ? actionHistory.join("\n") : "None yet"}
2512
2314
 
2513
2315
  Based on what you see, what's the SINGLE next action to take?
2514
2316
  Options:
2515
- - click: Click element (describe CSS selector or visible text)
2516
- - type: Type something (specify selector and text)
2517
- - press: Press a key (specify key)
2317
+ - click: Click (will click at current mouse position)
2318
+ - type: Type something (specify text)
2319
+ - press: Press a key (specify key like Enter, Tab, Escape)
2518
2320
  - scroll: Scroll up/down
2519
- - navigate: Go to URL
2321
+ - navigate: Go to URL (opens in browser)
2520
2322
  - done: Goal is accomplished
2521
2323
  - stuck: Can't figure out what to do
2522
2324
 
2523
2325
  Respond in format:
2524
2326
  ACTION: <action_type>
2525
- SELECTOR: <css selector or text to find>
2526
- VALUE: <text to type or URL>
2327
+ VALUE: <text to type, URL to navigate, or key to press>
2527
2328
  REASONING: <why>`
2528
2329
  }]);
2529
2330
  const actionContent = nextAction.content;
2530
2331
  const actionMatch = actionContent.match(/ACTION:\s*(\w+)/i);
2531
- const selectorMatch = actionContent.match(/SELECTOR:\s*(.+?)(?:\n|$)/i);
2532
2332
  const valueMatch = actionContent.match(/VALUE:\s*(.+?)(?:\n|$)/i);
2533
2333
  if (!actionMatch) {
2534
2334
  actionHistory.push(`Attempt ${attempt + 1}: Couldn't parse action`);
2535
2335
  continue;
2536
2336
  }
2537
2337
  const action = actionMatch[1].toLowerCase();
2538
- const selector = selectorMatch?.[1]?.trim() || "";
2539
2338
  const value = valueMatch?.[1]?.trim() || "";
2540
2339
  if (action === "done") {
2541
2340
  accomplished = true;
@@ -2547,34 +2346,25 @@ REASONING: <why>`
2547
2346
  const helpRequest = `I'm trying to: ${goal}
2548
2347
 
2549
2348
  I'm stuck. What should I do next? Be specific about what to click or type.`;
2550
- const advice = await askAI("perplexity", helpRequest, false);
2349
+ const advice = await askAI("perplexity", helpRequest);
2551
2350
  actionHistory.push(`Got advice: ${advice.response.slice(0, 200)}...`);
2552
- await navigateTo(page.url());
2553
2351
  continue;
2554
2352
  }
2555
2353
  try {
2556
2354
  switch (action) {
2557
2355
  case "click":
2558
- if (selector) {
2559
- const clicked = await clickElement(selector);
2560
- if (!clicked) {
2561
- await page.getByText(selector).first().click({ timeout: 5e3 });
2562
- }
2563
- }
2564
- actionHistory.push(`Attempt ${attempt + 1}: Clicked "${selector}"`);
2356
+ await clickMouse("left");
2357
+ actionHistory.push(`Attempt ${attempt + 1}: Clicked`);
2565
2358
  break;
2566
2359
  case "type":
2567
- if (selector && value) {
2568
- const typed = await typeInElement(selector, value);
2569
- if (!typed) {
2570
- await page.getByPlaceholder(selector).first().fill(value);
2571
- }
2360
+ if (value) {
2361
+ await typeText(value);
2572
2362
  }
2573
- actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}" in "${selector}"`);
2363
+ actionHistory.push(`Attempt ${attempt + 1}: Typed "${value}"`);
2574
2364
  break;
2575
2365
  case "press":
2576
- await pressKey2(value || selector);
2577
- actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || selector}`);
2366
+ await pressKey(value || "Return");
2367
+ actionHistory.push(`Attempt ${attempt + 1}: Pressed ${value || "Enter"}`);
2578
2368
  break;
2579
2369
  case "scroll":
2580
2370
  await scroll(value.toLowerCase().includes("up") ? "up" : "down");
@@ -2582,8 +2372,8 @@ I'm stuck. What should I do next? Be specific about what to click or type.`;
2582
2372
  break;
2583
2373
  case "navigate":
2584
2374
  const url = value.startsWith("http") ? value : `https://${value}`;
2585
- await navigateTo(url);
2586
- actionHistory.push(`Attempt ${attempt + 1}: Navigated to ${url}`);
2375
+ await openUrl(url);
2376
+ actionHistory.push(`Attempt ${attempt + 1}: Opened ${url}`);
2587
2377
  break;
2588
2378
  default:
2589
2379
  actionHistory.push(`Attempt ${attempt + 1}: Unknown action ${action}`);
@@ -2591,7 +2381,7 @@ I'm stuck. What should I do next? Be specific about what to click or type.`;
2591
2381
  } catch (e) {
2592
2382
  actionHistory.push(`Attempt ${attempt + 1}: Action failed - ${e}`);
2593
2383
  }
2594
- await sleep(2e3);
2384
+ await sleep2(2e3);
2595
2385
  }
2596
2386
  step.result = `\u{1F3AF} Adaptive Agent Result:
2597
2387
 
@@ -2639,7 +2429,7 @@ async function executeTask(task, onProgress) {
2639
2429
  task.completedAt = /* @__PURE__ */ new Date();
2640
2430
  return task;
2641
2431
  }
2642
- function sleep(ms) {
2432
/**
 * Returns a promise that settles after a delay.
 *
 * @param {number} ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep2(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
2645
2435
  function getTaskMemoryStats() {