@leg3ndy/otto-bridge 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -665,6 +665,44 @@ function parseStructuredActions(job) {
665
665
  }
666
666
  continue;
667
667
  }
668
+ if (type === "scroll_view" || type === "scroll" || type === "scroll_page") {
669
+ const rawDirection = (asString(action.direction) || asString(action.dir) || "down").toLowerCase();
670
+ const direction = rawDirection === "up" || rawDirection === "cima" ? "up" : "down";
671
+ const rawAmount = (asString(action.amount) || "").toLowerCase();
672
+ const amount = rawAmount === "small" || rawAmount === "medium" || rawAmount === "large"
673
+ ? rawAmount
674
+ : undefined;
675
+ const rawSteps = Number(action.steps);
676
+ const steps = Number.isFinite(rawSteps) ? Math.max(1, Math.min(Math.round(rawSteps), 6)) : undefined;
677
+ actions.push({
678
+ type: "scroll_view",
679
+ direction,
680
+ amount,
681
+ steps,
682
+ app: asString(action.app) || undefined,
683
+ });
684
+ continue;
685
+ }
686
+ if (type === "whatsapp_send_message") {
687
+ const contact = asString(action.contact) || asString(action.recipient);
688
+ const text = asString(action.text) || asString(action.message);
689
+ if (contact && text) {
690
+ actions.push({ type: "whatsapp_send_message", contact, text });
691
+ }
692
+ continue;
693
+ }
694
+ if (type === "whatsapp_read_chat") {
695
+ const contact = asString(action.contact) || asString(action.recipient);
696
+ const rawLimit = Number(action.limit);
697
+ if (contact) {
698
+ actions.push({
699
+ type: "whatsapp_read_chat",
700
+ contact,
701
+ limit: Number.isFinite(rawLimit) ? Math.max(1, Math.min(Math.round(rawLimit), 30)) : undefined,
702
+ });
703
+ }
704
+ continue;
705
+ }
668
706
  if (type === "click_visual_target" || type === "click_target") {
669
707
  const description = asString(action.description) || asString(action.target);
670
708
  if (description) {
@@ -744,6 +782,23 @@ function deriveActionsFromText(job) {
744
782
  }
745
783
  return [{ type: "set_volume", level }];
746
784
  }
785
+ if (/\b(scroll|rola\w*|role\w*|desce\w*|sobe\w*)\b/i.test(task)) {
786
+ const direction = /\b(sobe\w*|suba\w*|para cima|pra cima|scroll up)\b/i.test(task) ? "up" : "down";
787
+ const amount = /\b(pouco|leve|small)\b/i.test(task)
788
+ ? "small"
789
+ : /\b(muito|bastante|fim|grande|large)\b/i.test(task)
790
+ ? "large"
791
+ : "medium";
792
+ const stepsMatch = task.match(/\b(\d{1,2})\s*(?:x|vezes?)\b/i);
793
+ const steps = stepsMatch?.[1] ? Math.max(1, Math.min(Number(stepsMatch[1]), 6)) : 1;
794
+ return [{
795
+ type: "scroll_view",
796
+ direction,
797
+ amount,
798
+ steps,
799
+ app: detectedApp || undefined,
800
+ }];
801
+ }
747
802
  if ((normalizedTask.includes("leia") || normalizedTask.includes("ler")) && detectedUrl) {
748
803
  return [
749
804
  { type: "open_url", url: detectedUrl, app: detectedApp || "Safari" },
@@ -934,6 +989,68 @@ export class NativeMacOSJobExecutor {
934
989
  completionNotes.push(`Volume ajustado para ${action.level}% no macOS.`);
935
990
  continue;
936
991
  }
992
+ if (action.type === "scroll_view") {
993
+ const scrollApp = action.app || this.lastActiveApp || await this.getFrontmostAppName();
994
+ if (scrollApp) {
995
+ await reporter.progress(progressPercent, `Trazendo ${scrollApp} para frente antes de rolar a tela`);
996
+ await this.focusApp(scrollApp);
997
+ }
998
+ const directionLabel = action.direction === "up" ? "cima" : "baixo";
999
+ await reporter.progress(progressPercent, `Rolando a tela para ${directionLabel}`);
1000
+ await this.scrollView(action.direction, action.amount, action.steps);
1001
+ resultPayload.last_scroll = {
1002
+ direction: action.direction,
1003
+ amount: action.amount || "medium",
1004
+ steps: action.steps || 1,
1005
+ app: scrollApp || null,
1006
+ };
1007
+ completionNotes.push(`Rolei a tela para ${directionLabel} no macOS.`);
1008
+ continue;
1009
+ }
1010
+ if (action.type === "whatsapp_send_message") {
1011
+ await reporter.progress(progressPercent, `Abrindo a conversa do WhatsApp com ${action.contact}`);
1012
+ await this.focusApp("Safari");
1013
+ await this.ensureWhatsAppWebReady();
1014
+ const selected = await this.selectWhatsAppConversation(action.contact);
1015
+ if (!selected) {
1016
+ throw new Error(`Nao consegui localizar a conversa do WhatsApp com ${action.contact}.`);
1017
+ }
1018
+ await reporter.progress(progressPercent, `Digitando a mensagem para ${action.contact} no WhatsApp`);
1019
+ await this.focusWhatsAppComposer();
1020
+ await this.typeText(action.text);
1021
+ await delay(250);
1022
+ await this.pressShortcut("return");
1023
+ await delay(900);
1024
+ const verification = await this.verifyWhatsAppLastMessage(action.text);
1025
+ if (!verification.ok) {
1026
+ throw new Error(verification.reason || `Nao consegui confirmar o envio da mensagem para ${action.contact} no WhatsApp.`);
1027
+ }
1028
+ resultPayload.whatsapp = {
1029
+ action: "send_message",
1030
+ contact: action.contact,
1031
+ text_preview: clipText(action.text, 180),
1032
+ };
1033
+ completionNotes.push(`Enviei a mensagem no WhatsApp para ${action.contact}.`);
1034
+ continue;
1035
+ }
1036
+ if (action.type === "whatsapp_read_chat") {
1037
+ await reporter.progress(progressPercent, `Abrindo a conversa do WhatsApp com ${action.contact}`);
1038
+ await this.focusApp("Safari");
1039
+ await this.ensureWhatsAppWebReady();
1040
+ const selected = await this.selectWhatsAppConversation(action.contact);
1041
+ if (!selected) {
1042
+ throw new Error(`Nao consegui localizar a conversa do WhatsApp com ${action.contact}.`);
1043
+ }
1044
+ await delay(500);
1045
+ const chat = await this.readWhatsAppVisibleConversation(action.contact, action.limit || 12);
1046
+ resultPayload.whatsapp = {
1047
+ action: "read_chat",
1048
+ contact: action.contact,
1049
+ messages: chat.messages,
1050
+ };
1051
+ completionNotes.push(`Mensagens visiveis no WhatsApp com ${action.contact}:\n${chat.summary}`);
1052
+ continue;
1053
+ }
937
1054
  if (action.type === "click_visual_target") {
938
1055
  const browserApp = await this.resolveLikelyBrowserApp(action.app);
939
1056
  if (browserApp) {
@@ -1360,6 +1477,253 @@ end tell
1360
1477
  }
1361
1478
  }
1362
1479
  }
1480
+ async scrollView(direction, amount = "medium", steps = 1) {
1481
+ const clampedSteps = Math.max(1, Math.min(Math.round(steps || 1), 6));
1482
+ const lineDelta = {
1483
+ small: 4,
1484
+ medium: 8,
1485
+ large: 14,
1486
+ }[amount];
1487
+ const signedDelta = direction === "up" ? lineDelta : -lineDelta;
1488
+ const iterations = {
1489
+ small: 1,
1490
+ medium: 2,
1491
+ large: 3,
1492
+ }[amount];
1493
+ const swiftScript = `
1494
+ import ApplicationServices
1495
+ import Foundation
1496
+
1497
+ let wheelDelta: Int32 = ${signedDelta}
1498
+ let stepCount = ${clampedSteps}
1499
+ let iterations = ${iterations}
1500
+
1501
+ for _ in 0..<stepCount {
1502
+ for _ in 0..<iterations {
1503
+ if let event = CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 1, wheel1: wheelDelta, wheel2: 0, wheel3: 0) {
1504
+ event.post(tap: .cghidEventTap)
1505
+ }
1506
+ usleep(35000)
1507
+ }
1508
+ usleep(85000)
1509
+ }
1510
+ `;
1511
+ try {
1512
+ await this.runCommand("swift", ["-e", swiftScript]);
1513
+ }
1514
+ catch {
1515
+ await this.scrollViewWithPageKeys(direction, clampedSteps);
1516
+ }
1517
+ }
1518
+ async scrollViewWithPageKeys(direction, steps) {
1519
+ const keyCode = direction === "up" ? 116 : 121;
1520
+ const clampedSteps = Math.max(1, Math.min(Math.round(steps || 1), 6));
1521
+ const script = `
1522
+ repeat ${clampedSteps} times
1523
+ tell application "System Events" to key code ${keyCode}
1524
+ delay 0.06
1525
+ end repeat
1526
+ `;
1527
+ await this.runCommand("osascript", ["-e", script]);
1528
+ }
1529
+ async ensureWhatsAppWebReady() {
1530
+ const page = await this.readFrontmostPage("Safari");
1531
+ if (!normalizeComparableUrl(page.url || "").includes("web.whatsapp.com")) {
1532
+ throw new Error("O Safari nao esta aberto no WhatsApp Web.");
1533
+ }
1534
+ }
1535
+ async selectWhatsAppConversation(contact) {
1536
+ const prepared = await this.runSafariJsonScript(`
1537
+ const query = String(__input?.contact || "");
1538
+ const normalize = (value) => String(value || "").normalize("NFD").replace(/[\\u0300-\\u036f]/g, "").toLowerCase().trim();
1539
+ const normalizedQuery = normalize(query);
1540
+
1541
+ function isVisible(element) {
1542
+ if (!(element instanceof HTMLElement)) return false;
1543
+ const rect = element.getBoundingClientRect();
1544
+ if (rect.width < 4 || rect.height < 4) return false;
1545
+ const style = window.getComputedStyle(element);
1546
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1547
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1548
+ }
1549
+
1550
+ function focusAndReplaceContent(element, value) {
1551
+ element.focus();
1552
+ const range = document.createRange();
1553
+ range.selectNodeContents(element);
1554
+ const selection = window.getSelection();
1555
+ selection?.removeAllRanges();
1556
+ selection?.addRange(range);
1557
+ document.execCommand("selectAll", false);
1558
+ document.execCommand("delete", false);
1559
+ document.execCommand("insertText", false, value);
1560
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, data: value, inputType: "insertText" }));
1561
+ }
1562
+
1563
+ const candidates = Array.from(document.querySelectorAll('div[contenteditable="true"][role="textbox"], div[contenteditable="true"][data-tab], [data-testid="chat-list-search"] [contenteditable="true"]'))
1564
+ .filter((node) => node instanceof HTMLElement)
1565
+ .filter((node) => isVisible(node))
1566
+ .map((node) => {
1567
+ const element = node;
1568
+ const rect = element.getBoundingClientRect();
1569
+ const label = normalize(element.getAttribute("aria-label") || element.getAttribute("data-testid") || element.textContent || "");
1570
+ let score = 0;
1571
+ if (rect.left < window.innerWidth * 0.45) score += 30;
1572
+ if (rect.top < 240) score += 30;
1573
+ if (label.includes("search") || label.includes("pesquisar") || label.includes("procure") || label.includes("chat list")) score += 80;
1574
+ if (element.closest('[data-testid="chat-list-search"], header')) score += 25;
1575
+ return { element, score };
1576
+ })
1577
+ .sort((left, right) => right.score - left.score);
1578
+
1579
+ if (!candidates.length) {
1580
+ return { ok: false, reason: "Nao achei o campo de busca do WhatsApp Web." };
1581
+ }
1582
+
1583
+ focusAndReplaceContent(candidates[0].element, query);
1584
+ return { ok: true };
1585
+ `, { contact });
1586
+ if (!prepared?.ok) {
1587
+ return false;
1588
+ }
1589
+ await delay(900);
1590
+ const result = await this.runSafariJsonScript(`
1591
+ const query = String(__input?.contact || "");
1592
+ const normalize = (value) => String(value || "").normalize("NFD").replace(/[\\u0300-\\u036f]/g, "").toLowerCase().trim();
1593
+ const normalizedQuery = normalize(query);
1594
+
1595
+ function isVisible(element) {
1596
+ if (!(element instanceof HTMLElement)) return false;
1597
+ const rect = element.getBoundingClientRect();
1598
+ if (rect.width < 6 || rect.height < 6) return false;
1599
+ const style = window.getComputedStyle(element);
1600
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1601
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1602
+ }
1603
+
1604
+ const titleNodes = Array.from(document.querySelectorAll('span[title], div[title]'))
1605
+ .filter((node) => node instanceof HTMLElement)
1606
+ .filter((node) => isVisible(node))
1607
+ .map((node) => {
1608
+ const text = normalize(node.getAttribute("title") || node.textContent || "");
1609
+ let score = 0;
1610
+ if (text === normalizedQuery) score += 160;
1611
+ if (text.includes(normalizedQuery)) score += 100;
1612
+ if (normalizedQuery.includes(text) && text.length >= 3) score += 50;
1613
+ const container = node.closest('[data-testid="cell-frame-container"], [role="listitem"], [role="gridcell"], div[tabindex]');
1614
+ if (container instanceof HTMLElement && isVisible(container)) score += 20;
1615
+ return { node, container, text, score };
1616
+ })
1617
+ .filter((item) => item.score > 0)
1618
+ .sort((left, right) => right.score - left.score);
1619
+
1620
+ if (!titleNodes.length) {
1621
+ return { clicked: false, reason: "Nao achei uma conversa visivel com esse nome." };
1622
+ }
1623
+
1624
+ const winner = titleNodes[0];
1625
+ const target = winner.container instanceof HTMLElement ? winner.container : winner.node;
1626
+ target.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
1627
+ target.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, cancelable: true, view: window }));
1628
+ target.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, cancelable: true, view: window }));
1629
+ target.dispatchEvent(new MouseEvent("click", { bubbles: true, cancelable: true, view: window }));
1630
+ if (typeof target.click === "function") {
1631
+ target.click();
1632
+ }
1633
+ return { clicked: true };
1634
+ `, { contact });
1635
+ return Boolean(result?.clicked);
1636
+ }
1637
+ async focusWhatsAppComposer() {
1638
+ const result = await this.runSafariJsonScript(`
1639
+ function isVisible(element) {
1640
+ if (!(element instanceof HTMLElement)) return false;
1641
+ const rect = element.getBoundingClientRect();
1642
+ if (rect.width < 6 || rect.height < 6) return false;
1643
+ const style = window.getComputedStyle(element);
1644
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1645
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1646
+ }
1647
+
1648
+ const candidates = Array.from(document.querySelectorAll('footer div[contenteditable="true"], [data-testid="conversation-compose-box-input"], main footer [contenteditable="true"]'))
1649
+ .filter((node) => node instanceof HTMLElement)
1650
+ .filter((node) => isVisible(node))
1651
+ .sort((left, right) => right.getBoundingClientRect().top - left.getBoundingClientRect().top);
1652
+
1653
+ if (!candidates.length) {
1654
+ return { focused: false, reason: "Nao achei o campo de mensagem do WhatsApp Web." };
1655
+ }
1656
+
1657
+ const composer = candidates[0];
1658
+ composer.focus();
1659
+ composer.click();
1660
+ return { focused: true };
1661
+ `);
1662
+ if (!result?.focused) {
1663
+ throw new Error(result?.reason || "Nao consegui focar o campo de mensagem do WhatsApp Web.");
1664
+ }
1665
+ }
1666
+ async readWhatsAppVisibleConversation(contact, limit) {
1667
+ const result = await this.runSafariJsonScript(`
1668
+ const maxMessages = Number(__input?.limit || 12);
1669
+
1670
+ function isVisible(element) {
1671
+ if (!(element instanceof HTMLElement)) return false;
1672
+ const rect = element.getBoundingClientRect();
1673
+ if (rect.width < 6 || rect.height < 6) return false;
1674
+ const style = window.getComputedStyle(element);
1675
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1676
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1677
+ }
1678
+
1679
+ const containers = Array.from(document.querySelectorAll('[data-testid="msg-container"], div[data-id]'))
1680
+ .filter((node) => node instanceof HTMLElement)
1681
+ .filter((node) => isVisible(node));
1682
+
1683
+ const messages = containers.map((node) => {
1684
+ const element = node;
1685
+ const prePlain = element.querySelector('[data-pre-plain-text]')?.getAttribute('data-pre-plain-text') || "";
1686
+ const authorMatch = prePlain.match(/\\]\\s*([^:]+):/);
1687
+ const author = authorMatch?.[1]?.trim() || (element.getAttribute('data-testid')?.includes('out') ? 'Voce' : 'Contato');
1688
+ const text = (element.innerText || "").trim().replace(/\\n{2,}/g, "\\n");
1689
+ return { author, text };
1690
+ }).filter((item) => item.text);
1691
+
1692
+ return { messages: messages.slice(-maxMessages) };
1693
+ `, { contact, limit });
1694
+ const messages = Array.isArray(result?.messages)
1695
+ ? result.messages
1696
+ .map((item) => ({
1697
+ author: clipText(asString(item.author) || "Contato", 80),
1698
+ text: clipText(asString(item.text) || "", 500),
1699
+ }))
1700
+ .filter((item) => item.text)
1701
+ : [];
1702
+ return {
1703
+ messages,
1704
+ summary: messages.length
1705
+ ? messages.map((item) => `${item.author}: ${item.text}`).join("\n")
1706
+ : `(sem mensagens visiveis na conversa com ${contact})`,
1707
+ };
1708
+ }
1709
+ async verifyWhatsAppLastMessage(expectedText) {
1710
+ const chat = await this.readWhatsAppVisibleConversation("Contato", 6);
1711
+ if (!chat.messages.length) {
1712
+ return {
1713
+ ok: false,
1714
+ reason: "Nao consegui ler as mensagens visiveis apos o envio no WhatsApp.",
1715
+ };
1716
+ }
1717
+ const normalizedExpected = normalizeText(expectedText).slice(0, 60);
1718
+ const matched = chat.messages.some((item) => normalizeText(item.text).includes(normalizedExpected));
1719
+ if (!matched) {
1720
+ return {
1721
+ ok: false,
1722
+ reason: "Nao consegui confirmar visualmente a mensagem enviada no WhatsApp.",
1723
+ };
1724
+ }
1725
+ return { ok: true, reason: "" };
1726
+ }
1363
1727
  async takeScreenshot(targetPath) {
1364
1728
  const artifactsDir = path.join(os.homedir(), ".otto-bridge", "artifacts");
1365
1729
  await mkdir(artifactsDir, { recursive: true });
@@ -2214,6 +2578,15 @@ if let output = String(data: data, encoding: .utf8) {
2214
2578
  if (action.type === "set_volume") {
2215
2579
  return `Volume ajustado para ${action.level}% no macOS`;
2216
2580
  }
2581
+ if (action.type === "scroll_view") {
2582
+ return `Tela rolada para ${action.direction === "up" ? "cima" : "baixo"} no macOS`;
2583
+ }
2584
+ if (action.type === "whatsapp_send_message") {
2585
+ return `Mensagem enviada no WhatsApp para ${action.contact}`;
2586
+ }
2587
+ if (action.type === "whatsapp_read_chat") {
2588
+ return `Conversa do WhatsApp lida com ${action.contact}`;
2589
+ }
2217
2590
  if (action.type === "click_visual_target") {
2218
2591
  return `Clique guiado executado para ${action.description}`;
2219
2592
  }
package/dist/types.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const BRIDGE_CONFIG_VERSION = 1;
2
- export const BRIDGE_VERSION = "0.5.6";
2
+ export const BRIDGE_VERSION = "0.5.8";
3
3
  export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
4
4
  export const DEFAULT_API_BASE_URL = "http://localhost:8000";
5
5
  export const DEFAULT_POLL_INTERVAL_MS = 3000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@leg3ndy/otto-bridge",
3
- "version": "0.5.6",
3
+ "version": "0.5.8",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Local companion for Otto Bridge device pairing and WebSocket runtime.",