@leg3ndy/otto-bridge 0.5.8 → 0.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ import os from "node:os";
4
4
  import path from "node:path";
5
5
  import process from "node:process";
6
6
  import { JobCancelledError } from "./shared.js";
7
+ import { loadManagedBridgeExtensionState, saveManagedBridgeExtensionState, } from "../extensions.js";
7
8
  import { postDeviceJson, uploadDeviceJobArtifact } from "../http.js";
8
9
  const KNOWN_APPS = [
9
10
  { canonical: "Safari", patterns: [/\bsafari\b/i] },
@@ -30,6 +31,8 @@ const KNOWN_SITES = [
30
31
  { label: "WhatsApp Web", url: "https://web.whatsapp.com", patterns: [/\bwhatsapp\b/i] },
31
32
  { label: "X", url: "https://x.com", patterns: [/\bx\.com\b/i, /\btwitter\b/i, /\bxis\b/i] },
32
33
  ];
34
+ const WHATSAPP_WEB_EXTENSION_SLUG = "whatsappweb";
35
+ const WHATSAPP_WEB_URL = "https://web.whatsapp.com";
33
36
  const GENERIC_VISUAL_STOP_WORDS = new Set([
34
37
  "o",
35
38
  "a",
@@ -129,6 +132,102 @@ function extractMeaningfulDescriptionTokens(value) {
129
132
  .map((token) => token.trim())
130
133
  .filter((token) => token.length >= 3 && !GENERIC_VISUAL_STOP_WORDS.has(token))));
131
134
  }
135
// Tokens that name a media service or a transport action rather than the
// thing being searched for; extractMediaQueryTokens filters them out.
// Entries are spelled without diacritics (e.g. "reproducao", "proxima").
const MEDIA_QUERY_STOP_WORDS = new Set([
    // services and player surfaces
    "youtube", "music", "spotify", "apple", "player",
    // play / pause / resume verbs
    "play", "pause", "pausa", "pausar", "tocar", "toque", "reproduzir", "reproducao",
    "retomar", "retoma", "continuar", "continue", "resumir", "resume",
    // track navigation
    "next", "skip", "proxima", "proximo", "anterior", "previous", "voltar", "volta",
    // filler words
    "melhor", "tocavel", "relacionado", "diretamente", "navegador", "fila", "playerbar",
]);
171
/**
 * Returns the meaningful description tokens of `value` that are not listed in
 * MEDIA_QUERY_STOP_WORDS.
 *
 * @param {string} value - Free-form target description.
 * @returns {string[]} Tokens left after removing media stop words.
 */
function extractMediaQueryTokens(value) {
    const meaningfulTokens = extractMeaningfulDescriptionTokens(value);
    const isQueryToken = (token) => !MEDIA_QUERY_STOP_WORDS.has(token);
    return meaningfulTokens.filter(isQueryToken);
}
174
/**
 * Counts how many of `tokens` occur as substrings of the normalized `text`.
 *
 * @param {string} text - Text to search; falsy values are treated as "".
 * @param {string[]} tokens - Tokens to look for.
 * @returns {number} Number of tokens found, or 0 when either input is empty.
 */
function countMatchingTokens(text, tokens) {
    const haystack = normalizeText(text || "");
    const nothingToCompare = !haystack || !tokens.length;
    if (nothingToCompare) {
        return 0;
    }
    return tokens.filter((token) => haystack.includes(token)).length;
}
181
/**
 * Heuristically decides whether the text read back from a field (`actual`)
 * reflects the text that was supposed to be typed (`expected`).
 *
 * Both values are normalized and whitespace-collapsed first. The check passes
 * when one value contains the other, or when enough of the expected tokens
 * (alphanumeric runs of 3+ characters) appear in the actual text.
 *
 * @param {string} actual - Text currently in the field.
 * @param {string} expected - Text that was meant to be entered.
 * @returns {boolean} True when the field plausibly holds the expected text.
 */
function typedTextLooksApplied(actual, expected) {
    const collapse = (value) => normalizeText(value || "").replace(/\s+/g, " ").trim();
    const normalizedActual = collapse(actual);
    const normalizedExpected = collapse(expected);
    if (!normalizedActual || !normalizedExpected) {
        return false;
    }
    // Containment in either direction also covers exact equality.
    if (normalizedActual.includes(normalizedExpected) || normalizedExpected.includes(normalizedActual)) {
        return true;
    }
    const expectedTokens = normalizedExpected.split(/[^a-z0-9]+/).filter((token) => token.length >= 3);
    if (!expectedTokens.length) {
        return false;
    }
    const matches = expectedTokens.filter((token) => normalizedActual.includes(token)).length;
    // Require 60% of the tokens with a floor of two, but never more tokens
    // than exist: otherwise a single-token expectation could never pass here.
    const required = Math.min(expectedTokens.length, Math.max(2, Math.ceil(expectedTokens.length * 0.6)));
    return matches >= required;
}
199
/**
 * Tells whether a target description asks to advance to the next media item.
 *
 * The description is passed through normalizeText before matching. Both the
 * plain `c` and the `ç` spellings of "avançar" are accepted so the check works
 * whether or not normalization strips diacritics, and the infinitive
 * ("avancar") and imperative ("avance") forms are recognized.
 *
 * @param {string} description - Free-form target description.
 * @returns {boolean} True when a "next/skip" keyword is present.
 */
function descriptionWantsNext(description) {
    return /\b(proxim[ao]?|next|skip|pular|avan[cç][ae]r?)\b/.test(normalizeText(description || ""));
}
202
/**
 * Tells whether a target description asks to go back to the previous media item.
 *
 * Matches "volta"/"voltar" and "retorna"/"retornar" with an optional trailing
 * `r` rather than a character class, so misspellings such as "voltaa" are not
 * accepted.
 *
 * @param {string} description - Free-form target description.
 * @returns {boolean} True when a "previous/back" keyword is present.
 */
function descriptionWantsPrevious(description) {
    return /\b(anterior|previous|voltar?|back|retornar?)\b/.test(normalizeText(description || ""));
}
205
/**
 * Tells whether a target description asks to pause playback.
 *
 * @param {string} description - Free-form target description.
 * @returns {boolean} True when a pause keyword is present.
 */
function descriptionWantsPause(description) {
    const normalizedDescription = normalizeText(description || "");
    const pausePattern = /\b(pausa|pause|pausar)\b/;
    return pausePattern.test(normalizedDescription);
}
208
/**
 * Tells whether a target description asks to resume or start playback.
 *
 * The trailing `r` of "continuar" is optional so that "continue" and
 * "continua" are recognized as well.
 *
 * @param {string} description - Free-form target description.
 * @returns {boolean} True when a resume/play keyword is present.
 */
function descriptionWantsResume(description) {
    return /\b(retoma|retomar|resume|continu[ae]r?|despausa|play)\b/.test(normalizeText(description || ""));
}
211
/**
 * Maps a target description to a native media transport command.
 *
 * Returns null when the description is empty, when it mentions a search or
 * result keyword, or when it does not mention a player/service keyword.
 * Otherwise the first matching intent wins, in the order next, previous,
 * pause/resume.
 *
 * @param {string} description - Free-form target description.
 * @returns {"next"|"previous"|"play_pause"|null} The command, or null.
 */
function extractNativeMediaTransportCommand(description) {
    const normalizedDescription = normalizeText(description || "");
    if (!normalizedDescription) {
        return null;
    }
    const mentionsSearchOrResult = /\b(resultado|tocavel|relacionado|faixa|search|busca|pesquisa)\b/.test(normalizedDescription);
    if (mentionsSearchOrResult) {
        return null;
    }
    const mentionsPlayer = /\b(player|controle|fila|music|spotify|youtube|deezer|apple)\b/.test(normalizedDescription);
    if (!mentionsPlayer) {
        return null;
    }
    // Ordered rules: the first predicate that accepts the description decides.
    const intentRules = [
        [descriptionWantsNext, "next"],
        [descriptionWantsPrevious, "previous"],
        [descriptionWantsPause, "play_pause"],
        [descriptionWantsResume, "play_pause"],
    ];
    const matchedRule = intentRules.find(([wantsIntent]) => wantsIntent(description));
    return matchedRule ? matchedRule[1] : null;
}
132
231
  function descriptionLikelyHasTextAnchor(description) {
133
232
  return extractQuotedPhrases(description).length > 0 || extractMeaningfulDescriptionTokens(description).length > 0;
134
233
  }
@@ -832,6 +931,8 @@ export class NativeMacOSJobExecutor {
832
931
  cancelledJobs = new Set();
833
932
  activeChild = null;
834
933
  lastActiveApp = null;
934
+ lastVisualTargetDescription = null;
935
+ lastVisualTargetApp = null;
835
936
  constructor(bridgeConfig) {
836
937
  this.bridgeConfig = bridgeConfig;
837
938
  }
@@ -882,6 +983,17 @@ export class NativeMacOSJobExecutor {
882
983
  if (action.type === "press_shortcut") {
883
984
  await reporter.progress(progressPercent, `Enviando atalho ${action.shortcut}`);
884
985
  await this.pressShortcut(action.shortcut);
986
+ if (action.shortcut.startsWith("media_")) {
987
+ const mediaSummaryMap = {
988
+ media_next: "Acionei o comando de próxima mídia no macOS.",
989
+ media_previous: "Acionei o comando de mídia anterior no macOS.",
990
+ media_pause: "Acionei o comando de pausar mídia no macOS.",
991
+ media_resume: "Acionei o comando de retomar mídia no macOS.",
992
+ media_play: "Acionei o comando de reproduzir mídia no macOS.",
993
+ media_play_pause: "Acionei o comando de play/pause de mídia no macOS.",
994
+ };
995
+ completionNotes.push(mediaSummaryMap[action.shortcut] || `Acionei ${action.shortcut} no macOS.`);
996
+ }
885
997
  continue;
886
998
  }
887
999
  if (action.type === "create_note") {
@@ -891,8 +1003,24 @@ export class NativeMacOSJobExecutor {
891
1003
  continue;
892
1004
  }
893
1005
  if (action.type === "type_text") {
894
- await reporter.progress(progressPercent, "Digitando texto no app ativo");
895
- await this.typeText(action.text);
1006
+ const typingApp = this.lastActiveApp || await this.getFrontmostAppName();
1007
+ await reporter.progress(progressPercent, `Digitando texto em ${typingApp || "app ativo"}`);
1008
+ const typed = await this.guidedTypeText(action.text, typingApp || undefined);
1009
+ if (!typed.ok) {
1010
+ throw new Error(typed.reason || "Nao consegui digitar o texto no app ativo.");
1011
+ }
1012
+ resultPayload.last_typed = {
1013
+ strategy: typed.strategy,
1014
+ verified: typed.verified,
1015
+ app: typed.app,
1016
+ attempts: typed.attempts,
1017
+ text_preview: clipText(action.text, 180),
1018
+ };
1019
+ this.lastVisualTargetDescription = null;
1020
+ this.lastVisualTargetApp = null;
1021
+ completionNotes.push(typed.verified
1022
+ ? `Digitei e confirmei o texto no ${typed.app || "app ativo"}.`
1023
+ : `Digitei o texto no ${typed.app || "app ativo"}.`);
896
1024
  continue;
897
1025
  }
898
1026
  if (action.type === "take_screenshot") {
@@ -1009,17 +1137,13 @@ export class NativeMacOSJobExecutor {
1009
1137
  }
1010
1138
  if (action.type === "whatsapp_send_message") {
1011
1139
  await reporter.progress(progressPercent, `Abrindo a conversa do WhatsApp com ${action.contact}`);
1012
- await this.focusApp("Safari");
1013
1140
  await this.ensureWhatsAppWebReady();
1014
1141
  const selected = await this.selectWhatsAppConversation(action.contact);
1015
1142
  if (!selected) {
1016
1143
  throw new Error(`Nao consegui localizar a conversa do WhatsApp com ${action.contact}.`);
1017
1144
  }
1018
- await reporter.progress(progressPercent, `Digitando a mensagem para ${action.contact} no WhatsApp`);
1019
- await this.focusWhatsAppComposer();
1020
- await this.typeText(action.text);
1021
- await delay(250);
1022
- await this.pressShortcut("return");
1145
+ await reporter.progress(progressPercent, `Enviando a mensagem para ${action.contact} no WhatsApp`);
1146
+ await this.sendWhatsAppMessage(action.text);
1023
1147
  await delay(900);
1024
1148
  const verification = await this.verifyWhatsAppLastMessage(action.text);
1025
1149
  if (!verification.ok) {
@@ -1035,7 +1159,6 @@ export class NativeMacOSJobExecutor {
1035
1159
  }
1036
1160
  if (action.type === "whatsapp_read_chat") {
1037
1161
  await reporter.progress(progressPercent, `Abrindo a conversa do WhatsApp com ${action.contact}`);
1038
- await this.focusApp("Safari");
1039
1162
  await this.ensureWhatsAppWebReady();
1040
1163
  const selected = await this.selectWhatsAppConversation(action.contact);
1041
1164
  if (!selected) {
@@ -1069,6 +1192,42 @@ export class NativeMacOSJobExecutor {
1069
1192
  const initialBrowserState = browserApp
1070
1193
  ? await this.captureBrowserPageState(browserApp).catch(() => null)
1071
1194
  : null;
1195
+ const nativeMediaTransport = extractNativeMediaTransportCommand(targetDescription);
1196
+ if (nativeMediaTransport) {
1197
+ await reporter.progress(progressPercent, `Tentando controle de mídia nativo do macOS para ${targetDescription}`);
1198
+ try {
1199
+ await this.triggerMacOSMediaTransport(nativeMediaTransport);
1200
+ let validated = false;
1201
+ let validationReason = "";
1202
+ if (action.verification_prompt) {
1203
+ const verification = await this.validateVisualClickWithVision(job.job_id, targetDescription, action.verification_prompt, progressPercent, reporter, artifacts, "native_media_transport_result");
1204
+ validated = verification.ok;
1205
+ validationReason = verification.reason;
1206
+ }
1207
+ else if (browserApp) {
1208
+ const browserValidation = await this.confirmBrowserClick(browserApp, initialBrowserState, targetDescription, null);
1209
+ validated = browserValidation.ok;
1210
+ validationReason = browserValidation.reason;
1211
+ }
1212
+ else {
1213
+ validated = true;
1214
+ }
1215
+ if (validated) {
1216
+ resultPayload.last_click = {
1217
+ strategy: "native_media_transport",
1218
+ matched_text: targetDescription,
1219
+ };
1220
+ completionNotes.push(`Acionei ${targetDescription} usando o controle de mídia nativo do macOS.`);
1221
+ clickSucceeded = true;
1222
+ break;
1223
+ }
1224
+ lastFailureReason = validationReason || `O controle de mídia nativo do macOS nao confirmou ${targetDescription}.`;
1225
+ await reporter.progress(progressPercent, "O controle de mídia nativo nao foi suficiente; vou tentar DOM/OCR");
1226
+ }
1227
+ catch (error) {
1228
+ lastFailureReason = error instanceof Error ? error.message : String(error);
1229
+ }
1230
+ }
1072
1231
  if (browserApp === "Safari") {
1073
1232
  await reporter.progress(progressPercent, `Tentando localizar ${targetDescription} diretamente no Safari`);
1074
1233
  const domClick = await this.trySafariDomClick(targetDescription);
@@ -1086,6 +1245,8 @@ export class NativeMacOSJobExecutor {
1086
1245
  validationReason = browserValidation.reason;
1087
1246
  }
1088
1247
  if (validated) {
1248
+ this.lastVisualTargetDescription = targetDescription;
1249
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1089
1250
  resultPayload.last_click = {
1090
1251
  strategy: domClick.strategy || "safari_dom",
1091
1252
  matched_text: domClick.matchedText || null,
@@ -1127,6 +1288,8 @@ export class NativeMacOSJobExecutor {
1127
1288
  }
1128
1289
  if (validated) {
1129
1290
  const region = ocrClick.region || null;
1291
+ this.lastVisualTargetDescription = targetDescription;
1292
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1130
1293
  resultPayload.last_click = {
1131
1294
  strategy: ocrClick.strategy || "local_ocr",
1132
1295
  score: ocrClick.score || null,
@@ -1202,6 +1365,8 @@ export class NativeMacOSJobExecutor {
1202
1365
  }
1203
1366
  }
1204
1367
  completionNotes.push(`Localizei e cliquei em ${targetDescription}.`);
1368
+ this.lastVisualTargetDescription = targetDescription;
1369
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1205
1370
  clickSucceeded = true;
1206
1371
  break;
1207
1372
  }
@@ -1387,7 +1552,26 @@ end tell
1387
1552
  const beforePlayerState = normalizeText(before?.playerState || "");
1388
1553
  const afterPlayerState = normalizeText(after.playerState || "");
1389
1554
  const playerLooksActive = afterPlayerState.includes("pause") || afterPlayerState.includes("pausar");
1555
+ const playerLooksPaused = !playerLooksActive && /play|tocar|reproduzir|continuar|retomar|resume/.test(afterPlayerState);
1556
+ const wantsNext = descriptionWantsNext(targetDescription);
1557
+ const wantsPrevious = descriptionWantsPrevious(targetDescription);
1558
+ const wantsPause = descriptionWantsPause(targetDescription);
1559
+ const wantsResume = descriptionWantsResume(targetDescription);
1560
+ const mediaQueryTokens = extractMediaQueryTokens(targetDescription);
1561
+ const mediaMatchCount = countMatchingTokens(after.playerTitle || "", mediaQueryTokens);
1390
1562
  if (afterUrl.includes("music.youtube.com")) {
1563
+ if (wantsPause && beforePlayerState && beforePlayerState !== afterPlayerState && playerLooksPaused) {
1564
+ return true;
1565
+ }
1566
+ if (wantsResume && playerLooksActive && beforePlayerState !== afterPlayerState) {
1567
+ return true;
1568
+ }
1569
+ if ((wantsNext || wantsPrevious) && beforePlayerTitle && afterPlayerTitle && beforePlayerTitle !== afterPlayerTitle) {
1570
+ return true;
1571
+ }
1572
+ if (mediaQueryTokens.length >= 2 && mediaMatchCount >= Math.max(2, Math.ceil(mediaQueryTokens.length * 0.5)) && playerLooksActive) {
1573
+ return true;
1574
+ }
1391
1575
  if (beforePlayerState && afterPlayerState && beforePlayerState !== afterPlayerState && playerLooksActive) {
1392
1576
  return true;
1393
1577
  }
@@ -1440,6 +1624,23 @@ end tell
1440
1624
  if (!key) {
1441
1625
  throw new Error(`Invalid shortcut: ${shortcut}`);
1442
1626
  }
1627
+ const normalizedShortcut = normalizeText(shortcut).replace(/[\s+-]+/g, "_");
1628
+ const mediaCommandMap = {
1629
+ media_play: "play_pause",
1630
+ media_pause: "play_pause",
1631
+ media_play_pause: "play_pause",
1632
+ media_resume: "play_pause",
1633
+ media_next: "next",
1634
+ media_proxima: "next",
1635
+ media_previous: "previous",
1636
+ media_prev: "previous",
1637
+ media_anterior: "previous",
1638
+ };
1639
+ const mediaCommand = mediaCommandMap[normalizedShortcut];
1640
+ if (mediaCommand) {
1641
+ await this.triggerMacOSMediaTransport(mediaCommand);
1642
+ return;
1643
+ }
1443
1644
  const namedKeyCodes = {
1444
1645
  return: 36,
1445
1646
  enter: 36,
@@ -1465,6 +1666,45 @@ end tell
1465
1666
  `tell application "System Events" to keystroke "${escapeAppleScript(key)}"${usingClause}`,
1466
1667
  ]);
1467
1668
  }
1669
+ async triggerMacOSMediaTransport(command) {
1670
+ const keyTypeMap = {
1671
+ play_pause: 16,
1672
+ next: 17,
1673
+ previous: 18,
1674
+ };
1675
+ const keyType = keyTypeMap[command];
1676
+ const swiftScript = `
1677
+ import AppKit
1678
+ import Foundation
1679
+
1680
+ let keyType = ${keyType}
1681
+
1682
+ func postMediaKey(_ keyType: Int32, down: Bool) {
1683
+ let eventFlags = NSEvent.ModifierFlags(rawValue: 0xA00)
1684
+ let state = down ? 0xA : 0xB
1685
+ let data1 = Int((keyType << 16) | (Int32(state) << 8))
1686
+ guard let event = NSEvent.otherEvent(
1687
+ with: .systemDefined,
1688
+ location: .zero,
1689
+ modifierFlags: eventFlags,
1690
+ timestamp: 0,
1691
+ windowNumber: 0,
1692
+ context: nil,
1693
+ subtype: 8,
1694
+ data1: data1,
1695
+ data2: -1
1696
+ ) else {
1697
+ return
1698
+ }
1699
+ event.cgEvent?.post(tap: .cghidEventTap)
1700
+ }
1701
+
1702
+ postMediaKey(Int32(keyType), down: true)
1703
+ usleep(90000)
1704
+ postMediaKey(Int32(keyType), down: false)
1705
+ `;
1706
+ await this.runCommand("swift", ["-e", swiftScript]);
1707
+ }
1468
1708
  async typeText(text) {
1469
1709
  const previousClipboard = await this.readClipboardText();
1470
1710
  try {
@@ -1477,6 +1717,227 @@ end tell
1477
1717
  }
1478
1718
  }
1479
1719
  }
1720
+ resolveLikelySearchShortcut(app) {
1721
+ const normalizedHint = normalizeText(this.lastVisualTargetDescription || "");
1722
+ const looksLikeSearchTarget = /\b(busca|pesquisa|search|campo|caixa|icone|ícone)\b/.test(normalizedHint);
1723
+ if (!looksLikeSearchTarget || !app) {
1724
+ return null;
1725
+ }
1726
+ if (app === "Spotify") {
1727
+ return "cmd+l";
1728
+ }
1729
+ if (app === "Music") {
1730
+ return "cmd+f";
1731
+ }
1732
+ return null;
1733
+ }
1734
+ async guidedTypeText(text, preferredApp) {
1735
+ const app = preferredApp || this.lastActiveApp || await this.getFrontmostAppName();
1736
+ if (app === "Safari") {
1737
+ const safariResult = await this.trySafariGuidedType(text);
1738
+ if (safariResult.ok) {
1739
+ return {
1740
+ ...safariResult,
1741
+ app,
1742
+ };
1743
+ }
1744
+ }
1745
+ const searchShortcut = this.resolveLikelySearchShortcut(app);
1746
+ if (searchShortcut) {
1747
+ await this.pressShortcut(searchShortcut).catch(() => undefined);
1748
+ await delay(180);
1749
+ }
1750
+ await this.typeText(text);
1751
+ return {
1752
+ ok: true,
1753
+ verified: false,
1754
+ strategy: searchShortcut ? `clipboard_paste_after_${searchShortcut}` : "clipboard_paste",
1755
+ app: app || null,
1756
+ attempts: 1,
1757
+ };
1758
+ }
1759
+ async trySafariGuidedType(text) {
1760
+ for (let attempt = 0; attempt < 3; attempt += 1) {
1761
+ try {
1762
+ const result = await this.runSafariJsonScript(`
1763
+ const inputText = String(__input?.text || "");
1764
+ function isVisible(element) {
1765
+ if (!(element instanceof HTMLElement)) return false;
1766
+ const rect = element.getBoundingClientRect();
1767
+ if (rect.width < 4 || rect.height < 4) return false;
1768
+ const style = window.getComputedStyle(element);
1769
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1770
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1771
+ }
1772
+ function isEditable(element) {
1773
+ return element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || (element instanceof HTMLElement && element.isContentEditable);
1774
+ }
1775
+ function readEditableValue(element) {
1776
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1777
+ return String(element.value || "").trim();
1778
+ }
1779
+ if (element instanceof HTMLElement && element.isContentEditable) {
1780
+ return String(element.innerText || element.textContent || "").trim();
1781
+ }
1782
+ return "";
1783
+ }
1784
+ function clearAndFill(element, value) {
1785
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1786
+ element.focus();
1787
+ element.select?.();
1788
+ element.value = "";
1789
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "deleteContentBackward", data: null }));
1790
+ element.value = value;
1791
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: value }));
1792
+ element.dispatchEvent(new Event("change", { bubbles: true }));
1793
+ return;
1794
+ }
1795
+ if (element instanceof HTMLElement && element.isContentEditable) {
1796
+ element.focus();
1797
+ const selection = window.getSelection();
1798
+ const range = document.createRange();
1799
+ range.selectNodeContents(element);
1800
+ selection?.removeAllRanges();
1801
+ selection?.addRange(range);
1802
+ document.execCommand("selectAll", false);
1803
+ document.execCommand("delete", false);
1804
+ document.execCommand("insertText", false, value);
1805
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: value }));
1806
+ }
1807
+ }
1808
+ const selectors = location.hostname.includes("music.youtube.com")
1809
+ ? [
1810
+ "ytmusic-search-box input#input",
1811
+ "ytmusic-search-box input",
1812
+ "input[placeholder*='Search']",
1813
+ "input[placeholder*='Pesquisar']",
1814
+ "[role='searchbox'] input"
1815
+ ]
1816
+ : location.hostname.includes("open.spotify.com")
1817
+ ? [
1818
+ "input[data-testid='search-input']",
1819
+ "[role='searchbox'] input",
1820
+ "input[placeholder*='Search']"
1821
+ ]
1822
+ : location.hostname.includes("deezer.com")
1823
+ ? [
1824
+ "input[type='search']",
1825
+ "input[placeholder*='Search']",
1826
+ "input[placeholder*='Pesquisar']",
1827
+ "form input[type='text']"
1828
+ ]
1829
+ : location.hostname.includes("soundcloud.com")
1830
+ ? [
1831
+ "input[type='search']",
1832
+ "input[placeholder*='Search']",
1833
+ "form input[type='search']",
1834
+ "form input[type='text']"
1835
+ ]
1836
+ : location.hostname.includes("music.amazon.com")
1837
+ ? [
1838
+ "input[type='search']",
1839
+ "input[aria-label*='Search']",
1840
+ "input[placeholder*='Search']",
1841
+ "[role='searchbox'] input"
1842
+ ]
1843
+ : [
1844
+ "textarea",
1845
+ "input[type='search']",
1846
+ "input[type='text']",
1847
+ "input:not([type])",
1848
+ "[contenteditable='true'][role='textbox']",
1849
+ "[contenteditable='true']"
1850
+ ];
1851
+ const active = document.activeElement;
1852
+ let target = null;
1853
+ if (active instanceof HTMLElement && isVisible(active) && isEditable(active)) {
1854
+ target = active;
1855
+ }
1856
+ if (!target) {
1857
+ const candidates = selectors
1858
+ .flatMap((selector) => Array.from(document.querySelectorAll(selector)))
1859
+ .filter((node) => node instanceof HTMLElement)
1860
+ .filter((node) => isVisible(node))
1861
+ .map((node, index) => {
1862
+ const label = String(
1863
+ node.getAttribute("aria-label")
1864
+ || node.getAttribute("placeholder")
1865
+ || node.getAttribute("title")
1866
+ || node.textContent
1867
+ || ""
1868
+ ).toLowerCase();
1869
+ let score = 0;
1870
+ if (label.includes("search") || label.includes("pesquis")) score += 80;
1871
+ if (node === document.activeElement) score += 30;
1872
+ score += Math.max(0, 12 - index);
1873
+ return { node, score };
1874
+ })
1875
+ .sort((left, right) => right.score - left.score);
1876
+ target = candidates[0]?.node || null;
1877
+ }
1878
+ if (!(target instanceof HTMLElement) || !isEditable(target)) {
1879
+ return { ok: false, reason: "Nao achei um campo editavel confiavel no Safari.", strategy: "safari_guided_type_no_field" };
1880
+ }
1881
+ target.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
1882
+ clearAndFill(target, inputText);
1883
+ return {
1884
+ ok: true,
1885
+ actualText: readEditableValue(target),
1886
+ strategy: target === active ? "safari_guided_type_active_field" : "safari_guided_type_search_field",
1887
+ };
1888
+ `, { text });
1889
+ if (result?.ok && typedTextLooksApplied(result.actualText || "", text)) {
1890
+ return {
1891
+ ok: true,
1892
+ verified: true,
1893
+ strategy: result.strategy || "safari_guided_type",
1894
+ attempts: attempt + 1,
1895
+ };
1896
+ }
1897
+ await delay(180);
1898
+ }
1899
+ catch (error) {
1900
+ const detail = error instanceof Error ? error.message : String(error);
1901
+ if (detail.toLowerCase().includes("allow javascript from apple events")) {
1902
+ break;
1903
+ }
1904
+ }
1905
+ }
1906
+ await this.typeText(text);
1907
+ await delay(180);
1908
+ try {
1909
+ const verification = await this.runSafariJsonScript(`
1910
+ function readEditableValue(element) {
1911
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1912
+ return String(element.value || "").trim();
1913
+ }
1914
+ if (element instanceof HTMLElement && element.isContentEditable) {
1915
+ return String(element.innerText || element.textContent || "").trim();
1916
+ }
1917
+ return "";
1918
+ }
1919
+ const active = document.activeElement;
1920
+ return { actualText: active ? readEditableValue(active) : "" };
1921
+ `, {});
1922
+ if (typedTextLooksApplied(verification.actualText || "", text)) {
1923
+ return {
1924
+ ok: true,
1925
+ verified: true,
1926
+ strategy: "safari_clipboard_retry",
1927
+ attempts: 4,
1928
+ };
1929
+ }
1930
+ }
1931
+ catch {
1932
+ // ignore and fall back to unverified success below
1933
+ }
1934
+ return {
1935
+ ok: true,
1936
+ verified: false,
1937
+ strategy: "clipboard_paste",
1938
+ attempts: 1,
1939
+ };
1940
+ }
1480
1941
  async scrollView(direction, amount = "medium", steps = 1) {
1481
1942
  const clampedSteps = Math.max(1, Math.min(Math.round(steps || 1), 6));
1482
1943
  const lineDelta = {
@@ -1526,11 +1987,114 @@ end repeat
1526
1987
  `;
1527
1988
  await this.runCommand("osascript", ["-e", script]);
1528
1989
  }
1990
+ hasInstalledBridgeExtension(slug) {
1991
+ return Array.isArray(this.bridgeConfig?.installedExtensions)
1992
+ ? this.bridgeConfig?.installedExtensions.includes(slug)
1993
+ : false;
1994
+ }
1995
+ getWhatsAppWebScriptOptions(activate = false) {
1996
+ return {
1997
+ targetUrlIncludes: "web.whatsapp.com",
1998
+ activate,
1999
+ };
2000
+ }
2001
+ async syncWhatsAppExtensionState(status, notes) {
2002
+ const current = await loadManagedBridgeExtensionState(WHATSAPP_WEB_EXTENSION_SLUG).catch(() => null);
2003
+ if (!current) {
2004
+ return;
2005
+ }
2006
+ await saveManagedBridgeExtensionState(WHATSAPP_WEB_EXTENSION_SLUG, {
2007
+ ...current,
2008
+ status,
2009
+ notes,
2010
+ lastStatusCheckAt: new Date().toISOString(),
2011
+ }).catch(() => undefined);
2012
+ }
2013
+ async readWhatsAppWebSessionState() {
2014
+ return this.runSafariJsonScript(`
2015
+ function isVisible(element) {
2016
+ if (!(element instanceof HTMLElement)) return false;
2017
+ const rect = element.getBoundingClientRect();
2018
+ if (rect.width < 4 || rect.height < 4) return false;
2019
+ const style = window.getComputedStyle(element);
2020
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
2021
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
2022
+ }
2023
+
2024
+ return {
2025
+ title: document.title || "",
2026
+ url: location.href || "",
2027
+ qrVisible: Array.from(document.querySelectorAll('[data-testid="qrcode"], canvas[aria-label*="Scan"], canvas[aria-label*="scan"], div[data-ref] canvas'))
2028
+ .some((node) => node instanceof HTMLElement ? isVisible(node) : true),
2029
+ paneVisible: Array.from(document.querySelectorAll('#pane-side, [data-testid="chat-list"]'))
2030
+ .some((node) => node instanceof HTMLElement && isVisible(node)),
2031
+ searchVisible: Array.from(document.querySelectorAll('[data-testid="chat-list-search"] [contenteditable="true"], div[contenteditable="true"][role="textbox"]'))
2032
+ .some((node) => node instanceof HTMLElement && isVisible(node)),
2033
+ composerVisible: Array.from(document.querySelectorAll('footer [contenteditable="true"], [data-testid="conversation-compose-box-input"]'))
2034
+ .some((node) => node instanceof HTMLElement && isVisible(node)),
2035
+ };
2036
+ `, {}, this.getWhatsAppWebScriptOptions(false)).then((result) => ({
2037
+ title: asString(result.title) || "",
2038
+ url: asString(result.url) || "",
2039
+ qrVisible: result.qrVisible === true,
2040
+ paneVisible: result.paneVisible === true,
2041
+ searchVisible: result.searchVisible === true,
2042
+ composerVisible: result.composerVisible === true,
2043
+ }));
2044
+ }
1529
2045
  async ensureWhatsAppWebReady() {
1530
- const page = await this.readFrontmostPage("Safari");
1531
- if (!normalizeComparableUrl(page.url || "").includes("web.whatsapp.com")) {
1532
- throw new Error("O Safari nao esta aberto no WhatsApp Web.");
2046
+ if (!this.hasInstalledBridgeExtension(WHATSAPP_WEB_EXTENSION_SLUG)) {
2047
+ throw new Error("WhatsApp Web nao esta instalado neste Otto Bridge. Rode `otto-bridge extensions --install whatsappweb` e depois `otto-bridge extensions --setup whatsappweb`.");
2048
+ }
2049
+ const currentState = await loadManagedBridgeExtensionState(WHATSAPP_WEB_EXTENSION_SLUG).catch(() => null);
2050
+ if (!currentState || currentState.status === "installed_needs_setup") {
2051
+ throw new Error("WhatsApp Web ainda nao foi configurado neste Otto Bridge. Rode `otto-bridge extensions --setup whatsappweb` para abrir o QR code.");
2052
+ }
2053
+ let sessionState = null;
2054
+ try {
2055
+ sessionState = await this.readWhatsAppWebSessionState();
1533
2056
  }
2057
+ catch (error) {
2058
+ const detail = error instanceof Error ? error.message : String(error);
2059
+ if (detail.toLowerCase().includes("aba correspondente")) {
2060
+ await this.openUrl(WHATSAPP_WEB_URL, "Safari");
2061
+ await delay(1400);
2062
+ try {
2063
+ sessionState = await this.readWhatsAppWebSessionState();
2064
+ }
2065
+ catch {
2066
+ sessionState = null;
2067
+ }
2068
+ }
2069
+ else {
2070
+ throw error;
2071
+ }
2072
+ }
2073
+ if (!sessionState) {
2074
+ const disconnectedStatus = currentState.status === "connected" || currentState.status === "session_expired"
2075
+ ? "session_expired"
2076
+ : "not_open";
2077
+ await this.syncWhatsAppExtensionState(disconnectedStatus, disconnectedStatus === "session_expired"
2078
+ ? "A sessao do WhatsApp Web expirou ou foi fechada. Rode `otto-bridge extensions --setup whatsappweb` para abrir o login novamente."
2079
+ : "Nao consegui localizar uma aba do WhatsApp Web no Safari.");
2080
+ throw new Error("Nao consegui localizar uma aba do WhatsApp Web no Safari. Rode `otto-bridge extensions --setup whatsappweb` se precisar reabrir a sessao.");
2081
+ }
2082
+ const loggedIn = sessionState.paneVisible || sessionState.searchVisible || sessionState.composerVisible;
2083
+ if (!loggedIn) {
2084
+ const disconnectedStatus = currentState.status === "connected" || currentState.status === "session_expired"
2085
+ ? "session_expired"
2086
+ : "waiting_login";
2087
+ await this.syncWhatsAppExtensionState(disconnectedStatus, disconnectedStatus === "session_expired"
2088
+ ? "A sessao do WhatsApp Web expirou. Rode `otto-bridge extensions --setup whatsappweb` para abrir o QR code novamente."
2089
+ : sessionState.qrVisible
2090
+ ? "QR code visivel. Escaneie com o celular para concluir o login."
2091
+ : "Sessao do WhatsApp Web aberta, mas ainda sem chat disponivel.");
2092
+ if (disconnectedStatus === "session_expired") {
2093
+ throw new Error("A sessao do WhatsApp Web expirou nesta maquina. Rode `otto-bridge extensions --setup whatsappweb` para fazer login de novo e depois `otto-bridge extensions --status whatsappweb`.");
2094
+ }
2095
+ throw new Error("WhatsApp Web ainda nao esta conectado nesta maquina. Rode `otto-bridge extensions --setup whatsappweb`, escaneie o QR code e depois `otto-bridge extensions --status whatsappweb`.");
2096
+ }
2097
+ await this.syncWhatsAppExtensionState("connected", "Sessao local do WhatsApp Web pronta para uso.");
1534
2098
  }
1535
2099
  async selectWhatsAppConversation(contact) {
1536
2100
  const prepared = await this.runSafariJsonScript(`
@@ -1582,7 +2146,7 @@ if (!candidates.length) {
1582
2146
 
1583
2147
  focusAndReplaceContent(candidates[0].element, query);
1584
2148
  return { ok: true };
1585
- `, { contact });
2149
+ `, { contact }, this.getWhatsAppWebScriptOptions(false));
1586
2150
  if (!prepared?.ok) {
1587
2151
  return false;
1588
2152
  }
@@ -1631,11 +2195,12 @@ if (typeof target.click === "function") {
1631
2195
  target.click();
1632
2196
  }
1633
2197
  return { clicked: true };
1634
- `, { contact });
2198
+ `, { contact }, this.getWhatsAppWebScriptOptions(false));
1635
2199
  return Boolean(result?.clicked);
1636
2200
  }
1637
- async focusWhatsAppComposer() {
2201
+ async sendWhatsAppMessage(text) {
1638
2202
  const result = await this.runSafariJsonScript(`
2203
+ const value = String(__input?.text || "");
1639
2204
  function isVisible(element) {
1640
2205
  if (!(element instanceof HTMLElement)) return false;
1641
2206
  const rect = element.getBoundingClientRect();
@@ -1645,22 +2210,66 @@ function isVisible(element) {
1645
2210
  return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1646
2211
  }
1647
2212
 
1648
- const candidates = Array.from(document.querySelectorAll('footer div[contenteditable="true"], [data-testid="conversation-compose-box-input"], main footer [contenteditable="true"]'))
2213
+ function clearAndFillComposer(element, nextValue) {
2214
+ element.focus();
2215
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
2216
+ element.value = "";
2217
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "deleteContentBackward", data: null }));
2218
+ element.value = nextValue;
2219
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: nextValue }));
2220
+ return;
2221
+ }
2222
+ const selection = window.getSelection();
2223
+ const range = document.createRange();
2224
+ range.selectNodeContents(element);
2225
+ selection?.removeAllRanges();
2226
+ selection?.addRange(range);
2227
+ document.execCommand("selectAll", false);
2228
+ document.execCommand("delete", false);
2229
+ document.execCommand("insertText", false, nextValue);
2230
+ if ((element.innerText || "").trim() !== nextValue.trim()) {
2231
+ element.textContent = nextValue;
2232
+ }
2233
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: nextValue }));
2234
+ }
2235
+
2236
+ const candidates = Array.from(document.querySelectorAll('footer div[contenteditable="true"], [data-testid="conversation-compose-box-input"], main footer [contenteditable="true"], footer textarea'))
1649
2237
  .filter((node) => node instanceof HTMLElement)
1650
2238
  .filter((node) => isVisible(node))
1651
2239
  .sort((left, right) => right.getBoundingClientRect().top - left.getBoundingClientRect().top);
1652
2240
 
1653
2241
  if (!candidates.length) {
1654
- return { focused: false, reason: "Nao achei o campo de mensagem do WhatsApp Web." };
2242
+ return { sent: false, reason: "Nao achei o campo de mensagem do WhatsApp Web." };
1655
2243
  }
1656
2244
 
1657
2245
  const composer = candidates[0];
1658
- composer.focus();
2246
+ clearAndFillComposer(composer, value);
1659
2247
  composer.click();
1660
- return { focused: true };
1661
- `);
1662
- if (!result?.focused) {
1663
- throw new Error(result?.reason || "Nao consegui focar o campo de mensagem do WhatsApp Web.");
2248
+
2249
+ const sendCandidates = Array.from(document.querySelectorAll('[data-testid="compose-btn-send"], button[aria-label*="Send"], button[aria-label*="Enviar"], span[data-icon="send"], div[role="button"][aria-label*="Send"], div[role="button"][aria-label*="Enviar"]'))
2250
+ .map((node) => node instanceof HTMLElement ? (node.closest('button, div[role="button"]') || node) : null)
2251
+ .filter((node) => node instanceof HTMLElement)
2252
+ .filter((node) => isVisible(node));
2253
+
2254
+ const sendButton = sendCandidates[0];
2255
+ if (sendButton instanceof HTMLElement) {
2256
+ sendButton.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
2257
+ sendButton.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, cancelable: true, view: window }));
2258
+ sendButton.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, cancelable: true, view: window }));
2259
+ sendButton.dispatchEvent(new MouseEvent("click", { bubbles: true, cancelable: true, view: window }));
2260
+ if (typeof sendButton.click === "function") {
2261
+ sendButton.click();
2262
+ }
2263
+ return { sent: true };
2264
+ }
2265
+
2266
+ composer.dispatchEvent(new KeyboardEvent("keydown", { key: "Enter", code: "Enter", keyCode: 13, which: 13, bubbles: true }));
2267
+ composer.dispatchEvent(new KeyboardEvent("keypress", { key: "Enter", code: "Enter", keyCode: 13, which: 13, bubbles: true }));
2268
+ composer.dispatchEvent(new KeyboardEvent("keyup", { key: "Enter", code: "Enter", keyCode: 13, which: 13, bubbles: true }));
2269
+ return { sent: true };
2270
+ `, { text }, this.getWhatsAppWebScriptOptions(false));
2271
+ if (!result?.sent) {
2272
+ throw new Error(result?.reason || "Nao consegui enviar a mensagem no WhatsApp Web.");
1664
2273
  }
1665
2274
  }
1666
2275
  async readWhatsAppVisibleConversation(contact, limit) {
@@ -1690,7 +2299,7 @@ const messages = containers.map((node) => {
1690
2299
  }).filter((item) => item.text);
1691
2300
 
1692
2301
  return { messages: messages.slice(-maxMessages) };
1693
- `, { contact, limit });
2302
+ `, { contact, limit }, this.getWhatsAppWebScriptOptions(false));
1694
2303
  const messages = Array.isArray(result?.messages)
1695
2304
  ? result.messages
1696
2305
  .map((item) => ({
@@ -1804,7 +2413,7 @@ return { messages: messages.slice(-maxMessages) };
1804
2413
  reason: verificationAnswer,
1805
2414
  };
1806
2415
  }
1807
- async runSafariJsonScript(scriptBody, input) {
2416
+ async runSafariJsonScript(scriptBody, input, options) {
1808
2417
  const wrappedScript = `
1809
2418
  (function(){
1810
2419
  const __input = ${JSON.stringify(input || null)};
@@ -1821,12 +2430,42 @@ ${scriptBody}
1821
2430
  }
1822
2431
  })()
1823
2432
  `;
2433
+ const targetUrlIncludes = String(options?.targetUrlIncludes || "").trim();
2434
+ const shouldActivate = options?.activate !== false;
1824
2435
  const script = `
2436
+ set targetUrlIncludes to "${escapeAppleScript(targetUrlIncludes)}"
2437
+ set shouldActivate to ${shouldActivate ? "true" : "false"}
1825
2438
  tell application "Safari"
1826
- activate
1827
2439
  if (count of windows) = 0 then error "Safari nao possui janelas abertas."
2440
+ if shouldActivate then activate
2441
+ set targetWindow to missing value
2442
+ set targetTab to missing value
2443
+ if targetUrlIncludes is not "" then
2444
+ repeat with safariWindow in windows
2445
+ repeat with safariTab in tabs of safariWindow
2446
+ set tabUrl to ""
2447
+ try
2448
+ set tabUrl to URL of safariTab
2449
+ end try
2450
+ if tabUrl contains targetUrlIncludes then
2451
+ set targetWindow to safariWindow
2452
+ set targetTab to safariTab
2453
+ exit repeat
2454
+ end if
2455
+ end repeat
2456
+ if targetTab is not missing value then exit repeat
2457
+ end repeat
2458
+ if targetTab is missing value then error "Safari nao possui aba correspondente a " & targetUrlIncludes
2459
+ else
2460
+ set targetWindow to front window
2461
+ set targetTab to current tab of front window
2462
+ end if
2463
+ if shouldActivate and targetWindow is not missing value then
2464
+ set current tab of targetWindow to targetTab
2465
+ set index of targetWindow to 1
2466
+ end if
1828
2467
  delay 0.2
1829
- set scriptResult to do JavaScript "${escapeAppleScript(wrappedScript)}" in current tab of front window
2468
+ set scriptResult to do JavaScript "${escapeAppleScript(wrappedScript)}" in targetTab
1830
2469
  end tell
1831
2470
  return scriptResult
1832
2471
  `;
@@ -1849,11 +2488,17 @@ const normalizedDescription = normalize(rawDescription);
1849
2488
  const isYouTubeMusic = location.hostname.includes("music.youtube.com");
1850
2489
  const wantsFirst = /\\b(primeir[ao]?|first)\\b/.test(normalizedDescription);
1851
2490
  const wantsVideo = /\\b(video|videos|musica|faixa|youtube|resultado|watch)\\b/.test(normalizedDescription) || location.hostname.includes("youtube");
2491
+ const wantsNext = /\\b(proxim[ao]?|next|skip|pular|avanca|avancar)\\b/.test(normalizedDescription);
2492
+ const wantsPrevious = /\\b(anterior|previous|volta[ar]?|back|retorna[ar]?)\\b/.test(normalizedDescription);
2493
+ const wantsPause = /\\b(pausa|pause|pausar)\\b/.test(normalizedDescription);
2494
+ const wantsResume = /\\b(retoma|retomar|resume|continu[ae]r|despausa|play)\\b/.test(normalizedDescription);
1852
2495
  const stopWords = new Set([
1853
2496
  "o", "a", "os", "as", "um", "uma", "uns", "umas", "de", "da", "do", "das", "dos",
1854
2497
  "em", "no", "na", "nos", "nas", "para", "por", "com", "que", "visivel", "visiveis",
1855
2498
  "visivel", "tela", "pagina", "page", "site", "link", "botao", "botao", "clicar",
1856
- "clique", "seleciona", "selecionar", "resultado", "resultados"
2499
+ "clique", "seleciona", "selecionar", "resultado", "resultados", "youtube", "music",
2500
+ "melhor", "tocavel", "relacionado", "diretamente", "navegador", "player", "fila",
2501
+ "play", "pause", "pausa", "proxima", "proximo", "anterior"
1857
2502
  ]);
1858
2503
  const quotedPhrases = Array.from(rawDescription.matchAll(/["'“”‘’]([^"'“”‘’]{2,80})["'“”‘’]/g))
1859
2504
  .map((match) => normalize(match[1]));
@@ -1928,6 +2573,173 @@ function deriveText(element) {
1928
2573
  return "";
1929
2574
  }
1930
2575
 
2576
+ function clickElement(element, strategy, matchedText, matchedHref, score, totalCandidates) {
2577
+ if (!(element instanceof HTMLElement || element instanceof HTMLAnchorElement)) {
2578
+ return null;
2579
+ }
2580
+ element.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
2581
+ const rect = element.getBoundingClientRect();
2582
+ for (const eventName of ["mouseover", "mousedown", "mouseup", "click"]) {
2583
+ element.dispatchEvent(new MouseEvent(eventName, {
2584
+ bubbles: true,
2585
+ cancelable: true,
2586
+ view: window,
2587
+ clientX: rect.left + (rect.width / 2),
2588
+ clientY: rect.top + (rect.height / 2),
2589
+ }));
2590
+ }
2591
+ if (typeof element.click === "function") {
2592
+ element.click();
2593
+ }
2594
+ return {
2595
+ clicked: true,
2596
+ matchedText: String(matchedText || "").slice(0, 180),
2597
+ matchedHref: String(matchedHref || ""),
2598
+ score,
2599
+ totalCandidates,
2600
+ strategy,
2601
+ };
2602
+ }
2603
+
2604
+ function attemptYouTubeMusicTransportClick() {
2605
+ if (!isYouTubeMusic || !(wantsNext || wantsPrevious || wantsPause || wantsResume)) {
2606
+ return null;
2607
+ }
2608
+
2609
+ const playerButtons = Array.from(document.querySelectorAll(
2610
+ "ytmusic-player-bar button, ytmusic-player-bar [role='button'], ytmusic-player-bar tp-yt-paper-icon-button"
2611
+ ))
2612
+ .filter((node) => node instanceof HTMLElement)
2613
+ .filter((node) => isVisible(node));
2614
+
2615
+ const ranked = playerButtons
2616
+ .map((node, index) => {
2617
+ const label = normalize([
2618
+ deriveText(node),
2619
+ node.getAttribute("aria-label"),
2620
+ node.getAttribute("title"),
2621
+ node.id,
2622
+ ].filter(Boolean).join(" "));
2623
+ let score = 0;
2624
+ if (wantsNext && /proxim|next|skip/.test(label)) score += 140;
2625
+ if (wantsPrevious && /anterior|previous|back|volta/.test(label)) score += 140;
2626
+ if (wantsPause && /pause|pausa|pausar/.test(label)) score += 140;
2627
+ if (wantsResume && /play|tocar|reproduzir|resume|retomar|continuar/.test(label)) score += 140;
2628
+ if (label.includes("player")) score += 12;
2629
+ score += Math.max(0, 12 - index);
2630
+ return score > 0 ? { node, label, score } : null;
2631
+ })
2632
+ .filter(Boolean)
2633
+ .sort((left, right) => right.score - left.score);
2634
+
2635
+ if (!ranked.length) {
2636
+ return null;
2637
+ }
2638
+
2639
+ const winner = ranked[0];
2640
+ return clickElement(winner.node, "safari_dom_ytmusic_transport", winner.label, "", winner.score, ranked.length);
2641
+ }
2642
+
2643
+ function attemptYouTubeMusicSearchResultClick() {
2644
+ if (!isYouTubeMusic || wantsNext || wantsPrevious || wantsPause) {
2645
+ return null;
2646
+ }
2647
+ if (!quotedPhrases.length && !tokens.length) {
2648
+ return null;
2649
+ }
2650
+
2651
+ const rows = Array.from(document.querySelectorAll("ytmusic-responsive-list-item-renderer"))
2652
+ .filter((node) => node instanceof HTMLElement)
2653
+ .filter((node) => isVisible(node));
2654
+
2655
+ const rankedRows = rows
2656
+ .map((row, index) => {
2657
+ const titleNode = row.querySelector("#title, .title, yt-formatted-string.title");
2658
+ const subtitleNode = row.querySelector(".subtitle, .byline, .secondary-flex-columns");
2659
+ const titleText = String((titleNode && titleNode.textContent) || "").trim();
2660
+ const subtitleText = String((subtitleNode && subtitleNode.textContent) || "").trim();
2661
+ const rowText = deriveText(row);
2662
+ const normalizedTitle = normalize(titleText);
2663
+ const normalizedSubtitle = normalize(subtitleText);
2664
+ const normalizedRow = normalize(rowText);
2665
+ let score = 0;
2666
+
2667
+ for (const phrase of quotedPhrases) {
2668
+ if (!phrase) continue;
2669
+ if (normalizedTitle.includes(phrase)) score += 160;
2670
+ else if (normalizedRow.includes(phrase)) score += 110;
2671
+ }
2672
+
2673
+ for (const token of tokens) {
2674
+ if (normalizedTitle.includes(token)) score += 28;
2675
+ else if (normalizedSubtitle.includes(token)) score += 16;
2676
+ else if (normalizedRow.includes(token)) score += 10;
2677
+ }
2678
+
2679
+ if (tokens.length > 1) {
2680
+ const titleMatches = tokens.filter((token) => normalizedTitle.includes(token)).length;
2681
+ const rowMatches = tokens.filter((token) => normalizedRow.includes(token)).length;
2682
+ if (titleMatches >= Math.max(2, Math.ceil(tokens.length * 0.5))) score += 80;
2683
+ if (rowMatches === tokens.length) score += 40;
2684
+ }
2685
+
2686
+ score += Math.max(0, 10 - index);
2687
+
2688
+ const clickableCandidates = Array.from(row.querySelectorAll(
2689
+ "ytmusic-item-thumbnail-overlay-renderer button, button[aria-label], tp-yt-paper-icon-button, a[href*='watch?v=']"
2690
+ ))
2691
+ .filter((candidate) => candidate instanceof HTMLElement || candidate instanceof HTMLAnchorElement)
2692
+ .filter((candidate) => isVisible(candidate))
2693
+ .map((candidate) => {
2694
+ const label = normalize([
2695
+ deriveText(candidate),
2696
+ candidate.getAttribute("aria-label"),
2697
+ candidate.getAttribute("title"),
2698
+ ].filter(Boolean).join(" "));
2699
+ let candidateScore = 0;
2700
+ if (/play|tocar|reproduzir|assistir/.test(label)) candidateScore += 30;
2701
+ if (candidate instanceof HTMLAnchorElement && normalize(candidate.href).includes("watch?v=")) candidateScore += 18;
2702
+ if (candidate.closest("ytmusic-item-thumbnail-overlay-renderer")) candidateScore += 14;
2703
+ return { candidate, label, candidateScore };
2704
+ })
2705
+ .sort((left, right) => right.candidateScore - left.candidateScore);
2706
+
2707
+ return score > 0 ? {
2708
+ row,
2709
+ titleText,
2710
+ href: clickableCandidates[0]?.candidate instanceof HTMLAnchorElement ? clickableCandidates[0].candidate.href : "",
2711
+ score: score + (clickableCandidates[0]?.candidateScore || 0),
2712
+ target: clickableCandidates[0]?.candidate || row,
2713
+ } : null;
2714
+ })
2715
+ .filter(Boolean)
2716
+ .sort((left, right) => right.score - left.score);
2717
+
2718
+ if (!rankedRows.length) {
2719
+ return null;
2720
+ }
2721
+
2722
+ const winner = rankedRows[0];
2723
+ return clickElement(
2724
+ winner.target,
2725
+ "safari_dom_ytmusic_result",
2726
+ winner.titleText || deriveText(winner.row),
2727
+ winner.href || "",
2728
+ winner.score,
2729
+ rankedRows.length,
2730
+ );
2731
+ }
2732
+
2733
+ const ytmTransport = attemptYouTubeMusicTransportClick();
2734
+ if (ytmTransport) {
2735
+ return ytmTransport;
2736
+ }
2737
+
2738
+ const ytmResult = attemptYouTubeMusicSearchResultClick();
2739
+ if (ytmResult) {
2740
+ return ytmResult;
2741
+ }
2742
+
1931
2743
  function scoreCandidate(element, rank) {
1932
2744
  const text = deriveText(element);
1933
2745
  const href = element instanceof HTMLAnchorElement
@@ -2048,7 +2860,8 @@ tell application "Safari"
2048
2860
  activate
2049
2861
  if (count of windows) = 0 then error "Safari nao possui janelas abertas."
2050
2862
  delay 1
2051
- set pageJson to do JavaScript "(function(){const title=document.title||''; const url=location.href||''; const text=((document.body&&document.body.innerText)||'').trim().slice(0, 12000); const playerButton=document.querySelector('ytmusic-player-bar #play-pause-button, ytmusic-player-bar tp-yt-paper-icon-button#play-pause-button, ytmusic-player-bar tp-yt-paper-icon-button.play-pause-button'); const playerTitle=(Array.from(document.querySelectorAll('ytmusic-player-bar .title, ytmusic-player-bar .content-info-wrapper .title, ytmusic-player-bar [slot=\"title\"]')).map((node)=>((node&&node.textContent)||'').trim()).find(Boolean))||''; const playerState=(playerButton&&((playerButton.getAttribute('title')||playerButton.getAttribute('aria-label')||playerButton.textContent)||'').trim())||''; return JSON.stringify({title:title,url:url,text:text,playerTitle:playerTitle,playerState:playerState});})();" in current tab of front window
2863
+ set jsCode to "(function(){const title=document.title||'';const url=location.href||'';const text=((document.body&&document.body.innerText)||'').trim().slice(0,12000);const playerButton=document.querySelector('ytmusic-player-bar #play-pause-button, ytmusic-player-bar tp-yt-paper-icon-button#play-pause-button, ytmusic-player-bar tp-yt-paper-icon-button.play-pause-button');const playerTitle=(Array.from(document.querySelectorAll('ytmusic-player-bar .title, ytmusic-player-bar .content-info-wrapper .title, ytmusic-player-bar [slot=title]')).map((node)=>((node&&node.textContent)||'').trim()).find(Boolean))||'';const playerState=(playerButton&&((playerButton.getAttribute('title')||playerButton.getAttribute('aria-label')||playerButton.textContent)||'').trim())||'';return JSON.stringify({title,url,text,playerTitle,playerState});})();"
2864
+ set pageJson to do JavaScript jsCode in current tab of front window
2052
2865
  end tell
2053
2866
  return pageJson
2054
2867
  `;
@@ -2549,7 +3362,15 @@ if let output = String(data: data, encoding: .utf8) {
2549
3362
  return `${action.app} ficou em foco no macOS`;
2550
3363
  }
2551
3364
  if (action.type === "press_shortcut") {
2552
- return `Atalho ${action.shortcut} executado no macOS`;
3365
+ const mediaSummaryMap = {
3366
+ media_next: "Comando de próxima mídia executado no macOS",
3367
+ media_previous: "Comando de mídia anterior executado no macOS",
3368
+ media_pause: "Comando de pausar mídia executado no macOS",
3369
+ media_resume: "Comando de retomar mídia executado no macOS",
3370
+ media_play: "Comando de reproduzir mídia executado no macOS",
3371
+ media_play_pause: "Comando de play/pause de mídia executado no macOS",
3372
+ };
3373
+ return mediaSummaryMap[action.shortcut] || `Atalho ${action.shortcut} executado no macOS`;
2553
3374
  }
2554
3375
  if (action.type === "create_note") {
2555
3376
  return `Nota criada no Notes: ${action.title || deriveNoteTitle(action.text)}`;