@leg3ndy/otto-bridge 0.5.6 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -129,6 +129,102 @@ function extractMeaningfulDescriptionTokens(value) {
129
129
  .map((token) => token.trim())
130
130
  .filter((token) => token.length >= 3 && !GENERIC_VISUAL_STOP_WORDS.has(token))));
131
131
  }
132
/**
 * Vocabulary that names a media service, a player surface or a transport
 * command rather than the song/artist being asked for.
 *
 * `extractMediaQueryTokens` drops these words so that only the actual query
 * terms are compared against the player title. Entries are lowercase and
 * unaccented (e.g. "reproducao"); presumably tokens are normalized the same
 * way before lookup — confirm against `normalizeText`.
 *
 * "deezer", "pular", "avanca" and "despausa" are included because the
 * transport-command regexes in this file already treat them as service or
 * command words; without them they would be mistaken for song query terms.
 */
const MEDIA_QUERY_STOP_WORDS = new Set([
    // Services and player surfaces.
    "youtube",
    "music",
    "spotify",
    "apple",
    "deezer",
    "player",
    "playerbar",
    "navegador",
    "fila",
    // Play / pause / resume commands.
    "play",
    "pause",
    "pausa",
    "pausar",
    "despausa",
    "tocar",
    "toque",
    "reproduzir",
    "reproducao",
    "retomar",
    "retoma",
    "continuar",
    "continue",
    "resumir",
    "resume",
    // Next / previous commands.
    "next",
    "skip",
    "pular",
    "avanca",
    "proxima",
    "proximo",
    "anterior",
    "previous",
    "voltar",
    "volta",
    // Filler words seen in target descriptions.
    "melhor",
    "tocavel",
    "relacionado",
    "diretamente",
]);
168
/**
 * Extracts the tokens of a description that identify the requested media.
 *
 * Takes the tokens produced by `extractMeaningfulDescriptionTokens` and
 * removes every token listed in `MEDIA_QUERY_STOP_WORDS`.
 *
 * @param {*} value - Description text, passed through unchanged to
 *   `extractMeaningfulDescriptionTokens`.
 * @returns {string[]} Remaining tokens, in their original order.
 */
function extractMediaQueryTokens(value) {
    const descriptionTokens = extractMeaningfulDescriptionTokens(value);
    const isQueryToken = (token) => !MEDIA_QUERY_STOP_WORDS.has(token);
    return descriptionTokens.filter(isQueryToken);
}
171
/**
 * Counts how many of the given tokens occur as substrings of the text.
 *
 * The text is passed through `normalizeText` first; the tokens are used
 * as-is, so callers are expected to supply already-normalized tokens.
 *
 * @param {string} text - Text to search; falsy values are treated as "".
 * @param {string[]} tokens - Tokens to look for.
 * @returns {number} Number of tokens found, or 0 when the normalized text or
 *   the token list is empty.
 */
function countMatchingTokens(text, tokens) {
    const haystack = normalizeText(text || "");
    if (!haystack || !tokens.length) {
        return 0;
    }
    return tokens.filter((token) => haystack.includes(token)).length;
}
178
/**
 * Heuristically decides whether the text read back from a field corresponds
 * to the text that was supposed to be typed.
 *
 * Both strings go through `normalizeText`, have whitespace runs collapsed to
 * a single space and are trimmed. The check then passes when either string
 * contains the other, or when enough of the expected tokens (alphanumeric
 * runs of 3+ characters) appear in the actual text.
 *
 * @param {string} actual - Text currently present in the field.
 * @param {string} expected - Text that was meant to be typed.
 * @returns {boolean} True when the field content looks like the expected text.
 */
function typedTextLooksApplied(actual, expected) {
    const squash = (value) => normalizeText(value || "").replace(/\s+/g, " ").trim();
    const got = squash(actual);
    const wanted = squash(expected);
    if (!got || !wanted) {
        return false;
    }
    // Containment in either direction; this also covers exact equality.
    if (got.includes(wanted) || wanted.includes(got)) {
        return true;
    }
    const wantedTokens = wanted.split(/[^a-z0-9]+/).filter((token) => token.length >= 3);
    if (wantedTokens.length === 0) {
        return false;
    }
    // At least 60% of the tokens, and never fewer than two, must be present.
    // A single-token expectation can therefore only pass via containment above.
    const requiredHits = Math.max(2, Math.ceil(wantedTokens.length * 0.6));
    let hits = 0;
    for (const token of wantedTokens) {
        if (got.includes(token)) {
            hits += 1;
        }
    }
    return hits >= requiredHits;
}
196
/**
 * Tells whether a target description asks to skip to the next item.
 *
 * @param {string} description - Free-text description; falsy values are
 *   treated as "".
 * @returns {boolean} True when the normalized description contains one of the
 *   "next" keywords as a whole word.
 */
function descriptionWantsNext(description) {
    const nextKeywords = /\b(proxim[ao]?|next|skip|pular|avanca|avanç[ae])\b/;
    const normalizedDescription = normalizeText(description || "");
    return nextKeywords.test(normalizedDescription);
}
199
/**
 * Tells whether a target description asks to go back to the previous item.
 *
 * Recognised whole words: "anterior", "previous", "volta", "voltar", "back",
 * "retorna", "retornar".
 *
 * @param {string} description - Free-text description; falsy values are
 *   treated as "".
 * @returns {boolean} True when the normalized description contains one of the
 *   keywords above.
 */
function descriptionWantsPrevious(description) {
    // `voltar?` means an optional trailing "r". The earlier `volta[ar]?` was a
    // character class and also accepted the non-words "voltaa" / "retornaa".
    const previousKeywords = /\b(anterior|previous|voltar?|back|retornar?)\b/;
    return previousKeywords.test(normalizeText(description || ""));
}
202
/**
 * Tells whether a target description asks to pause playback.
 *
 * @param {string} description - Free-text description; falsy values are
 *   treated as "".
 * @returns {boolean} True when the normalized description contains "pausa",
 *   "pause" or "pausar" as a whole word.
 */
function descriptionWantsPause(description) {
    const pauseKeywords = /\b(pausa|pause|pausar)\b/;
    const normalizedDescription = normalizeText(description || "");
    return pauseKeywords.test(normalizedDescription);
}
205
/**
 * Tells whether a target description asks to resume/start playback.
 *
 * Recognised whole words: "retoma", "retomar", "resume", "continua",
 * "continue", "continuar" ("continuer" is still accepted for backward
 * compatibility), "despausa" and "play".
 *
 * @param {string} description - Free-text description; falsy values are
 *   treated as "".
 * @returns {boolean} True when the normalized description contains one of the
 *   keywords above.
 */
function descriptionWantsResume(description) {
    // The trailing "r" is optional so the imperative forms "continue" and
    // "continua" match too. The earlier `continu[ae]r` only matched infinitives.
    const resumeKeywords = /\b(retoma|retomar|resume|continu[ae]r?|despausa|play)\b/;
    return resumeKeywords.test(normalizeText(description || ""));
}
208
/**
 * Maps a target description to a native media transport command, if any.
 *
 * A command is only produced when the description mentions a player/service
 * word and does not mention search or result vocabulary. Precedence is
 * next, then previous, then play/pause.
 *
 * @param {string} description - Free-text description of the click target.
 * @returns {"next"|"previous"|"play_pause"|null} The transport command, or
 *   null when the description should not be handled by media keys.
 */
function extractNativeMediaTransportCommand(description) {
    const normalized = normalizeText(description || "");
    if (!normalized) {
        return null;
    }
    // Descriptions about search results or specific tracks are not transport controls.
    const mentionsSearchOrResult = /\b(resultado|tocavel|relacionado|faixa|search|busca|pesquisa)\b/.test(normalized);
    if (mentionsSearchOrResult) {
        return null;
    }
    const mentionsPlayer = /\b(player|controle|fila|music|spotify|youtube|deezer|apple)\b/.test(normalized);
    if (!mentionsPlayer) {
        return null;
    }
    if (descriptionWantsNext(description)) {
        return "next";
    }
    if (descriptionWantsPrevious(description)) {
        return "previous";
    }
    // Pause and resume map to the same toggle command.
    const wantsToggle = descriptionWantsPause(description) || descriptionWantsResume(description);
    return wantsToggle ? "play_pause" : null;
}
132
228
  function descriptionLikelyHasTextAnchor(description) {
133
229
  return extractQuotedPhrases(description).length > 0 || extractMeaningfulDescriptionTokens(description).length > 0;
134
230
  }
@@ -665,6 +761,44 @@ function parseStructuredActions(job) {
665
761
  }
666
762
  continue;
667
763
  }
764
+ if (type === "scroll_view" || type === "scroll" || type === "scroll_page") {
765
+ const rawDirection = (asString(action.direction) || asString(action.dir) || "down").toLowerCase();
766
+ const direction = rawDirection === "up" || rawDirection === "cima" ? "up" : "down";
767
+ const rawAmount = (asString(action.amount) || "").toLowerCase();
768
+ const amount = rawAmount === "small" || rawAmount === "medium" || rawAmount === "large"
769
+ ? rawAmount
770
+ : undefined;
771
+ const rawSteps = Number(action.steps);
772
+ const steps = Number.isFinite(rawSteps) ? Math.max(1, Math.min(Math.round(rawSteps), 6)) : undefined;
773
+ actions.push({
774
+ type: "scroll_view",
775
+ direction,
776
+ amount,
777
+ steps,
778
+ app: asString(action.app) || undefined,
779
+ });
780
+ continue;
781
+ }
782
+ if (type === "whatsapp_send_message") {
783
+ const contact = asString(action.contact) || asString(action.recipient);
784
+ const text = asString(action.text) || asString(action.message);
785
+ if (contact && text) {
786
+ actions.push({ type: "whatsapp_send_message", contact, text });
787
+ }
788
+ continue;
789
+ }
790
+ if (type === "whatsapp_read_chat") {
791
+ const contact = asString(action.contact) || asString(action.recipient);
792
+ const rawLimit = Number(action.limit);
793
+ if (contact) {
794
+ actions.push({
795
+ type: "whatsapp_read_chat",
796
+ contact,
797
+ limit: Number.isFinite(rawLimit) ? Math.max(1, Math.min(Math.round(rawLimit), 30)) : undefined,
798
+ });
799
+ }
800
+ continue;
801
+ }
668
802
  if (type === "click_visual_target" || type === "click_target") {
669
803
  const description = asString(action.description) || asString(action.target);
670
804
  if (description) {
@@ -744,6 +878,23 @@ function deriveActionsFromText(job) {
744
878
  }
745
879
  return [{ type: "set_volume", level }];
746
880
  }
881
+ if (/\b(scroll|rola\w*|role\w*|desce\w*|sobe\w*)\b/i.test(task)) {
882
+ const direction = /\b(sobe\w*|suba\w*|para cima|pra cima|scroll up)\b/i.test(task) ? "up" : "down";
883
+ const amount = /\b(pouco|leve|small)\b/i.test(task)
884
+ ? "small"
885
+ : /\b(muito|bastante|fim|grande|large)\b/i.test(task)
886
+ ? "large"
887
+ : "medium";
888
+ const stepsMatch = task.match(/\b(\d{1,2})\s*(?:x|vezes?)\b/i);
889
+ const steps = stepsMatch?.[1] ? Math.max(1, Math.min(Number(stepsMatch[1]), 6)) : 1;
890
+ return [{
891
+ type: "scroll_view",
892
+ direction,
893
+ amount,
894
+ steps,
895
+ app: detectedApp || undefined,
896
+ }];
897
+ }
747
898
  if ((normalizedTask.includes("leia") || normalizedTask.includes("ler")) && detectedUrl) {
748
899
  return [
749
900
  { type: "open_url", url: detectedUrl, app: detectedApp || "Safari" },
@@ -777,6 +928,8 @@ export class NativeMacOSJobExecutor {
777
928
  cancelledJobs = new Set();
778
929
  activeChild = null;
779
930
  lastActiveApp = null;
931
+ lastVisualTargetDescription = null;
932
+ lastVisualTargetApp = null;
780
933
  constructor(bridgeConfig) {
781
934
  this.bridgeConfig = bridgeConfig;
782
935
  }
@@ -836,8 +989,24 @@ export class NativeMacOSJobExecutor {
836
989
  continue;
837
990
  }
838
991
  if (action.type === "type_text") {
839
- await reporter.progress(progressPercent, "Digitando texto no app ativo");
840
- await this.typeText(action.text);
992
+ const typingApp = this.lastActiveApp || await this.getFrontmostAppName();
993
+ await reporter.progress(progressPercent, `Digitando texto em ${typingApp || "app ativo"}`);
994
+ const typed = await this.guidedTypeText(action.text, typingApp || undefined);
995
+ if (!typed.ok) {
996
+ throw new Error(typed.reason || "Nao consegui digitar o texto no app ativo.");
997
+ }
998
+ resultPayload.last_typed = {
999
+ strategy: typed.strategy,
1000
+ verified: typed.verified,
1001
+ app: typed.app,
1002
+ attempts: typed.attempts,
1003
+ text_preview: clipText(action.text, 180),
1004
+ };
1005
+ this.lastVisualTargetDescription = null;
1006
+ this.lastVisualTargetApp = null;
1007
+ completionNotes.push(typed.verified
1008
+ ? `Digitei e confirmei o texto no ${typed.app || "app ativo"}.`
1009
+ : `Digitei o texto no ${typed.app || "app ativo"}.`);
841
1010
  continue;
842
1011
  }
843
1012
  if (action.type === "take_screenshot") {
@@ -934,6 +1103,68 @@ export class NativeMacOSJobExecutor {
934
1103
  completionNotes.push(`Volume ajustado para ${action.level}% no macOS.`);
935
1104
  continue;
936
1105
  }
1106
+ if (action.type === "scroll_view") {
1107
+ const scrollApp = action.app || this.lastActiveApp || await this.getFrontmostAppName();
1108
+ if (scrollApp) {
1109
+ await reporter.progress(progressPercent, `Trazendo ${scrollApp} para frente antes de rolar a tela`);
1110
+ await this.focusApp(scrollApp);
1111
+ }
1112
+ const directionLabel = action.direction === "up" ? "cima" : "baixo";
1113
+ await reporter.progress(progressPercent, `Rolando a tela para ${directionLabel}`);
1114
+ await this.scrollView(action.direction, action.amount, action.steps);
1115
+ resultPayload.last_scroll = {
1116
+ direction: action.direction,
1117
+ amount: action.amount || "medium",
1118
+ steps: action.steps || 1,
1119
+ app: scrollApp || null,
1120
+ };
1121
+ completionNotes.push(`Rolei a tela para ${directionLabel} no macOS.`);
1122
+ continue;
1123
+ }
1124
+ if (action.type === "whatsapp_send_message") {
1125
+ await reporter.progress(progressPercent, `Abrindo a conversa do WhatsApp com ${action.contact}`);
1126
+ await this.focusApp("Safari");
1127
+ await this.ensureWhatsAppWebReady();
1128
+ const selected = await this.selectWhatsAppConversation(action.contact);
1129
+ if (!selected) {
1130
+ throw new Error(`Nao consegui localizar a conversa do WhatsApp com ${action.contact}.`);
1131
+ }
1132
+ await reporter.progress(progressPercent, `Digitando a mensagem para ${action.contact} no WhatsApp`);
1133
+ await this.focusWhatsAppComposer();
1134
+ await this.typeText(action.text);
1135
+ await delay(250);
1136
+ await this.pressShortcut("return");
1137
+ await delay(900);
1138
+ const verification = await this.verifyWhatsAppLastMessage(action.text);
1139
+ if (!verification.ok) {
1140
+ throw new Error(verification.reason || `Nao consegui confirmar o envio da mensagem para ${action.contact} no WhatsApp.`);
1141
+ }
1142
+ resultPayload.whatsapp = {
1143
+ action: "send_message",
1144
+ contact: action.contact,
1145
+ text_preview: clipText(action.text, 180),
1146
+ };
1147
+ completionNotes.push(`Enviei a mensagem no WhatsApp para ${action.contact}.`);
1148
+ continue;
1149
+ }
1150
+ if (action.type === "whatsapp_read_chat") {
1151
+ await reporter.progress(progressPercent, `Abrindo a conversa do WhatsApp com ${action.contact}`);
1152
+ await this.focusApp("Safari");
1153
+ await this.ensureWhatsAppWebReady();
1154
+ const selected = await this.selectWhatsAppConversation(action.contact);
1155
+ if (!selected) {
1156
+ throw new Error(`Nao consegui localizar a conversa do WhatsApp com ${action.contact}.`);
1157
+ }
1158
+ await delay(500);
1159
+ const chat = await this.readWhatsAppVisibleConversation(action.contact, action.limit || 12);
1160
+ resultPayload.whatsapp = {
1161
+ action: "read_chat",
1162
+ contact: action.contact,
1163
+ messages: chat.messages,
1164
+ };
1165
+ completionNotes.push(`Mensagens visiveis no WhatsApp com ${action.contact}:\n${chat.summary}`);
1166
+ continue;
1167
+ }
937
1168
  if (action.type === "click_visual_target") {
938
1169
  const browserApp = await this.resolveLikelyBrowserApp(action.app);
939
1170
  if (browserApp) {
@@ -952,6 +1183,42 @@ export class NativeMacOSJobExecutor {
952
1183
  const initialBrowserState = browserApp
953
1184
  ? await this.captureBrowserPageState(browserApp).catch(() => null)
954
1185
  : null;
1186
+ const nativeMediaTransport = extractNativeMediaTransportCommand(targetDescription);
1187
+ if (nativeMediaTransport) {
1188
+ await reporter.progress(progressPercent, `Tentando controle de mídia nativo do macOS para ${targetDescription}`);
1189
+ try {
1190
+ await this.triggerMacOSMediaTransport(nativeMediaTransport);
1191
+ let validated = false;
1192
+ let validationReason = "";
1193
+ if (action.verification_prompt) {
1194
+ const verification = await this.validateVisualClickWithVision(job.job_id, targetDescription, action.verification_prompt, progressPercent, reporter, artifacts, "native_media_transport_result");
1195
+ validated = verification.ok;
1196
+ validationReason = verification.reason;
1197
+ }
1198
+ else if (browserApp) {
1199
+ const browserValidation = await this.confirmBrowserClick(browserApp, initialBrowserState, targetDescription, null);
1200
+ validated = browserValidation.ok;
1201
+ validationReason = browserValidation.reason;
1202
+ }
1203
+ else {
1204
+ validated = true;
1205
+ }
1206
+ if (validated) {
1207
+ resultPayload.last_click = {
1208
+ strategy: "native_media_transport",
1209
+ matched_text: targetDescription,
1210
+ };
1211
+ completionNotes.push(`Acionei ${targetDescription} usando o controle de mídia nativo do macOS.`);
1212
+ clickSucceeded = true;
1213
+ break;
1214
+ }
1215
+ lastFailureReason = validationReason || `O controle de mídia nativo do macOS nao confirmou ${targetDescription}.`;
1216
+ await reporter.progress(progressPercent, "O controle de mídia nativo nao foi suficiente; vou tentar DOM/OCR");
1217
+ }
1218
+ catch (error) {
1219
+ lastFailureReason = error instanceof Error ? error.message : String(error);
1220
+ }
1221
+ }
955
1222
  if (browserApp === "Safari") {
956
1223
  await reporter.progress(progressPercent, `Tentando localizar ${targetDescription} diretamente no Safari`);
957
1224
  const domClick = await this.trySafariDomClick(targetDescription);
@@ -969,6 +1236,8 @@ export class NativeMacOSJobExecutor {
969
1236
  validationReason = browserValidation.reason;
970
1237
  }
971
1238
  if (validated) {
1239
+ this.lastVisualTargetDescription = targetDescription;
1240
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
972
1241
  resultPayload.last_click = {
973
1242
  strategy: domClick.strategy || "safari_dom",
974
1243
  matched_text: domClick.matchedText || null,
@@ -1010,6 +1279,8 @@ export class NativeMacOSJobExecutor {
1010
1279
  }
1011
1280
  if (validated) {
1012
1281
  const region = ocrClick.region || null;
1282
+ this.lastVisualTargetDescription = targetDescription;
1283
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1013
1284
  resultPayload.last_click = {
1014
1285
  strategy: ocrClick.strategy || "local_ocr",
1015
1286
  score: ocrClick.score || null,
@@ -1085,6 +1356,8 @@ export class NativeMacOSJobExecutor {
1085
1356
  }
1086
1357
  }
1087
1358
  completionNotes.push(`Localizei e cliquei em ${targetDescription}.`);
1359
+ this.lastVisualTargetDescription = targetDescription;
1360
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1088
1361
  clickSucceeded = true;
1089
1362
  break;
1090
1363
  }
@@ -1270,7 +1543,26 @@ end tell
1270
1543
  const beforePlayerState = normalizeText(before?.playerState || "");
1271
1544
  const afterPlayerState = normalizeText(after.playerState || "");
1272
1545
  const playerLooksActive = afterPlayerState.includes("pause") || afterPlayerState.includes("pausar");
1546
+ const playerLooksPaused = !playerLooksActive && /play|tocar|reproduzir|continuar|retomar|resume/.test(afterPlayerState);
1547
+ const wantsNext = descriptionWantsNext(targetDescription);
1548
+ const wantsPrevious = descriptionWantsPrevious(targetDescription);
1549
+ const wantsPause = descriptionWantsPause(targetDescription);
1550
+ const wantsResume = descriptionWantsResume(targetDescription);
1551
+ const mediaQueryTokens = extractMediaQueryTokens(targetDescription);
1552
+ const mediaMatchCount = countMatchingTokens(after.playerTitle || "", mediaQueryTokens);
1273
1553
  if (afterUrl.includes("music.youtube.com")) {
1554
+ if (wantsPause && beforePlayerState && beforePlayerState !== afterPlayerState && playerLooksPaused) {
1555
+ return true;
1556
+ }
1557
+ if (wantsResume && playerLooksActive && beforePlayerState !== afterPlayerState) {
1558
+ return true;
1559
+ }
1560
+ if ((wantsNext || wantsPrevious) && beforePlayerTitle && afterPlayerTitle && beforePlayerTitle !== afterPlayerTitle) {
1561
+ return true;
1562
+ }
1563
+ if (mediaQueryTokens.length >= 2 && mediaMatchCount >= Math.max(2, Math.ceil(mediaQueryTokens.length * 0.5)) && playerLooksActive) {
1564
+ return true;
1565
+ }
1274
1566
  if (beforePlayerState && afterPlayerState && beforePlayerState !== afterPlayerState && playerLooksActive) {
1275
1567
  return true;
1276
1568
  }
@@ -1323,6 +1615,23 @@ end tell
1323
1615
  if (!key) {
1324
1616
  throw new Error(`Invalid shortcut: ${shortcut}`);
1325
1617
  }
1618
+ const normalizedShortcut = normalizeText(shortcut).replace(/[\s+-]+/g, "_");
1619
+ const mediaCommandMap = {
1620
+ media_play: "play_pause",
1621
+ media_pause: "play_pause",
1622
+ media_play_pause: "play_pause",
1623
+ media_resume: "play_pause",
1624
+ media_next: "next",
1625
+ media_proxima: "next",
1626
+ media_previous: "previous",
1627
+ media_prev: "previous",
1628
+ media_anterior: "previous",
1629
+ };
1630
+ const mediaCommand = mediaCommandMap[normalizedShortcut];
1631
+ if (mediaCommand) {
1632
+ await this.triggerMacOSMediaTransport(mediaCommand);
1633
+ return;
1634
+ }
1326
1635
  const namedKeyCodes = {
1327
1636
  return: 36,
1328
1637
  enter: 36,
@@ -1348,6 +1657,45 @@ end tell
1348
1657
  `tell application "System Events" to keystroke "${escapeAppleScript(key)}"${usingClause}`,
1349
1658
  ]);
1350
1659
  }
1660
+ async triggerMacOSMediaTransport(command) {
1661
+ const keyTypeMap = {
1662
+ play_pause: 16,
1663
+ next: 17,
1664
+ previous: 18,
1665
+ };
1666
+ const keyType = keyTypeMap[command];
1667
+ const swiftScript = `
1668
+ import AppKit
1669
+ import Foundation
1670
+
1671
+ let keyType = ${keyType}
1672
+
1673
+ func postMediaKey(_ keyType: Int32, down: Bool) {
1674
+ let eventFlags = NSEvent.ModifierFlags(rawValue: 0xA00)
1675
+ let state = down ? 0xA : 0xB
1676
+ let data1 = Int((keyType << 16) | (Int32(state) << 8))
1677
+ guard let event = NSEvent.otherEvent(
1678
+ with: .systemDefined,
1679
+ location: .zero,
1680
+ modifierFlags: eventFlags,
1681
+ timestamp: 0,
1682
+ windowNumber: 0,
1683
+ context: nil,
1684
+ subtype: 8,
1685
+ data1: data1,
1686
+ data2: -1
1687
+ ) else {
1688
+ return
1689
+ }
1690
+ event.cgEvent?.post(tap: .cghidEventTap)
1691
+ }
1692
+
1693
+ postMediaKey(Int32(keyType), down: true)
1694
+ usleep(90000)
1695
+ postMediaKey(Int32(keyType), down: false)
1696
+ `;
1697
+ await this.runCommand("swift", ["-e", swiftScript]);
1698
+ }
1351
1699
  async typeText(text) {
1352
1700
  const previousClipboard = await this.readClipboardText();
1353
1701
  try {
@@ -1360,6 +1708,474 @@ end tell
1360
1708
  }
1361
1709
  }
1362
1710
  }
1711
+ resolveLikelySearchShortcut(app) {
1712
+ const normalizedHint = normalizeText(this.lastVisualTargetDescription || "");
1713
+ const looksLikeSearchTarget = /\b(busca|pesquisa|search|campo|caixa|icone|ícone)\b/.test(normalizedHint);
1714
+ if (!looksLikeSearchTarget || !app) {
1715
+ return null;
1716
+ }
1717
+ if (app === "Spotify") {
1718
+ return "cmd+l";
1719
+ }
1720
+ if (app === "Music") {
1721
+ return "cmd+f";
1722
+ }
1723
+ return null;
1724
+ }
1725
+ async guidedTypeText(text, preferredApp) {
1726
+ const app = preferredApp || this.lastActiveApp || await this.getFrontmostAppName();
1727
+ if (app === "Safari") {
1728
+ const safariResult = await this.trySafariGuidedType(text);
1729
+ if (safariResult.ok) {
1730
+ return {
1731
+ ...safariResult,
1732
+ app,
1733
+ };
1734
+ }
1735
+ }
1736
+ const searchShortcut = this.resolveLikelySearchShortcut(app);
1737
+ if (searchShortcut) {
1738
+ await this.pressShortcut(searchShortcut).catch(() => undefined);
1739
+ await delay(180);
1740
+ }
1741
+ await this.typeText(text);
1742
+ return {
1743
+ ok: true,
1744
+ verified: false,
1745
+ strategy: searchShortcut ? `clipboard_paste_after_${searchShortcut}` : "clipboard_paste",
1746
+ app: app || null,
1747
+ attempts: 1,
1748
+ };
1749
+ }
1750
+ async trySafariGuidedType(text) {
1751
+ for (let attempt = 0; attempt < 3; attempt += 1) {
1752
+ try {
1753
+ const result = await this.runSafariJsonScript(`
1754
+ const inputText = String(__input?.text || "");
1755
+ function isVisible(element) {
1756
+ if (!(element instanceof HTMLElement)) return false;
1757
+ const rect = element.getBoundingClientRect();
1758
+ if (rect.width < 4 || rect.height < 4) return false;
1759
+ const style = window.getComputedStyle(element);
1760
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1761
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1762
+ }
1763
+ function isEditable(element) {
1764
+ return element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || (element instanceof HTMLElement && element.isContentEditable);
1765
+ }
1766
+ function readEditableValue(element) {
1767
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1768
+ return String(element.value || "").trim();
1769
+ }
1770
+ if (element instanceof HTMLElement && element.isContentEditable) {
1771
+ return String(element.innerText || element.textContent || "").trim();
1772
+ }
1773
+ return "";
1774
+ }
1775
+ function clearAndFill(element, value) {
1776
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1777
+ element.focus();
1778
+ element.select?.();
1779
+ element.value = "";
1780
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "deleteContentBackward", data: null }));
1781
+ element.value = value;
1782
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: value }));
1783
+ element.dispatchEvent(new Event("change", { bubbles: true }));
1784
+ return;
1785
+ }
1786
+ if (element instanceof HTMLElement && element.isContentEditable) {
1787
+ element.focus();
1788
+ const selection = window.getSelection();
1789
+ const range = document.createRange();
1790
+ range.selectNodeContents(element);
1791
+ selection?.removeAllRanges();
1792
+ selection?.addRange(range);
1793
+ document.execCommand("selectAll", false);
1794
+ document.execCommand("delete", false);
1795
+ document.execCommand("insertText", false, value);
1796
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: value }));
1797
+ }
1798
+ }
1799
+ const selectors = location.hostname.includes("music.youtube.com")
1800
+ ? [
1801
+ "ytmusic-search-box input#input",
1802
+ "ytmusic-search-box input",
1803
+ "input[placeholder*='Search']",
1804
+ "input[placeholder*='Pesquisar']",
1805
+ "[role='searchbox'] input"
1806
+ ]
1807
+ : location.hostname.includes("open.spotify.com")
1808
+ ? [
1809
+ "input[data-testid='search-input']",
1810
+ "[role='searchbox'] input",
1811
+ "input[placeholder*='Search']"
1812
+ ]
1813
+ : location.hostname.includes("deezer.com")
1814
+ ? [
1815
+ "input[type='search']",
1816
+ "input[placeholder*='Search']",
1817
+ "input[placeholder*='Pesquisar']",
1818
+ "form input[type='text']"
1819
+ ]
1820
+ : location.hostname.includes("soundcloud.com")
1821
+ ? [
1822
+ "input[type='search']",
1823
+ "input[placeholder*='Search']",
1824
+ "form input[type='search']",
1825
+ "form input[type='text']"
1826
+ ]
1827
+ : location.hostname.includes("music.amazon.com")
1828
+ ? [
1829
+ "input[type='search']",
1830
+ "input[aria-label*='Search']",
1831
+ "input[placeholder*='Search']",
1832
+ "[role='searchbox'] input"
1833
+ ]
1834
+ : [
1835
+ "textarea",
1836
+ "input[type='search']",
1837
+ "input[type='text']",
1838
+ "input:not([type])",
1839
+ "[contenteditable='true'][role='textbox']",
1840
+ "[contenteditable='true']"
1841
+ ];
1842
+ const active = document.activeElement;
1843
+ let target = null;
1844
+ if (active instanceof HTMLElement && isVisible(active) && isEditable(active)) {
1845
+ target = active;
1846
+ }
1847
+ if (!target) {
1848
+ const candidates = selectors
1849
+ .flatMap((selector) => Array.from(document.querySelectorAll(selector)))
1850
+ .filter((node) => node instanceof HTMLElement)
1851
+ .filter((node) => isVisible(node))
1852
+ .map((node, index) => {
1853
+ const label = String(
1854
+ node.getAttribute("aria-label")
1855
+ || node.getAttribute("placeholder")
1856
+ || node.getAttribute("title")
1857
+ || node.textContent
1858
+ || ""
1859
+ ).toLowerCase();
1860
+ let score = 0;
1861
+ if (label.includes("search") || label.includes("pesquis")) score += 80;
1862
+ if (node === document.activeElement) score += 30;
1863
+ score += Math.max(0, 12 - index);
1864
+ return { node, score };
1865
+ })
1866
+ .sort((left, right) => right.score - left.score);
1867
+ target = candidates[0]?.node || null;
1868
+ }
1869
+ if (!(target instanceof HTMLElement) || !isEditable(target)) {
1870
+ return { ok: false, reason: "Nao achei um campo editavel confiavel no Safari.", strategy: "safari_guided_type_no_field" };
1871
+ }
1872
+ target.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
1873
+ clearAndFill(target, inputText);
1874
+ return {
1875
+ ok: true,
1876
+ actualText: readEditableValue(target),
1877
+ strategy: target === active ? "safari_guided_type_active_field" : "safari_guided_type_search_field",
1878
+ };
1879
+ `, { text });
1880
+ if (result?.ok && typedTextLooksApplied(result.actualText || "", text)) {
1881
+ return {
1882
+ ok: true,
1883
+ verified: true,
1884
+ strategy: result.strategy || "safari_guided_type",
1885
+ attempts: attempt + 1,
1886
+ };
1887
+ }
1888
+ await delay(180);
1889
+ }
1890
+ catch (error) {
1891
+ const detail = error instanceof Error ? error.message : String(error);
1892
+ if (detail.toLowerCase().includes("allow javascript from apple events")) {
1893
+ break;
1894
+ }
1895
+ }
1896
+ }
1897
+ await this.typeText(text);
1898
+ await delay(180);
1899
+ try {
1900
+ const verification = await this.runSafariJsonScript(`
1901
+ function readEditableValue(element) {
1902
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1903
+ return String(element.value || "").trim();
1904
+ }
1905
+ if (element instanceof HTMLElement && element.isContentEditable) {
1906
+ return String(element.innerText || element.textContent || "").trim();
1907
+ }
1908
+ return "";
1909
+ }
1910
+ const active = document.activeElement;
1911
+ return { actualText: active ? readEditableValue(active) : "" };
1912
+ `, {});
1913
+ if (typedTextLooksApplied(verification.actualText || "", text)) {
1914
+ return {
1915
+ ok: true,
1916
+ verified: true,
1917
+ strategy: "safari_clipboard_retry",
1918
+ attempts: 4,
1919
+ };
1920
+ }
1921
+ }
1922
+ catch {
1923
+ // ignore and fall back to unverified success below
1924
+ }
1925
+ return {
1926
+ ok: true,
1927
+ verified: false,
1928
+ strategy: "clipboard_paste",
1929
+ attempts: 1,
1930
+ };
1931
+ }
1932
+ async scrollView(direction, amount = "medium", steps = 1) {
1933
+ const clampedSteps = Math.max(1, Math.min(Math.round(steps || 1), 6));
1934
+ const lineDelta = {
1935
+ small: 4,
1936
+ medium: 8,
1937
+ large: 14,
1938
+ }[amount];
1939
+ const signedDelta = direction === "up" ? lineDelta : -lineDelta;
1940
+ const iterations = {
1941
+ small: 1,
1942
+ medium: 2,
1943
+ large: 3,
1944
+ }[amount];
1945
+ const swiftScript = `
1946
+ import ApplicationServices
1947
+ import Foundation
1948
+
1949
+ let wheelDelta: Int32 = ${signedDelta}
1950
+ let stepCount = ${clampedSteps}
1951
+ let iterations = ${iterations}
1952
+
1953
+ for _ in 0..<stepCount {
1954
+ for _ in 0..<iterations {
1955
+ if let event = CGEvent(scrollWheelEvent2Source: nil, units: .line, wheelCount: 1, wheel1: wheelDelta, wheel2: 0, wheel3: 0) {
1956
+ event.post(tap: .cghidEventTap)
1957
+ }
1958
+ usleep(35000)
1959
+ }
1960
+ usleep(85000)
1961
+ }
1962
+ `;
1963
+ try {
1964
+ await this.runCommand("swift", ["-e", swiftScript]);
1965
+ }
1966
+ catch {
1967
+ await this.scrollViewWithPageKeys(direction, clampedSteps);
1968
+ }
1969
+ }
1970
+ async scrollViewWithPageKeys(direction, steps) {
1971
+ const keyCode = direction === "up" ? 116 : 121;
1972
+ const clampedSteps = Math.max(1, Math.min(Math.round(steps || 1), 6));
1973
+ const script = `
1974
+ repeat ${clampedSteps} times
1975
+ tell application "System Events" to key code ${keyCode}
1976
+ delay 0.06
1977
+ end repeat
1978
+ `;
1979
+ await this.runCommand("osascript", ["-e", script]);
1980
+ }
1981
+ async ensureWhatsAppWebReady() {
1982
+ const page = await this.readFrontmostPage("Safari");
1983
+ if (!normalizeComparableUrl(page.url || "").includes("web.whatsapp.com")) {
1984
+ throw new Error("O Safari nao esta aberto no WhatsApp Web.");
1985
+ }
1986
+ }
1987
+ async selectWhatsAppConversation(contact) {
1988
+ const prepared = await this.runSafariJsonScript(`
1989
+ const query = String(__input?.contact || "");
1990
+ const normalize = (value) => String(value || "").normalize("NFD").replace(/[\\u0300-\\u036f]/g, "").toLowerCase().trim();
1991
+ const normalizedQuery = normalize(query);
1992
+
1993
+ function isVisible(element) {
1994
+ if (!(element instanceof HTMLElement)) return false;
1995
+ const rect = element.getBoundingClientRect();
1996
+ if (rect.width < 4 || rect.height < 4) return false;
1997
+ const style = window.getComputedStyle(element);
1998
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1999
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
2000
+ }
2001
+
2002
+ function focusAndReplaceContent(element, value) {
2003
+ element.focus();
2004
+ const range = document.createRange();
2005
+ range.selectNodeContents(element);
2006
+ const selection = window.getSelection();
2007
+ selection?.removeAllRanges();
2008
+ selection?.addRange(range);
2009
+ document.execCommand("selectAll", false);
2010
+ document.execCommand("delete", false);
2011
+ document.execCommand("insertText", false, value);
2012
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, data: value, inputType: "insertText" }));
2013
+ }
2014
+
2015
+ const candidates = Array.from(document.querySelectorAll('div[contenteditable="true"][role="textbox"], div[contenteditable="true"][data-tab], [data-testid="chat-list-search"] [contenteditable="true"]'))
2016
+ .filter((node) => node instanceof HTMLElement)
2017
+ .filter((node) => isVisible(node))
2018
+ .map((node) => {
2019
+ const element = node;
2020
+ const rect = element.getBoundingClientRect();
2021
+ const label = normalize(element.getAttribute("aria-label") || element.getAttribute("data-testid") || element.textContent || "");
2022
+ let score = 0;
2023
+ if (rect.left < window.innerWidth * 0.45) score += 30;
2024
+ if (rect.top < 240) score += 30;
2025
+ if (label.includes("search") || label.includes("pesquisar") || label.includes("procure") || label.includes("chat list")) score += 80;
2026
+ if (element.closest('[data-testid="chat-list-search"], header')) score += 25;
2027
+ return { element, score };
2028
+ })
2029
+ .sort((left, right) => right.score - left.score);
2030
+
2031
+ if (!candidates.length) {
2032
+ return { ok: false, reason: "Nao achei o campo de busca do WhatsApp Web." };
2033
+ }
2034
+
2035
+ focusAndReplaceContent(candidates[0].element, query);
2036
+ return { ok: true };
2037
+ `, { contact });
2038
+ if (!prepared?.ok) {
2039
+ return false;
2040
+ }
2041
+ await delay(900);
2042
+ const result = await this.runSafariJsonScript(`
2043
+ const query = String(__input?.contact || "");
2044
+ const normalize = (value) => String(value || "").normalize("NFD").replace(/[\\u0300-\\u036f]/g, "").toLowerCase().trim();
2045
+ const normalizedQuery = normalize(query);
2046
+
2047
/*
 * Reports whether a node is an HTMLElement that measures at least 6x6 pixels,
 * is not hidden through visibility, display or zero opacity, and overlaps the
 * current viewport.
 */
function isVisible(element) {
  if (!(element instanceof HTMLElement)) {
    return false;
  }

  const box = element.getBoundingClientRect();
  const tooSmall = box.width < 6 || box.height < 6;
  if (tooSmall) {
    return false;
  }

  const computed = window.getComputedStyle(element);
  /* An empty opacity string is treated as fully opaque. */
  const hiddenByCss = computed.visibility === "hidden"
    || computed.display === "none"
    || Number(computed.opacity || "1") === 0;
  if (hiddenByCss) {
    return false;
  }

  const overlapsVertically = box.bottom >= 0 && box.top <= window.innerHeight;
  const overlapsHorizontally = box.right >= 0 && box.left <= window.innerWidth;
  return overlapsVertically && overlapsHorizontally;
}
2055
+
2056
+ const titleNodes = Array.from(document.querySelectorAll('span[title], div[title]'))
2057
+ .filter((node) => node instanceof HTMLElement)
2058
+ .filter((node) => isVisible(node))
2059
+ .map((node) => {
2060
+ const text = normalize(node.getAttribute("title") || node.textContent || "");
2061
+ let score = 0;
2062
+ if (text === normalizedQuery) score += 160;
2063
+ if (text.includes(normalizedQuery)) score += 100;
2064
+ if (normalizedQuery.includes(text) && text.length >= 3) score += 50;
2065
+ const container = node.closest('[data-testid="cell-frame-container"], [role="listitem"], [role="gridcell"], div[tabindex]');
2066
+ if (container instanceof HTMLElement && isVisible(container)) score += 20;
2067
+ return { node, container, text, score };
2068
+ })
2069
+ .filter((item) => item.score > 0)
2070
+ .sort((left, right) => right.score - left.score);
2071
+
2072
+ if (!titleNodes.length) {
2073
+ return { clicked: false, reason: "Nao achei uma conversa visivel com esse nome." };
2074
+ }
2075
+
2076
+ const winner = titleNodes[0];
2077
+ const target = winner.container instanceof HTMLElement ? winner.container : winner.node;
2078
+ target.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
2079
+ target.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, cancelable: true, view: window }));
2080
+ target.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, cancelable: true, view: window }));
2081
+ target.dispatchEvent(new MouseEvent("click", { bubbles: true, cancelable: true, view: window }));
2082
+ if (typeof target.click === "function") {
2083
+ target.click();
2084
+ }
2085
+ return { clicked: true };
2086
+ `, { contact });
2087
+ return Boolean(result?.clicked);
2088
+ }
2089
+ async focusWhatsAppComposer() {
2090
+ const result = await this.runSafariJsonScript(`
2091
+ function isVisible(element) {
2092
+ if (!(element instanceof HTMLElement)) return false;
2093
+ const rect = element.getBoundingClientRect();
2094
+ if (rect.width < 6 || rect.height < 6) return false;
2095
+ const style = window.getComputedStyle(element);
2096
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
2097
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
2098
+ }
2099
+
2100
+ const candidates = Array.from(document.querySelectorAll('footer div[contenteditable="true"], [data-testid="conversation-compose-box-input"], main footer [contenteditable="true"]'))
2101
+ .filter((node) => node instanceof HTMLElement)
2102
+ .filter((node) => isVisible(node))
2103
+ .sort((left, right) => right.getBoundingClientRect().top - left.getBoundingClientRect().top);
2104
+
2105
+ if (!candidates.length) {
2106
+ return { focused: false, reason: "Nao achei o campo de mensagem do WhatsApp Web." };
2107
+ }
2108
+
2109
+ const composer = candidates[0];
2110
+ composer.focus();
2111
+ composer.click();
2112
+ return { focused: true };
2113
+ `);
2114
+ if (!result?.focused) {
2115
+ throw new Error(result?.reason || "Nao consegui focar o campo de mensagem do WhatsApp Web.");
2116
+ }
2117
+ }
2118
+ async readWhatsAppVisibleConversation(contact, limit) {
2119
+ const result = await this.runSafariJsonScript(`
2120
+ const maxMessages = Number(__input?.limit || 12);
2121
+
2122
+ function isVisible(element) {
2123
+ if (!(element instanceof HTMLElement)) return false;
2124
+ const rect = element.getBoundingClientRect();
2125
+ if (rect.width < 6 || rect.height < 6) return false;
2126
+ const style = window.getComputedStyle(element);
2127
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
2128
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
2129
+ }
2130
+
2131
+ const containers = Array.from(document.querySelectorAll('[data-testid="msg-container"], div[data-id]'))
2132
+ .filter((node) => node instanceof HTMLElement)
2133
+ .filter((node) => isVisible(node));
2134
+
2135
+ const messages = containers.map((node) => {
2136
+ const element = node;
2137
+ const prePlain = element.querySelector('[data-pre-plain-text]')?.getAttribute('data-pre-plain-text') || "";
2138
+ const authorMatch = prePlain.match(/\\]\\s*([^:]+):/);
2139
+ const author = authorMatch?.[1]?.trim() || (element.getAttribute('data-testid')?.includes('out') ? 'Voce' : 'Contato');
2140
+ const text = (element.innerText || "").trim().replace(/\\n{2,}/g, "\\n");
2141
+ return { author, text };
2142
+ }).filter((item) => item.text);
2143
+
2144
+ return { messages: messages.slice(-maxMessages) };
2145
+ `, { contact, limit });
2146
+ const messages = Array.isArray(result?.messages)
2147
+ ? result.messages
2148
+ .map((item) => ({
2149
+ author: clipText(asString(item.author) || "Contato", 80),
2150
+ text: clipText(asString(item.text) || "", 500),
2151
+ }))
2152
+ .filter((item) => item.text)
2153
+ : [];
2154
+ return {
2155
+ messages,
2156
+ summary: messages.length
2157
+ ? messages.map((item) => `${item.author}: ${item.text}`).join("\n")
2158
+ : `(sem mensagens visiveis na conversa com ${contact})`,
2159
+ };
2160
+ }
2161
+ async verifyWhatsAppLastMessage(expectedText) {
2162
+ const chat = await this.readWhatsAppVisibleConversation("Contato", 6);
2163
+ if (!chat.messages.length) {
2164
+ return {
2165
+ ok: false,
2166
+ reason: "Nao consegui ler as mensagens visiveis apos o envio no WhatsApp.",
2167
+ };
2168
+ }
2169
+ const normalizedExpected = normalizeText(expectedText).slice(0, 60);
2170
+ const matched = chat.messages.some((item) => normalizeText(item.text).includes(normalizedExpected));
2171
+ if (!matched) {
2172
+ return {
2173
+ ok: false,
2174
+ reason: "Nao consegui confirmar visualmente a mensagem enviada no WhatsApp.",
2175
+ };
2176
+ }
2177
+ return { ok: true, reason: "" };
2178
+ }
1363
2179
  async takeScreenshot(targetPath) {
1364
2180
  const artifactsDir = path.join(os.homedir(), ".otto-bridge", "artifacts");
1365
2181
  await mkdir(artifactsDir, { recursive: true });
@@ -1485,11 +2301,17 @@ const normalizedDescription = normalize(rawDescription);
1485
2301
  const isYouTubeMusic = location.hostname.includes("music.youtube.com");
1486
2302
  const wantsFirst = /\\b(primeir[ao]?|first)\\b/.test(normalizedDescription);
1487
2303
  const wantsVideo = /\\b(video|videos|musica|faixa|youtube|resultado|watch)\\b/.test(normalizedDescription) || location.hostname.includes("youtube");
2304
+ const wantsNext = /\\b(proxim[ao]?|next|skip|pular|avanca|avancar)\\b/.test(normalizedDescription);
2305
+ const wantsPrevious = /\\b(anterior|previous|volta[ar]?|back|retorna[ar]?)\\b/.test(normalizedDescription);
2306
+ const wantsPause = /\\b(pausa|pause|pausar)\\b/.test(normalizedDescription);
2307
+ const wantsResume = /\\b(retoma|retomar|resume|continu[ae]r|despausa|play)\\b/.test(normalizedDescription);
1488
2308
  const stopWords = new Set([
1489
2309
  "o", "a", "os", "as", "um", "uma", "uns", "umas", "de", "da", "do", "das", "dos",
1490
2310
  "em", "no", "na", "nos", "nas", "para", "por", "com", "que", "visivel", "visiveis",
1491
2311
  "visivel", "tela", "pagina", "page", "site", "link", "botao", "botao", "clicar",
1492
- "clique", "seleciona", "selecionar", "resultado", "resultados"
2312
+ "clique", "seleciona", "selecionar", "resultado", "resultados", "youtube", "music",
2313
+ "melhor", "tocavel", "relacionado", "diretamente", "navegador", "player", "fila",
2314
+ "play", "pause", "pausa", "proxima", "proximo", "anterior"
1493
2315
  ]);
1494
2316
  const quotedPhrases = Array.from(rawDescription.matchAll(/["'“”‘’]([^"'“”‘’]{2,80})["'“”‘’]/g))
1495
2317
  .map((match) => normalize(match[1]));
@@ -1564,6 +2386,173 @@ function deriveText(element) {
1564
2386
  return "";
1565
2387
  }
1566
2388
 
2389
/*
 * Scrolls an element into view and activates it once with a simulated mouse gesture.
 *
 * element: node to click; anything that is not an HTMLElement yields null.
 * strategy, matchedText, matchedHref, score, totalCandidates: echoed back in
 * the result (matchedText is cut to 180 characters).
 *
 * Returns { clicked: true, matchedText, matchedHref, score, totalCandidates, strategy },
 * or null when the element cannot be clicked.
 */
function clickElement(element, strategy, matchedText, matchedHref, score, totalCandidates) {
  if (!(element instanceof HTMLElement || element instanceof HTMLAnchorElement)) {
    return null;
  }
  element.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });

  /* Aim the pointer events at the centre of the element after scrolling. */
  const rect = element.getBoundingClientRect();
  const pointerInit = {
    bubbles: true,
    cancelable: true,
    view: window,
    clientX: rect.left + (rect.width / 2),
    clientY: rect.top + (rect.height / 2),
  };
  for (const eventName of ["mouseover", "mousedown", "mouseup"]) {
    element.dispatchEvent(new MouseEvent(eventName, pointerInit));
  }

  /*
   * Activate exactly once. Dispatching a "click" event and then also calling
   * element.click() runs every click handler twice, which cancels toggles such
   * as play/pause and skips two tracks on "next".
   */
  if (typeof element.click === "function") {
    element.click();
  } else {
    element.dispatchEvent(new MouseEvent("click", pointerInit));
  }

  return {
    clicked: true,
    matchedText: String(matchedText || "").slice(0, 180),
    matchedHref: String(matchedHref || ""),
    score,
    totalCandidates,
    strategy,
  };
}
2416
+
2417
/*
 * Handles transport requests (next, previous, pause, resume) on YouTube Music by
 * clicking the matching button of the player bar.
 *
 * Returns the clickElement result for the best-matching button, or null when the
 * page is not YouTube Music, no transport action was requested, or no visible
 * player-bar button carries a label matching the requested action.
 */
function attemptYouTubeMusicTransportClick() {
  if (!isYouTubeMusic || !(wantsNext || wantsPrevious || wantsPause || wantsResume)) {
    return null;
  }

  const playerButtons = Array.from(document.querySelectorAll(
    "ytmusic-player-bar button, ytmusic-player-bar [role='button'], ytmusic-player-bar tp-yt-paper-icon-button"
  ))
    .filter((node) => node instanceof HTMLElement)
    .filter((node) => isVisible(node));

  /* Label patterns for the actions that were actually requested. */
  const requestedPatterns = [
    [wantsNext, /proxim|next|skip/],
    [wantsPrevious, /anterior|previous|back|volta/],
    [wantsPause, /pause|pausa|pausar/],
    [wantsResume, /play|tocar|reproduzir|resume|retomar|continuar/],
  ]
    .filter(([wanted]) => wanted)
    .map(([, pattern]) => pattern);

  const ranked = playerButtons
    .map((node, index) => {
      const label = normalize([
        deriveText(node),
        node.getAttribute("aria-label"),
        node.getAttribute("title"),
        node.id,
      ].filter(Boolean).join(" "));
      const intentHits = requestedPatterns.filter((pattern) => pattern.test(label)).length;
      /*
       * The position and "player" bonuses only break ties. On their own they must
       * not qualify a button, otherwise an unrelated control (like, menu, ...)
       * would be clicked whenever no label matches the request.
       */
      if (intentHits === 0) {
        return null;
      }
      let score = intentHits * 140;
      if (label.includes("player")) score += 12;
      score += Math.max(0, 12 - index);
      return { node, label, score };
    })
    .filter(Boolean)
    .sort((left, right) => right.score - left.score);

  if (!ranked.length) {
    return null;
  }

  const winner = ranked[0];
  return clickElement(winner.node, "safari_dom_ytmusic_transport", winner.label, "", winner.score, ranked.length);
}
2455
+
2456
/*
 * Tries to click the YouTube Music search-result row that best matches the
 * requested description.
 *
 * Each visible result row is scored from quoted phrases and tokens found in its
 * title, subtitle and overall text, plus a small bonus for appearing early in
 * the list. Inside the winning row the most play-like visible control (overlay
 * play button, labelled button or watch link) is clicked; the row itself is the
 * fallback target.
 *
 * Returns the clickElement result, or null when the page is not YouTube Music, a
 * next/previous/pause action was requested, there is nothing to search for, or
 * no row scored above zero.
 */
function attemptYouTubeMusicSearchResultClick() {
  const transportRequested = wantsNext || wantsPrevious || wantsPause;
  if (!isYouTubeMusic || transportRequested) {
    return null;
  }
  const hasQuery = quotedPhrases.length > 0 || tokens.length > 0;
  if (!hasQuery) {
    return null;
  }

  const readText = (node) => String((node && node.textContent) || "").trim();
  const countTokenHits = (haystack) => tokens.reduce(
    (hits, token) => (haystack.includes(token) ? hits + 1 : hits),
    0
  );

  /* Lists the visible clickable controls of a row, most play-like first. */
  function rankPlayTargets(row) {
    const controls = row.querySelectorAll(
      "ytmusic-item-thumbnail-overlay-renderer button, button[aria-label], tp-yt-paper-icon-button, a[href*='watch?v=']"
    );
    return Array.from(controls)
      .filter((candidate) => candidate instanceof HTMLElement || candidate instanceof HTMLAnchorElement)
      .filter((candidate) => isVisible(candidate))
      .map((candidate) => {
        const labelParts = [
          deriveText(candidate),
          candidate.getAttribute("aria-label"),
          candidate.getAttribute("title"),
        ];
        const label = normalize(labelParts.filter(Boolean).join(" "));
        const looksLikePlay = /play|tocar|reproduzir|assistir/.test(label);
        const isWatchLink = candidate instanceof HTMLAnchorElement
          && normalize(candidate.href).includes("watch?v=");
        const inThumbnailOverlay = Boolean(candidate.closest("ytmusic-item-thumbnail-overlay-renderer"));
        const candidateScore = (looksLikePlay ? 30 : 0)
          + (isWatchLink ? 18 : 0)
          + (inThumbnailOverlay ? 14 : 0);
        return { candidate, label, candidateScore };
      })
      .sort((first, second) => second.candidateScore - first.candidateScore);
  }

  /* Scores one result row; yields null when its text score is not above zero. */
  function evaluateRow(row, index) {
    const titleText = readText(row.querySelector("#title, .title, yt-formatted-string.title"));
    const subtitleText = readText(row.querySelector(".subtitle, .byline, .secondary-flex-columns"));
    const rowText = deriveText(row);
    const normalizedTitle = normalize(titleText);
    const normalizedSubtitle = normalize(subtitleText);
    const normalizedRow = normalize(rowText);

    /* A quoted phrase counts most in the title, less anywhere else in the row. */
    const phrasePoints = quotedPhrases.reduce((sum, phrase) => {
      if (!phrase) return sum;
      if (normalizedTitle.includes(phrase)) return sum + 160;
      return normalizedRow.includes(phrase) ? sum + 110 : sum;
    }, 0);

    /* Each token is credited once, at the most specific place it is found. */
    const tokenPoints = tokens.reduce((sum, token) => {
      if (normalizedTitle.includes(token)) return sum + 28;
      if (normalizedSubtitle.includes(token)) return sum + 16;
      return normalizedRow.includes(token) ? sum + 10 : sum;
    }, 0);

    /* Extra credit when several tokens land together. */
    let coveragePoints = 0;
    if (tokens.length > 1) {
      const titleThreshold = Math.max(2, Math.ceil(tokens.length * 0.5));
      if (countTokenHits(normalizedTitle) >= titleThreshold) coveragePoints += 80;
      if (countTokenHits(normalizedRow) === tokens.length) coveragePoints += 40;
    }

    const positionPoints = Math.max(0, 10 - index);
    const textScore = phrasePoints + tokenPoints + coveragePoints + positionPoints;

    const [bestTarget] = rankPlayTargets(row);
    if (!(textScore > 0)) {
      return null;
    }
    return {
      row,
      titleText,
      href: bestTarget?.candidate instanceof HTMLAnchorElement ? bestTarget.candidate.href : "",
      score: textScore + (bestTarget?.candidateScore || 0),
      target: bestTarget?.candidate || row,
    };
  }

  const rankedRows = Array.from(document.querySelectorAll("ytmusic-responsive-list-item-renderer"))
    .filter((node) => node instanceof HTMLElement && isVisible(node))
    .map((row, index) => evaluateRow(row, index))
    .filter((entry) => entry !== null)
    .sort((first, second) => second.score - first.score);

  if (rankedRows.length === 0) {
    return null;
  }

  const best = rankedRows[0];
  return clickElement(
    best.target,
    "safari_dom_ytmusic_result",
    best.titleText || deriveText(best.row),
    best.href || "",
    best.score,
    rankedRows.length,
  );
}
2545
+
2546
+ const ytmTransport = attemptYouTubeMusicTransportClick();
2547
+ if (ytmTransport) {
2548
+ return ytmTransport;
2549
+ }
2550
+
2551
+ const ytmResult = attemptYouTubeMusicSearchResultClick();
2552
+ if (ytmResult) {
2553
+ return ytmResult;
2554
+ }
2555
+
1567
2556
  function scoreCandidate(element, rank) {
1568
2557
  const text = deriveText(element);
1569
2558
  const href = element instanceof HTMLAnchorElement
@@ -2214,6 +3203,15 @@ if let output = String(data: data, encoding: .utf8) {
2214
3203
  if (action.type === "set_volume") {
2215
3204
  return `Volume ajustado para ${action.level}% no macOS`;
2216
3205
  }
3206
+ if (action.type === "scroll_view") {
3207
+ return `Tela rolada para ${action.direction === "up" ? "cima" : "baixo"} no macOS`;
3208
+ }
3209
+ if (action.type === "whatsapp_send_message") {
3210
+ return `Mensagem enviada no WhatsApp para ${action.contact}`;
3211
+ }
3212
+ if (action.type === "whatsapp_read_chat") {
3213
+ return `Conversa do WhatsApp lida com ${action.contact}`;
3214
+ }
2217
3215
  if (action.type === "click_visual_target") {
2218
3216
  return `Clique guiado executado para ${action.description}`;
2219
3217
  }
package/dist/types.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const BRIDGE_CONFIG_VERSION = 1;
2
- export const BRIDGE_VERSION = "0.5.6";
2
+ export const BRIDGE_VERSION = "0.5.9";
3
3
  export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
4
4
  export const DEFAULT_API_BASE_URL = "http://localhost:8000";
5
5
  export const DEFAULT_POLL_INTERVAL_MS = 3000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@leg3ndy/otto-bridge",
3
- "version": "0.5.6",
3
+ "version": "0.5.9",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Local companion for Otto Bridge device pairing and WebSocket runtime.",