@leg3ndy/otto-bridge 0.5.8 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -129,6 +129,102 @@ function extractMeaningfulDescriptionTokens(value) {
129
129
  .map((token) => token.trim())
130
130
  .filter((token) => token.length >= 3 && !GENERIC_VISUAL_STOP_WORDS.has(token))));
131
131
  }
132
/**
 * Words that name a media service, a player surface or a playback action.
 * extractMediaQueryTokens() drops them so that only the remaining description
 * tokens are used as the media query. Entries are written lowercase and
 * without accents.
 */
const MEDIA_QUERY_STOP_WORDS = new Set([
    // Services and player surfaces.
    "youtube", "music", "spotify", "apple", "player",
    // Play / pause verbs (English and Portuguese).
    "play", "pause", "pausa", "pausar", "tocar", "toque", "reproduzir", "reproducao",
    // Resume / continue verbs.
    "retomar", "retoma", "continuar", "continue", "resumir", "resume",
    // Track navigation.
    "next", "skip", "proxima", "proximo", "anterior", "previous", "voltar", "volta",
    // Filler words seen in target descriptions.
    "melhor", "tocavel", "relacionado", "diretamente", "navegador", "fila", "playerbar",
]);
168
/**
 * Returns the meaningful description tokens of `value` that are not media
 * stop words (see MEDIA_QUERY_STOP_WORDS).
 *
 * @param {*} value - Description handed to extractMeaningfulDescriptionTokens().
 * @returns {string[]} Tokens in their original order, stop words removed.
 */
function extractMediaQueryTokens(value) {
    const descriptionTokens = extractMeaningfulDescriptionTokens(value);
    const isQueryToken = (token) => !MEDIA_QUERY_STOP_WORDS.has(token);
    return descriptionTokens.filter(isQueryToken);
}
171
/**
 * Counts how many entries of `tokens` occur as substrings of the normalized
 * form of `text`.
 *
 * @param {string} text - Text to search; a falsy value counts as empty.
 * @param {string[]} tokens - Tokens to look for.
 * @returns {number} Number of tokens found (0 when either input is empty).
 */
function countMatchingTokens(text, tokens) {
    const haystack = normalizeText(text || "");
    if (!haystack) {
        return 0;
    }
    if (!tokens.length) {
        return 0;
    }
    return tokens.filter((token) => haystack.includes(token)).length;
}
178
/**
 * Decides whether the text read back from a field (`actual`) plausibly
 * reflects the text that was supposed to be typed (`expected`).
 *
 * Both values are normalized and whitespace-collapsed. They match when one
 * contains the other, or when enough expected tokens of three or more
 * characters appear in the actual text: at least two, and at least 60% of them.
 *
 * @param {string} actual - Text currently present in the field.
 * @param {string} expected - Text that was meant to be typed.
 * @returns {boolean} True when the typed text looks applied.
 */
function typedTextLooksApplied(actual, expected) {
    const canonicalize = (raw) => normalizeText(raw || "").replace(/\s+/g, " ").trim();
    const typed = canonicalize(actual);
    const wanted = canonicalize(expected);
    if (!typed || !wanted) {
        return false;
    }
    // Equality is the degenerate case of mutual containment.
    if (typed.includes(wanted) || wanted.includes(typed)) {
        return true;
    }
    const significantTokens = wanted.split(/[^a-z0-9]+/).filter((token) => token.length >= 3);
    if (significantTokens.length === 0) {
        return false;
    }
    let found = 0;
    for (const token of significantTokens) {
        if (typed.includes(token)) {
            found += 1;
        }
    }
    const required = Math.max(2, Math.ceil(significantTokens.length * 0.6));
    return found >= required;
}
196
/**
 * Tells whether a target description asks to move to the next track.
 *
 * The description is normalized with normalizeText() before matching. The
 * infinitive "avancar" and the form "avance" are accepted alongside "avanca",
 * so this check agrees with the in-page `wantsNext` pattern, which already
 * accepts "avancar". The accented alternative is kept in case the normalizer
 * leaves the cedilla in place.
 *
 * @param {string} description - Free-text description of the click target.
 * @returns {boolean} True when a "next/skip" keyword is present as a whole word.
 */
function descriptionWantsNext(description) {
    return /\b(proxim[ao]?|next|skip|pular|avanca|avancar|avance|avanç[ae]r?)\b/.test(normalizeText(description || ""));
}
199
/**
 * Tells whether a target description asks to go back to the previous track.
 *
 * @param {string} description - Free-text description of the click target.
 * @returns {boolean} True when a "previous/back" keyword is present as a whole word.
 */
function descriptionWantsPrevious(description) {
    const normalized = normalizeText(description || "");
    const previousPattern = /\b(anterior|previous|volta[ar]?|back|retorna[ar]?)\b/;
    return previousPattern.test(normalized);
}
202
/**
 * Tells whether a target description asks to pause playback.
 *
 * @param {string} description - Free-text description of the click target.
 * @returns {boolean} True when a pause keyword is present as a whole word.
 */
function descriptionWantsPause(description) {
    const normalized = normalizeText(description || "");
    const pausePattern = /\b(pausa|pause|pausar)\b/;
    return pausePattern.test(normalized);
}
205
/**
 * Tells whether a target description asks to resume or start playback.
 *
 * @param {string} description - Free-text description of the click target.
 * @returns {boolean} True when a resume/play keyword is present as a whole word.
 */
function descriptionWantsResume(description) {
    const normalized = normalizeText(description || "");
    const resumePattern = /\b(retoma|retomar|resume|continu[ae]r|despausa|play)\b/;
    return resumePattern.test(normalized);
}
208
/**
 * Maps a target description to a native media transport command.
 *
 * A command is only produced when the description mentions a player or media
 * service and does not mention a search or result target.
 *
 * @param {string} description - Free-text description of the click target.
 * @returns {"next"|"previous"|"play_pause"|null} The transport command, or
 *   null when the description should not be handled by a media key.
 */
function extractNativeMediaTransportCommand(description) {
    const text = normalizeText(description || "");
    if (!text) {
        return null;
    }
    // Descriptions about search results or specific tracks are not transport requests.
    const mentionsResultOrSearch = /\b(resultado|tocavel|relacionado|faixa|search|busca|pesquisa)\b/.test(text);
    if (mentionsResultOrSearch) {
        return null;
    }
    const mentionsPlayer = /\b(player|controle|fila|music|spotify|youtube|deezer|apple)\b/.test(text);
    if (!mentionsPlayer) {
        return null;
    }
    if (descriptionWantsNext(description)) {
        return "next";
    }
    if (descriptionWantsPrevious(description)) {
        return "previous";
    }
    // Pause and resume share the single toggle command.
    const wantsToggle = descriptionWantsPause(description) || descriptionWantsResume(description);
    return wantsToggle ? "play_pause" : null;
}
132
228
/**
 * Tells whether a description carries text that can anchor a lookup: either a
 * quoted phrase or at least one meaningful token.
 *
 * @param {string} description - Free-text description of the target.
 * @returns {boolean} True when a quoted phrase or meaningful token exists.
 */
function descriptionLikelyHasTextAnchor(description) {
    if (extractQuotedPhrases(description).length > 0) {
        return true;
    }
    return extractMeaningfulDescriptionTokens(description).length > 0;
}
@@ -832,6 +928,8 @@ export class NativeMacOSJobExecutor {
832
928
  cancelledJobs = new Set();
833
929
  activeChild = null;
834
930
  lastActiveApp = null;
931
+ lastVisualTargetDescription = null;
932
+ lastVisualTargetApp = null;
835
933
  constructor(bridgeConfig) {
836
934
  this.bridgeConfig = bridgeConfig;
837
935
  }
@@ -891,8 +989,24 @@ export class NativeMacOSJobExecutor {
891
989
  continue;
892
990
  }
893
991
  if (action.type === "type_text") {
894
- await reporter.progress(progressPercent, "Digitando texto no app ativo");
895
- await this.typeText(action.text);
992
+ const typingApp = this.lastActiveApp || await this.getFrontmostAppName();
993
+ await reporter.progress(progressPercent, `Digitando texto em ${typingApp || "app ativo"}`);
994
+ const typed = await this.guidedTypeText(action.text, typingApp || undefined);
995
+ if (!typed.ok) {
996
+ throw new Error(typed.reason || "Nao consegui digitar o texto no app ativo.");
997
+ }
998
+ resultPayload.last_typed = {
999
+ strategy: typed.strategy,
1000
+ verified: typed.verified,
1001
+ app: typed.app,
1002
+ attempts: typed.attempts,
1003
+ text_preview: clipText(action.text, 180),
1004
+ };
1005
+ this.lastVisualTargetDescription = null;
1006
+ this.lastVisualTargetApp = null;
1007
+ completionNotes.push(typed.verified
1008
+ ? `Digitei e confirmei o texto no ${typed.app || "app ativo"}.`
1009
+ : `Digitei o texto no ${typed.app || "app ativo"}.`);
896
1010
  continue;
897
1011
  }
898
1012
  if (action.type === "take_screenshot") {
@@ -1069,6 +1183,42 @@ export class NativeMacOSJobExecutor {
1069
1183
  const initialBrowserState = browserApp
1070
1184
  ? await this.captureBrowserPageState(browserApp).catch(() => null)
1071
1185
  : null;
1186
+ const nativeMediaTransport = extractNativeMediaTransportCommand(targetDescription);
1187
+ if (nativeMediaTransport) {
1188
+ await reporter.progress(progressPercent, `Tentando controle de mídia nativo do macOS para ${targetDescription}`);
1189
+ try {
1190
+ await this.triggerMacOSMediaTransport(nativeMediaTransport);
1191
+ let validated = false;
1192
+ let validationReason = "";
1193
+ if (action.verification_prompt) {
1194
+ const verification = await this.validateVisualClickWithVision(job.job_id, targetDescription, action.verification_prompt, progressPercent, reporter, artifacts, "native_media_transport_result");
1195
+ validated = verification.ok;
1196
+ validationReason = verification.reason;
1197
+ }
1198
+ else if (browserApp) {
1199
+ const browserValidation = await this.confirmBrowserClick(browserApp, initialBrowserState, targetDescription, null);
1200
+ validated = browserValidation.ok;
1201
+ validationReason = browserValidation.reason;
1202
+ }
1203
+ else {
1204
+ validated = true;
1205
+ }
1206
+ if (validated) {
1207
+ resultPayload.last_click = {
1208
+ strategy: "native_media_transport",
1209
+ matched_text: targetDescription,
1210
+ };
1211
+ completionNotes.push(`Acionei ${targetDescription} usando o controle de mídia nativo do macOS.`);
1212
+ clickSucceeded = true;
1213
+ break;
1214
+ }
1215
+ lastFailureReason = validationReason || `O controle de mídia nativo do macOS nao confirmou ${targetDescription}.`;
1216
+ await reporter.progress(progressPercent, "O controle de mídia nativo nao foi suficiente; vou tentar DOM/OCR");
1217
+ }
1218
+ catch (error) {
1219
+ lastFailureReason = error instanceof Error ? error.message : String(error);
1220
+ }
1221
+ }
1072
1222
  if (browserApp === "Safari") {
1073
1223
  await reporter.progress(progressPercent, `Tentando localizar ${targetDescription} diretamente no Safari`);
1074
1224
  const domClick = await this.trySafariDomClick(targetDescription);
@@ -1086,6 +1236,8 @@ export class NativeMacOSJobExecutor {
1086
1236
  validationReason = browserValidation.reason;
1087
1237
  }
1088
1238
  if (validated) {
1239
+ this.lastVisualTargetDescription = targetDescription;
1240
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1089
1241
  resultPayload.last_click = {
1090
1242
  strategy: domClick.strategy || "safari_dom",
1091
1243
  matched_text: domClick.matchedText || null,
@@ -1127,6 +1279,8 @@ export class NativeMacOSJobExecutor {
1127
1279
  }
1128
1280
  if (validated) {
1129
1281
  const region = ocrClick.region || null;
1282
+ this.lastVisualTargetDescription = targetDescription;
1283
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1130
1284
  resultPayload.last_click = {
1131
1285
  strategy: ocrClick.strategy || "local_ocr",
1132
1286
  score: ocrClick.score || null,
@@ -1202,6 +1356,8 @@ export class NativeMacOSJobExecutor {
1202
1356
  }
1203
1357
  }
1204
1358
  completionNotes.push(`Localizei e cliquei em ${targetDescription}.`);
1359
+ this.lastVisualTargetDescription = targetDescription;
1360
+ this.lastVisualTargetApp = browserApp || action.app || this.lastActiveApp;
1205
1361
  clickSucceeded = true;
1206
1362
  break;
1207
1363
  }
@@ -1387,7 +1543,26 @@ end tell
1387
1543
  const beforePlayerState = normalizeText(before?.playerState || "");
1388
1544
  const afterPlayerState = normalizeText(after.playerState || "");
1389
1545
  const playerLooksActive = afterPlayerState.includes("pause") || afterPlayerState.includes("pausar");
1546
+ const playerLooksPaused = !playerLooksActive && /play|tocar|reproduzir|continuar|retomar|resume/.test(afterPlayerState);
1547
+ const wantsNext = descriptionWantsNext(targetDescription);
1548
+ const wantsPrevious = descriptionWantsPrevious(targetDescription);
1549
+ const wantsPause = descriptionWantsPause(targetDescription);
1550
+ const wantsResume = descriptionWantsResume(targetDescription);
1551
+ const mediaQueryTokens = extractMediaQueryTokens(targetDescription);
1552
+ const mediaMatchCount = countMatchingTokens(after.playerTitle || "", mediaQueryTokens);
1390
1553
  if (afterUrl.includes("music.youtube.com")) {
1554
+ if (wantsPause && beforePlayerState && beforePlayerState !== afterPlayerState && playerLooksPaused) {
1555
+ return true;
1556
+ }
1557
+ if (wantsResume && playerLooksActive && beforePlayerState !== afterPlayerState) {
1558
+ return true;
1559
+ }
1560
+ if ((wantsNext || wantsPrevious) && beforePlayerTitle && afterPlayerTitle && beforePlayerTitle !== afterPlayerTitle) {
1561
+ return true;
1562
+ }
1563
+ if (mediaQueryTokens.length >= 2 && mediaMatchCount >= Math.max(2, Math.ceil(mediaQueryTokens.length * 0.5)) && playerLooksActive) {
1564
+ return true;
1565
+ }
1391
1566
  if (beforePlayerState && afterPlayerState && beforePlayerState !== afterPlayerState && playerLooksActive) {
1392
1567
  return true;
1393
1568
  }
@@ -1440,6 +1615,23 @@ end tell
1440
1615
  if (!key) {
1441
1616
  throw new Error(`Invalid shortcut: ${shortcut}`);
1442
1617
  }
1618
+ const normalizedShortcut = normalizeText(shortcut).replace(/[\s+-]+/g, "_");
1619
+ const mediaCommandMap = {
1620
+ media_play: "play_pause",
1621
+ media_pause: "play_pause",
1622
+ media_play_pause: "play_pause",
1623
+ media_resume: "play_pause",
1624
+ media_next: "next",
1625
+ media_proxima: "next",
1626
+ media_previous: "previous",
1627
+ media_prev: "previous",
1628
+ media_anterior: "previous",
1629
+ };
1630
+ const mediaCommand = mediaCommandMap[normalizedShortcut];
1631
+ if (mediaCommand) {
1632
+ await this.triggerMacOSMediaTransport(mediaCommand);
1633
+ return;
1634
+ }
1443
1635
  const namedKeyCodes = {
1444
1636
  return: 36,
1445
1637
  enter: 36,
@@ -1465,6 +1657,45 @@ end tell
1465
1657
  `tell application "System Events" to keystroke "${escapeAppleScript(key)}"${usingClause}`,
1466
1658
  ]);
1467
1659
  }
1660
+ async triggerMacOSMediaTransport(command) {
1661
+ const keyTypeMap = {
1662
+ play_pause: 16,
1663
+ next: 17,
1664
+ previous: 18,
1665
+ };
1666
+ const keyType = keyTypeMap[command];
1667
+ const swiftScript = `
1668
+ import AppKit
1669
+ import Foundation
1670
+
1671
+ let keyType = ${keyType}
1672
+
1673
+ func postMediaKey(_ keyType: Int32, down: Bool) {
1674
+ let eventFlags = NSEvent.ModifierFlags(rawValue: 0xA00)
1675
+ let state = down ? 0xA : 0xB
1676
+ let data1 = Int((keyType << 16) | (Int32(state) << 8))
1677
+ guard let event = NSEvent.otherEvent(
1678
+ with: .systemDefined,
1679
+ location: .zero,
1680
+ modifierFlags: eventFlags,
1681
+ timestamp: 0,
1682
+ windowNumber: 0,
1683
+ context: nil,
1684
+ subtype: 8,
1685
+ data1: data1,
1686
+ data2: -1
1687
+ ) else {
1688
+ return
1689
+ }
1690
+ event.cgEvent?.post(tap: .cghidEventTap)
1691
+ }
1692
+
1693
+ postMediaKey(Int32(keyType), down: true)
1694
+ usleep(90000)
1695
+ postMediaKey(Int32(keyType), down: false)
1696
+ `;
1697
+ await this.runCommand("swift", ["-e", swiftScript]);
1698
+ }
1468
1699
  async typeText(text) {
1469
1700
  const previousClipboard = await this.readClipboardText();
1470
1701
  try {
@@ -1477,6 +1708,227 @@ end tell
1477
1708
  }
1478
1709
  }
1479
1710
  }
1711
+ resolveLikelySearchShortcut(app) {
1712
+ const normalizedHint = normalizeText(this.lastVisualTargetDescription || "");
1713
+ const looksLikeSearchTarget = /\b(busca|pesquisa|search|campo|caixa|icone|ícone)\b/.test(normalizedHint);
1714
+ if (!looksLikeSearchTarget || !app) {
1715
+ return null;
1716
+ }
1717
+ if (app === "Spotify") {
1718
+ return "cmd+l";
1719
+ }
1720
+ if (app === "Music") {
1721
+ return "cmd+f";
1722
+ }
1723
+ return null;
1724
+ }
1725
+ async guidedTypeText(text, preferredApp) {
1726
+ const app = preferredApp || this.lastActiveApp || await this.getFrontmostAppName();
1727
+ if (app === "Safari") {
1728
+ const safariResult = await this.trySafariGuidedType(text);
1729
+ if (safariResult.ok) {
1730
+ return {
1731
+ ...safariResult,
1732
+ app,
1733
+ };
1734
+ }
1735
+ }
1736
+ const searchShortcut = this.resolveLikelySearchShortcut(app);
1737
+ if (searchShortcut) {
1738
+ await this.pressShortcut(searchShortcut).catch(() => undefined);
1739
+ await delay(180);
1740
+ }
1741
+ await this.typeText(text);
1742
+ return {
1743
+ ok: true,
1744
+ verified: false,
1745
+ strategy: searchShortcut ? `clipboard_paste_after_${searchShortcut}` : "clipboard_paste",
1746
+ app: app || null,
1747
+ attempts: 1,
1748
+ };
1749
+ }
1750
+ async trySafariGuidedType(text) {
1751
+ for (let attempt = 0; attempt < 3; attempt += 1) {
1752
+ try {
1753
+ const result = await this.runSafariJsonScript(`
1754
+ const inputText = String(__input?.text || "");
1755
+ function isVisible(element) {
1756
+ if (!(element instanceof HTMLElement)) return false;
1757
+ const rect = element.getBoundingClientRect();
1758
+ if (rect.width < 4 || rect.height < 4) return false;
1759
+ const style = window.getComputedStyle(element);
1760
+ if (style.visibility === "hidden" || style.display === "none" || Number(style.opacity || "1") === 0) return false;
1761
+ return rect.bottom >= 0 && rect.right >= 0 && rect.top <= window.innerHeight && rect.left <= window.innerWidth;
1762
+ }
1763
+ function isEditable(element) {
1764
+ return element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || (element instanceof HTMLElement && element.isContentEditable);
1765
+ }
1766
+ function readEditableValue(element) {
1767
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1768
+ return String(element.value || "").trim();
1769
+ }
1770
+ if (element instanceof HTMLElement && element.isContentEditable) {
1771
+ return String(element.innerText || element.textContent || "").trim();
1772
+ }
1773
+ return "";
1774
+ }
1775
+ function clearAndFill(element, value) {
1776
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1777
+ element.focus();
1778
+ element.select?.();
1779
+ element.value = "";
1780
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "deleteContentBackward", data: null }));
1781
+ element.value = value;
1782
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: value }));
1783
+ element.dispatchEvent(new Event("change", { bubbles: true }));
1784
+ return;
1785
+ }
1786
+ if (element instanceof HTMLElement && element.isContentEditable) {
1787
+ element.focus();
1788
+ const selection = window.getSelection();
1789
+ const range = document.createRange();
1790
+ range.selectNodeContents(element);
1791
+ selection?.removeAllRanges();
1792
+ selection?.addRange(range);
1793
+ document.execCommand("selectAll", false);
1794
+ document.execCommand("delete", false);
1795
+ document.execCommand("insertText", false, value);
1796
+ element.dispatchEvent(new InputEvent("input", { bubbles: true, inputType: "insertText", data: value }));
1797
+ }
1798
+ }
1799
+ const selectors = location.hostname.includes("music.youtube.com")
1800
+ ? [
1801
+ "ytmusic-search-box input#input",
1802
+ "ytmusic-search-box input",
1803
+ "input[placeholder*='Search']",
1804
+ "input[placeholder*='Pesquisar']",
1805
+ "[role='searchbox'] input"
1806
+ ]
1807
+ : location.hostname.includes("open.spotify.com")
1808
+ ? [
1809
+ "input[data-testid='search-input']",
1810
+ "[role='searchbox'] input",
1811
+ "input[placeholder*='Search']"
1812
+ ]
1813
+ : location.hostname.includes("deezer.com")
1814
+ ? [
1815
+ "input[type='search']",
1816
+ "input[placeholder*='Search']",
1817
+ "input[placeholder*='Pesquisar']",
1818
+ "form input[type='text']"
1819
+ ]
1820
+ : location.hostname.includes("soundcloud.com")
1821
+ ? [
1822
+ "input[type='search']",
1823
+ "input[placeholder*='Search']",
1824
+ "form input[type='search']",
1825
+ "form input[type='text']"
1826
+ ]
1827
+ : location.hostname.includes("music.amazon.com")
1828
+ ? [
1829
+ "input[type='search']",
1830
+ "input[aria-label*='Search']",
1831
+ "input[placeholder*='Search']",
1832
+ "[role='searchbox'] input"
1833
+ ]
1834
+ : [
1835
+ "textarea",
1836
+ "input[type='search']",
1837
+ "input[type='text']",
1838
+ "input:not([type])",
1839
+ "[contenteditable='true'][role='textbox']",
1840
+ "[contenteditable='true']"
1841
+ ];
1842
+ const active = document.activeElement;
1843
+ let target = null;
1844
+ if (active instanceof HTMLElement && isVisible(active) && isEditable(active)) {
1845
+ target = active;
1846
+ }
1847
+ if (!target) {
1848
+ const candidates = selectors
1849
+ .flatMap((selector) => Array.from(document.querySelectorAll(selector)))
1850
+ .filter((node) => node instanceof HTMLElement)
1851
+ .filter((node) => isVisible(node))
1852
+ .map((node, index) => {
1853
+ const label = String(
1854
+ node.getAttribute("aria-label")
1855
+ || node.getAttribute("placeholder")
1856
+ || node.getAttribute("title")
1857
+ || node.textContent
1858
+ || ""
1859
+ ).toLowerCase();
1860
+ let score = 0;
1861
+ if (label.includes("search") || label.includes("pesquis")) score += 80;
1862
+ if (node === document.activeElement) score += 30;
1863
+ score += Math.max(0, 12 - index);
1864
+ return { node, score };
1865
+ })
1866
+ .sort((left, right) => right.score - left.score);
1867
+ target = candidates[0]?.node || null;
1868
+ }
1869
+ if (!(target instanceof HTMLElement) || !isEditable(target)) {
1870
+ return { ok: false, reason: "Nao achei um campo editavel confiavel no Safari.", strategy: "safari_guided_type_no_field" };
1871
+ }
1872
+ target.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
1873
+ clearAndFill(target, inputText);
1874
+ return {
1875
+ ok: true,
1876
+ actualText: readEditableValue(target),
1877
+ strategy: target === active ? "safari_guided_type_active_field" : "safari_guided_type_search_field",
1878
+ };
1879
+ `, { text });
1880
+ if (result?.ok && typedTextLooksApplied(result.actualText || "", text)) {
1881
+ return {
1882
+ ok: true,
1883
+ verified: true,
1884
+ strategy: result.strategy || "safari_guided_type",
1885
+ attempts: attempt + 1,
1886
+ };
1887
+ }
1888
+ await delay(180);
1889
+ }
1890
+ catch (error) {
1891
+ const detail = error instanceof Error ? error.message : String(error);
1892
+ if (detail.toLowerCase().includes("allow javascript from apple events")) {
1893
+ break;
1894
+ }
1895
+ }
1896
+ }
1897
+ await this.typeText(text);
1898
+ await delay(180);
1899
+ try {
1900
+ const verification = await this.runSafariJsonScript(`
1901
+ function readEditableValue(element) {
1902
+ if (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement) {
1903
+ return String(element.value || "").trim();
1904
+ }
1905
+ if (element instanceof HTMLElement && element.isContentEditable) {
1906
+ return String(element.innerText || element.textContent || "").trim();
1907
+ }
1908
+ return "";
1909
+ }
1910
+ const active = document.activeElement;
1911
+ return { actualText: active ? readEditableValue(active) : "" };
1912
+ `, {});
1913
+ if (typedTextLooksApplied(verification.actualText || "", text)) {
1914
+ return {
1915
+ ok: true,
1916
+ verified: true,
1917
+ strategy: "safari_clipboard_retry",
1918
+ attempts: 4,
1919
+ };
1920
+ }
1921
+ }
1922
+ catch {
1923
+ // ignore and fall back to unverified success below
1924
+ }
1925
+ return {
1926
+ ok: true,
1927
+ verified: false,
1928
+ strategy: "clipboard_paste",
1929
+ attempts: 1,
1930
+ };
1931
+ }
1480
1932
  async scrollView(direction, amount = "medium", steps = 1) {
1481
1933
  const clampedSteps = Math.max(1, Math.min(Math.round(steps || 1), 6));
1482
1934
  const lineDelta = {
@@ -1849,11 +2301,17 @@ const normalizedDescription = normalize(rawDescription);
1849
2301
  const isYouTubeMusic = location.hostname.includes("music.youtube.com");
1850
2302
  const wantsFirst = /\\b(primeir[ao]?|first)\\b/.test(normalizedDescription);
1851
2303
  const wantsVideo = /\\b(video|videos|musica|faixa|youtube|resultado|watch)\\b/.test(normalizedDescription) || location.hostname.includes("youtube");
2304
+ const wantsNext = /\\b(proxim[ao]?|next|skip|pular|avanca|avancar)\\b/.test(normalizedDescription);
2305
+ const wantsPrevious = /\\b(anterior|previous|volta[ar]?|back|retorna[ar]?)\\b/.test(normalizedDescription);
2306
+ const wantsPause = /\\b(pausa|pause|pausar)\\b/.test(normalizedDescription);
2307
+ const wantsResume = /\\b(retoma|retomar|resume|continu[ae]r|despausa|play)\\b/.test(normalizedDescription);
1852
2308
  const stopWords = new Set([
1853
2309
  "o", "a", "os", "as", "um", "uma", "uns", "umas", "de", "da", "do", "das", "dos",
1854
2310
  "em", "no", "na", "nos", "nas", "para", "por", "com", "que", "visivel", "visiveis",
1855
2311
  "visivel", "tela", "pagina", "page", "site", "link", "botao", "botao", "clicar",
1856
- "clique", "seleciona", "selecionar", "resultado", "resultados"
2312
+ "clique", "seleciona", "selecionar", "resultado", "resultados", "youtube", "music",
2313
+ "melhor", "tocavel", "relacionado", "diretamente", "navegador", "player", "fila",
2314
+ "play", "pause", "pausa", "proxima", "proximo", "anterior"
1857
2315
  ]);
1858
2316
  const quotedPhrases = Array.from(rawDescription.matchAll(/["'“”‘’]([^"'“”‘’]{2,80})["'“”‘’]/g))
1859
2317
  .map((match) => normalize(match[1]));
@@ -1928,6 +2386,173 @@ function deriveText(element) {
1928
2386
  return "";
1929
2387
  }
1930
2388
 
2389
/*
 * Scrolls the element into view, dispatches synthetic hover and press mouse
 * events at its centre, activates it exactly once and returns a click-result
 * record. Returns null when the argument is not an element.
 *
 * The element is activated a single time: dispatching a synthetic click event
 * and then also calling element.click() would run the click handlers twice,
 * which cancels out toggles such as play/pause and skips two tracks on next.
 */
function clickElement(element, strategy, matchedText, matchedHref, score, totalCandidates) {
  if (!(element instanceof HTMLElement || element instanceof HTMLAnchorElement)) {
    return null;
  }
  element.scrollIntoView({ block: "center", inline: "center", behavior: "auto" });
  const rect = element.getBoundingClientRect();
  const dispatchMouseEvent = (eventName) => element.dispatchEvent(new MouseEvent(eventName, {
    bubbles: true,
    cancelable: true,
    view: window,
    clientX: rect.left + (rect.width / 2),
    clientY: rect.top + (rect.height / 2),
  }));
  for (const eventName of ["mouseover", "mousedown", "mouseup"]) {
    dispatchMouseEvent(eventName);
  }
  if (typeof element.click === "function") {
    element.click();
  }
  else {
    /* Fallback for nodes without a click method. */
    dispatchMouseEvent("click");
  }
  return {
    clicked: true,
    matchedText: String(matchedText || "").slice(0, 180),
    matchedHref: String(matchedHref || ""),
    score,
    totalCandidates,
    strategy,
  };
}
2416
+
2417
/*
 * On YouTube Music, clicks the player-bar button whose label matches the
 * requested transport action (next, previous, pause or resume).
 * Returns the clickElement() result, or null when the page is not YouTube
 * Music, no transport action was requested, or no button label matches.
 *
 * Position and "player" bonuses only break ties between matching buttons.
 * Applying them to every button would give unrelated buttons a positive score
 * and let the first button in the bar win when nothing matched.
 */
function attemptYouTubeMusicTransportClick() {
  if (!isYouTubeMusic || !(wantsNext || wantsPrevious || wantsPause || wantsResume)) {
    return null;
  }

  const playerButtons = Array.from(document.querySelectorAll(
    "ytmusic-player-bar button, ytmusic-player-bar [role='button'], ytmusic-player-bar tp-yt-paper-icon-button"
  ))
    .filter((node) => node instanceof HTMLElement)
    .filter((node) => isVisible(node));

  const ranked = playerButtons
    .map((node, index) => {
      const label = normalize([
        deriveText(node),
        node.getAttribute("aria-label"),
        node.getAttribute("title"),
        node.id,
      ].filter(Boolean).join(" "));
      let matchScore = 0;
      if (wantsNext && /proxim|next|skip/.test(label)) matchScore += 140;
      if (wantsPrevious && /anterior|previous|back|volta/.test(label)) matchScore += 140;
      if (wantsPause && /pause|pausa|pausar/.test(label)) matchScore += 140;
      if (wantsResume && /play|tocar|reproduzir|resume|retomar|continuar/.test(label)) matchScore += 140;
      if (matchScore === 0) {
        return null;
      }
      let score = matchScore;
      if (label.includes("player")) score += 12;
      score += Math.max(0, 12 - index);
      return { node, label, score };
    })
    .filter(Boolean)
    .sort((left, right) => right.score - left.score);

  if (!ranked.length) {
    return null;
  }

  const winner = ranked[0];
  return clickElement(winner.node, "safari_dom_ytmusic_transport", winner.label, "", winner.score, ranked.length);
}
2455
+
2456
/*
 * On YouTube Music, clicks the list row that best matches the quoted phrases
 * and tokens of the description, preferring a play control inside the row.
 * Returns the clickElement() result, or null when the page is not YouTube
 * Music, a next/previous/pause action was requested, there is nothing to
 * match against, or no row shows any textual match.
 *
 * The position bonus is added only after a row has matched some text.
 * Otherwise the first visible rows would always get a positive score and an
 * unrelated first result would be clicked.
 */
function attemptYouTubeMusicSearchResultClick() {
  if (!isYouTubeMusic || wantsNext || wantsPrevious || wantsPause) {
    return null;
  }
  if (!quotedPhrases.length && !tokens.length) {
    return null;
  }

  const rows = Array.from(document.querySelectorAll("ytmusic-responsive-list-item-renderer"))
    .filter((node) => node instanceof HTMLElement)
    .filter((node) => isVisible(node));

  const rankedRows = rows
    .map((row, index) => {
      const titleNode = row.querySelector("#title, .title, yt-formatted-string.title");
      const subtitleNode = row.querySelector(".subtitle, .byline, .secondary-flex-columns");
      const titleText = String((titleNode && titleNode.textContent) || "").trim();
      const subtitleText = String((subtitleNode && subtitleNode.textContent) || "").trim();
      const rowText = deriveText(row);
      const normalizedTitle = normalize(titleText);
      const normalizedSubtitle = normalize(subtitleText);
      const normalizedRow = normalize(rowText);
      let score = 0;

      for (const phrase of quotedPhrases) {
        if (!phrase) continue;
        if (normalizedTitle.includes(phrase)) score += 160;
        else if (normalizedRow.includes(phrase)) score += 110;
      }

      for (const token of tokens) {
        if (normalizedTitle.includes(token)) score += 28;
        else if (normalizedSubtitle.includes(token)) score += 16;
        else if (normalizedRow.includes(token)) score += 10;
      }

      if (tokens.length > 1) {
        const titleMatches = tokens.filter((token) => normalizedTitle.includes(token)).length;
        const rowMatches = tokens.filter((token) => normalizedRow.includes(token)).length;
        if (titleMatches >= Math.max(2, Math.ceil(tokens.length * 0.5))) score += 80;
        if (rowMatches === tokens.length) score += 40;
      }

      /* Rows without any textual evidence are not candidates. */
      if (score === 0) {
        return null;
      }

      score += Math.max(0, 10 - index);

      const clickableCandidates = Array.from(row.querySelectorAll(
        "ytmusic-item-thumbnail-overlay-renderer button, button[aria-label], tp-yt-paper-icon-button, a[href*='watch?v=']"
      ))
        .filter((candidate) => candidate instanceof HTMLElement || candidate instanceof HTMLAnchorElement)
        .filter((candidate) => isVisible(candidate))
        .map((candidate) => {
          const label = normalize([
            deriveText(candidate),
            candidate.getAttribute("aria-label"),
            candidate.getAttribute("title"),
          ].filter(Boolean).join(" "));
          let candidateScore = 0;
          if (/play|tocar|reproduzir|assistir/.test(label)) candidateScore += 30;
          if (candidate instanceof HTMLAnchorElement && normalize(candidate.href).includes("watch?v=")) candidateScore += 18;
          if (candidate.closest("ytmusic-item-thumbnail-overlay-renderer")) candidateScore += 14;
          return { candidate, label, candidateScore };
        })
        .sort((left, right) => right.candidateScore - left.candidateScore);

      const bestControl = clickableCandidates[0];
      return {
        row,
        titleText,
        href: bestControl?.candidate instanceof HTMLAnchorElement ? bestControl.candidate.href : "",
        score: score + (bestControl?.candidateScore || 0),
        target: bestControl?.candidate || row,
      };
    })
    .filter(Boolean)
    .sort((left, right) => right.score - left.score);

  if (!rankedRows.length) {
    return null;
  }

  const winner = rankedRows[0];
  return clickElement(
    winner.target,
    "safari_dom_ytmusic_result",
    winner.titleText || deriveText(winner.row),
    winner.href || "",
    winner.score,
    rankedRows.length,
  );
}
2545
+
2546
+ const ytmTransport = attemptYouTubeMusicTransportClick();
2547
+ if (ytmTransport) {
2548
+ return ytmTransport;
2549
+ }
2550
+
2551
+ const ytmResult = attemptYouTubeMusicSearchResultClick();
2552
+ if (ytmResult) {
2553
+ return ytmResult;
2554
+ }
2555
+
1931
2556
  function scoreCandidate(element, rank) {
1932
2557
  const text = deriveText(element);
1933
2558
  const href = element instanceof HTMLAnchorElement
package/dist/types.js CHANGED
@@ -1,5 +1,5 @@
1
1
  export const BRIDGE_CONFIG_VERSION = 1;
2
- export const BRIDGE_VERSION = "0.5.8";
2
+ export const BRIDGE_VERSION = "0.5.9";
3
3
  export const BRIDGE_PACKAGE_NAME = "@leg3ndy/otto-bridge";
4
4
  export const DEFAULT_API_BASE_URL = "http://localhost:8000";
5
5
  export const DEFAULT_POLL_INTERVAL_MS = 3000;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@leg3ndy/otto-bridge",
3
- "version": "0.5.8",
3
+ "version": "0.5.9",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "Local companion for Otto Bridge device pairing and WebSocket runtime.",