safari-pilot 0.1.29 → 0.1.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.claude-plugin/commands/stats.md +5 -0
  2. package/.claude-plugin/plugin.json +12 -2
  3. package/README.md +35 -5
  4. package/bin/Safari Pilot.app/Contents/CodeResources +0 -0
  5. package/bin/Safari Pilot.app/Contents/Info.plist +2 -2
  6. package/bin/Safari Pilot.app/Contents/MacOS/Safari Pilot +0 -0
  7. package/bin/Safari Pilot.app/Contents/PlugIns/Safari Pilot Extension.appex/Contents/Info.plist +2 -2
  8. package/bin/Safari Pilot.app/Contents/PlugIns/Safari Pilot Extension.appex/Contents/MacOS/Safari Pilot Extension +0 -0
  9. package/bin/Safari Pilot.app/Contents/PlugIns/Safari Pilot Extension.appex/Contents/Resources/background.js +50 -0
  10. package/bin/Safari Pilot.app/Contents/PlugIns/Safari Pilot Extension.appex/Contents/Resources/content-main.js +155 -0
  11. package/bin/Safari Pilot.app/Contents/PlugIns/Safari Pilot Extension.appex/Contents/Resources/locator.js +294 -0
  12. package/bin/Safari Pilot.app/Contents/PlugIns/Safari Pilot Extension.appex/Contents/Resources/manifest.json +2 -1
  13. package/bin/Safari Pilot.app/Contents/PlugIns/Safari Pilot Extension.appex/Contents/_CodeSignature/CodeResources +17 -6
  14. package/bin/Safari Pilot.app/Contents/Resources/Base.lproj/Main.storyboardc/Info.plist +0 -0
  15. package/bin/Safari Pilot.app/Contents/Resources/Base.lproj/Main.storyboardc/MainMenu.nib +0 -0
  16. package/bin/Safari Pilot.app/Contents/Resources/Base.lproj/Main.storyboardc/NSWindowController-B8D-0N-5wS.nib +0 -0
  17. package/bin/Safari Pilot.app/Contents/_CodeSignature/CodeResources +7 -7
  18. package/bin/Safari Pilot.zip +0 -0
  19. package/bin/SafariPilotd +0 -0
  20. package/dist/cli/format.d.ts +4 -0
  21. package/dist/cli/format.js +25 -0
  22. package/dist/cli/format.js.map +1 -0
  23. package/dist/cli/stats.d.ts +2 -0
  24. package/dist/cli/stats.js +177 -0
  25. package/dist/cli/stats.js.map +1 -0
  26. package/dist/engine-selector.js +5 -1
  27. package/dist/engine-selector.js.map +1 -1
  28. package/dist/engines/daemon.js +17 -5
  29. package/dist/engines/daemon.js.map +1 -1
  30. package/dist/errors.d.ts +10 -0
  31. package/dist/errors.js +36 -0
  32. package/dist/errors.js.map +1 -1
  33. package/dist/overlays/app-install.json +31 -0
  34. package/dist/overlays/cookie-consent.json +82 -0
  35. package/dist/overlays/index.d.ts +4 -0
  36. package/dist/overlays/index.js +84 -0
  37. package/dist/overlays/index.js.map +1 -0
  38. package/dist/overlays/paywalls.json +45 -0
  39. package/dist/overlays/registration-walls.json +45 -0
  40. package/dist/overlays/types.d.ts +33 -0
  41. package/dist/overlays/types.js +2 -0
  42. package/dist/overlays/types.js.map +1 -0
  43. package/dist/server.js +84 -29
  44. package/dist/server.js.map +1 -1
  45. package/dist/tools/extraction.d.ts +1 -3
  46. package/dist/tools/extraction.js +108 -51
  47. package/dist/tools/extraction.js.map +1 -1
  48. package/dist/tools/interaction.d.ts +1 -0
  49. package/dist/tools/interaction.js +59 -0
  50. package/dist/tools/interaction.js.map +1 -1
  51. package/dist/tools/overlays.d.ts +22 -0
  52. package/dist/tools/overlays.js +104 -0
  53. package/dist/tools/overlays.js.map +1 -0
  54. package/dist/types.d.ts +12 -0
  55. package/extension/background.js +50 -0
  56. package/extension/content-main.js +155 -0
  57. package/extension/locator.js +294 -0
  58. package/extension/manifest.json +2 -1
  59. package/hooks/session-start.sh +8 -0
  60. package/package.json +7 -3
  61. package/scripts/build-extension-test.sh +34 -0
  62. package/scripts/build-extension.sh +10 -0
  63. package/scripts/pre-tag-check.sh +25 -9
  64. package/skills/dismiss-overlays-recovery.SKILL.md +24 -0
  65. package/skills/evidence-grounded-screenshot.SKILL.md +27 -0
  66. package/skills/temporal-substitution.SKILL.md +40 -0
  67. package/skills/visible-evidence-grounding.SKILL.md +32 -0
@@ -0,0 +1,5 @@
1
+ ---
2
+ description: Local metrics summary over ~/.safari-pilot/trace.ndjson — per-tool count/error-rate/p50/p95, top errors, top domains.
3
+ ---
4
+
5
+ !`node "${CLAUDE_PLUGIN_ROOT}/dist/cli/stats.js" $ARGUMENTS`
@@ -8,7 +8,8 @@
8
8
  "components": {
9
9
  "commands": [
10
10
  "commands/start.md",
11
- "commands/stop.md"
11
+ "commands/stop.md",
12
+ "commands/stats.md"
12
13
  ],
13
14
  "mcpServers": {
14
15
  "safari": {
@@ -20,7 +21,16 @@
20
21
  }
21
22
  }
22
23
  },
23
- "skills": ["skills/safari-pilot/SKILL.md"],
24
+ "skills": [
25
+ "skills/safari-pilot/SKILL.md",
26
+ "skills/login.SKILL.md",
27
+ "skills/paginate-and-scrape.SKILL.md",
28
+ "skills/robust-form-fill.SKILL.md",
29
+ "skills/evidence-grounded-screenshot.SKILL.md",
30
+ "skills/dismiss-overlays-recovery.SKILL.md",
31
+ "skills/visible-evidence-grounding.SKILL.md",
32
+ "skills/temporal-substitution.SKILL.md"
33
+ ],
24
34
  "hooks": [
25
35
  {
26
36
  "event": "SessionStart",
package/README.md CHANGED
@@ -120,19 +120,24 @@ Monitor news.ycombinator.com for any post about our company
120
120
  Open my X.com bookmarks and extract the top 5 posts with author profiles
121
121
  ```
122
122
 
123
- ## Tool Catalog (82 Tools)
123
+ ## Tool Catalog (88 Tools)
124
124
 
125
125
  ### Navigation (7)
126
126
  `safari_navigate` | `safari_navigate_back` | `safari_navigate_forward` | `safari_reload` | `safari_new_tab` | `safari_close_tab` | `safari_list_tabs`
127
127
 
128
- ### Interaction (11)
129
- `safari_click` | `safari_double_click` | `safari_fill` | `safari_select_option` | `safari_check` | `safari_hover` | `safari_type` | `safari_press_key` | `safari_scroll` | `safari_drag` | `safari_handle_dialog`
128
+ ### Interaction (12)
129
+ `safari_click` | `safari_double_click` | `safari_fill` | `safari_select_option` | `safari_check` | `safari_hover` | `safari_type` | `safari_press_key` | `safari_scroll` | `safari_scroll_to_element` | `safari_drag` | `safari_handle_dialog`
130
+
131
+ `safari_scroll_to_element` (v0.1.31) scrolls a specific element into the visible viewport via {selector, text, role+name}. Open shadow root penetration; same-origin iframe traversal. Returns matched-node descriptor + viewport state + multi-match candidates.
132
+
133
+ ### Overlays (1)
134
+ `safari_dismiss_overlays` — detects and dismisses ~14 known overlay patterns (cookie-consent, registration-wall, app-install, paywall) using a curated allowlist with a two-signal-per-pattern rule. id-only sanitized response. Six safety mitigations, including a kill switch (`SAFARI_PILOT_DISABLE_OVERLAY_DISMISS=true`) and opt-in paywall dismissal (off by default; enable with `SAFARI_PILOT_ENABLE_PAYWALL_DISMISS=true`). New in v0.1.31.
130
135
 
131
136
  ### File Upload (1)
132
137
  `safari_file_upload` — programmatic upload to standard `<input type=file>` elements, including hidden inputs behind `<label>` (use `force: true`). 25 MiB / file × 4 / call. Path B architecture: out-of-band byte transport via daemon staging → extension fetch. Does NOT support drag-and-drop dropzones, custom pickers, or native OS dialogs.
133
138
 
134
- ### Extraction (7)
135
- `safari_snapshot` | `safari_get_text` | `safari_get_html` | `safari_get_attribute` | `safari_evaluate` | `safari_take_screenshot` | `safari_get_console_messages`
139
+ ### Extraction (8)
140
+ `safari_snapshot` | `safari_get_text` | `safari_get_html` | `safari_get_attribute` | `safari_evaluate` | `safari_take_screenshot` | `safari_get_console_messages` | `safari_query_all`
136
141
 
137
142
  ### Network (10)
138
143
  `safari_list_network_requests` | `safari_get_network_request` | `safari_intercept_requests` | `safari_network_throttle` | `safari_network_offline` | `safari_mock_request` | `safari_websocket_listen` | `safari_websocket_filter` | `safari_dump_har` | `safari_route_from_har`
@@ -182,6 +187,31 @@ Open my X.com bookmarks and extract the top 5 posts with author profiles
182
187
  ### System (2)
183
188
  `safari_health_check` | `safari_emergency_stop`
184
189
 
190
+ ### Discovery (1)
191
+ `safari_tool_search` — queries the registered tool index with natural-language intent strings and returns ranked tool descriptors. Reduces total token cost when the agent needs a specific capability without preloading the full tool surface.
192
+
193
+ ### Skills (2)
194
+ `safari_run_skill` | `safari_list_skills` — invoke or enumerate plugin skills from `skills/`. Sub-step dispatch bypasses the security pipeline (the outer `safari_run_skill` call is fully secured; inner steps are not individually audited — accepted trade-off for nested skill flows).
195
+
196
+ ## Plugin Skills (8)
197
+
198
+ `safari_run_skill` and `safari_list_skills` consume `skills/*.SKILL.md` files. Eight ship today:
199
+
200
+ - **safari-pilot** (base) — entry-point overview of the tool surface.
201
+ - **login** — credential-flow strategy.
202
+ - **paginate-and-scrape** — multi-page extraction recipe.
203
+ - **robust-form-fill** — defensive form-fill with verify-on-readback.
204
+ - **evidence-grounded-screenshot** *(v0.1.31)* — procedural workflow: dismiss → scroll → screenshot.
205
+ - **dismiss-overlays-recovery** *(v0.1.31)* — strategy for recovering when extraction returns suspiciously short content.
206
+ - **visible-evidence-grounding** *(v0.1.31)* — strategy for grounding factual answers in current visible page state, not prior knowledge.
207
+ - **temporal-substitution** *(v0.1.31)* — strategy for substituting past-relative dates ("yesterday", "January 2024") with the nearest available equivalent today; pairs with the SessionStart hook's `Current date: YYYY-MM-DD` injection.
208
+
209
+ ## Slash Commands
210
+
211
+ - `/safari-pilot:start` — start the daemon + open Safari.
212
+ - `/safari-pilot:stop` — graceful shutdown.
213
+ - `/safari-pilot:stats` *(v0.1.31)* — local-only metrics summary over `~/.safari-pilot/trace.ndjson`. Per-tool count/error-rate/p50/p95, top errors, top domains. Supports `--since 7d|24h|all`, `--by-tool`, `--by-error`, `--by-domain`, `--tail`, `--json`. Test-only: `SAFARI_PILOT_TRACE_OVERRIDE=<path>` env var points at a fake trace file for hermeticity.
214
+
185
215
  ## Architecture
186
216
 
187
217
  ```
@@ -23,13 +23,13 @@
23
23
  <key>CFBundlePackageType</key>
24
24
  <string>APPL</string>
25
25
  <key>CFBundleShortVersionString</key>
26
- <string>0.1.29</string>
26
+ <string>0.1.33</string>
27
27
  <key>CFBundleSupportedPlatforms</key>
28
28
  <array>
29
29
  <string>MacOSX</string>
30
30
  </array>
31
31
  <key>CFBundleVersion</key>
32
- <string>202605060230</string>
32
+ <string>202605121922</string>
33
33
  <key>DTCompiler</key>
34
34
  <string>com.apple.compilers.llvm.clang.1_0</string>
35
35
  <key>DTPlatformBuild</key>
@@ -19,13 +19,13 @@
19
19
  <key>CFBundlePackageType</key>
20
20
  <string>XPC!</string>
21
21
  <key>CFBundleShortVersionString</key>
22
- <string>0.1.29</string>
22
+ <string>0.1.33</string>
23
23
  <key>CFBundleSupportedPlatforms</key>
24
24
  <array>
25
25
  <string>MacOSX</string>
26
26
  </array>
27
27
  <key>CFBundleVersion</key>
28
- <string>202605060230</string>
28
+ <string>202605121922</string>
29
29
  <key>DTCompiler</key>
30
30
  <string>com.apple.compilers.llvm.clang.1_0</string>
31
31
  <key>DTPlatformBuild</key>
@@ -368,6 +368,56 @@ async function executeCommand(cmd) {
368
368
  return result;
369
369
  }
370
370
 
371
+ // safari_take_screenshot — capture the visible viewport of the target tab
372
+ // via browser.tabs.captureVisibleTab. Triggered by the __SP_TAKE_SCREENSHOT__
373
+ // sentinel from src/tools/extraction.ts. Briefly activates target tab in its
374
+ // window (no Safari app activation), captures, restores prior active tab.
375
+ if (cmd.script === '__SP_TAKE_SCREENSHOT__') {
376
+ let prevActiveTabId = null;
377
+ try {
378
+ if (tab.windowId == null) {
379
+ throw { name: 'WINDOW_CLOSED', message: 'tab.windowId missing' };
380
+ }
381
+
382
+ // Snapshot the previous active tab so we can restore it.
383
+ const prevActive = await browser.tabs.query({ windowId: tab.windowId, active: true });
384
+ prevActiveTabId = prevActive[0]?.id ?? null;
385
+
386
+ // Activate the target tab if it isn't already active. tabs.update resolves
387
+ // before Safari's internal active-tab state settles, so we verify by
388
+ // polling tabs.query before the capture (TOCTOU narrows but doesn't close).
389
+ if (prevActiveTabId !== tab.id) {
390
+ await browser.tabs.update(tab.id, { active: true });
391
+ let activated = false;
392
+ for (let attempt = 0; attempt < 5; attempt++) {
393
+ await new Promise((r) => setTimeout(r, 40));
394
+ const check = await browser.tabs.query({ windowId: tab.windowId, active: true });
395
+ if (check[0]?.id === tab.id) { activated = true; break; }
396
+ }
397
+ if (!activated) {
398
+ throw { name: 'CAPTURE_RACE', message: 'target tab did not become active within 200ms' };
399
+ }
400
+ }
401
+
402
+ const dataUrl = await browser.tabs.captureVisibleTab(tab.windowId, { format: 'png' });
403
+ const commaIdx = dataUrl.indexOf(',');
404
+ const base64 = commaIdx >= 0 ? dataUrl.slice(commaIdx + 1) : dataUrl;
405
+
406
+ const result = { ok: true, value: base64 };
407
+ await updatePendingEntry(commandId, { status: 'completed', result });
408
+ return result;
409
+ } catch (e) {
410
+ const errName = e?.name && typeof e.name === 'string' ? e.name : 'CAPTURE_FAILED';
411
+ const result = { ok: false, error: { name: errName, message: e?.message ?? String(e) } };
412
+ await updatePendingEntry(commandId, { status: 'completed', result });
413
+ return result;
414
+ } finally {
415
+ if (prevActiveTabId != null && prevActiveTabId !== tab.id) {
416
+ try { await browser.tabs.update(prevActiveTabId, { active: true }); } catch { /* tab may have closed */ }
417
+ }
418
+ }
419
+ }
420
+
371
421
  // 5A.1 phase-0: file upload probe sentinel — dispatched via the storage bus
372
422
  // to content-isolated.js (Test A: content-script fetch; Test B: File structured-
373
423
  // clone). Unlike cookie/DNR sentinels (which execute entirely in background.js),
@@ -550,6 +550,161 @@
550
550
  break;
551
551
  }
552
552
  case 'execute_script': {
553
+ // ── EARLY INTERCEPT: __SP_SCROLL_TO_ELEMENT__:<json> (v0.1.31 Task 5) ──
554
+ // Sentinel-routed handler for safari_scroll_to_element. Sits at the
555
+ // top of the case so it runs before commandId caching and the
556
+ // _Function compile path. Errors thrown here flow through the outer
557
+ // catch → respond(false, { error: { message, name } }), which the
558
+ // daemon (ExtensionBridge.handleResult) maps to error.code on the
559
+ // Node side via StructuredError.code = error.name.
560
+ if (typeof params.script === 'string' && params.script.startsWith('__SP_SCROLL_TO_ELEMENT__:')) {
561
+ const args = JSON.parse(params.script.slice('__SP_SCROLL_TO_ELEMENT__:'.length));
562
+ const sel = args.selector;
563
+ const txt = args.text;
564
+ const role = args.role;
565
+ const name = args.name;
566
+ const nth = typeof args.nth === 'number' ? args.nth : 0;
567
+ const behavior = args.behavior === 'smooth' ? 'smooth' : 'instant';
568
+ const L = window.__SP_LOCATOR__;
569
+ if (!L) {
570
+ throw Object.assign(
571
+ new Error('locator.js not loaded in MAIN world'),
572
+ { name: 'TARGET_NOT_FOUND' },
573
+ );
574
+ }
575
+ const candidates = L.resolveScrollTargets({ selector: sel, text: txt, role, name });
576
+ if (candidates.length === 0) {
577
+ const hidden = L.resolveScrollTargets({ selector: sel, text: txt, role, name, includeHidden: true });
578
+ if (hidden.length > 0) {
579
+ throw Object.assign(
580
+ new Error('element exists but is not visible (display:none, hidden, or in closed <details>)'),
581
+ { name: 'TARGET_HIDDEN' },
582
+ );
583
+ }
584
+ throw Object.assign(
585
+ new Error('no element matched the provided locator'),
586
+ { name: 'TARGET_NOT_FOUND' },
587
+ );
588
+ }
589
+ if (nth >= candidates.length) {
590
+ throw Object.assign(
591
+ new Error('nth=' + nth + ' out of range (matchCount=' + candidates.length + ')'),
592
+ { name: 'INVALID_PARAMS' },
593
+ );
594
+ }
595
+ const target = candidates[nth];
596
+ const fromY = window.scrollY;
597
+ target.element.scrollIntoView({ behavior, block: 'center', inline: 'nearest' });
598
+ await L.waitForScrollSettle(500);
599
+ const matchedNode = L.serializeNode(target.element);
600
+ const allMatches = candidates.length > 1
601
+ ? candidates.slice(0, 5).map((c) => L.serializeNode(c.element, true))
602
+ : undefined;
603
+ result = {
604
+ scrolledTo: { strategy: target.strategy, matchedNode, matchCount: candidates.length, allMatches },
605
+ viewport: { scrollX: window.scrollX, scrollY: window.scrollY, innerWidth: window.innerWidth, innerHeight: window.innerHeight },
606
+ scrolledFromY: fromY,
607
+ };
608
+ break;
609
+ }
610
+ // ── EARLY INTERCEPT: __SP_DISMISS_OVERLAYS__:<json> (v0.1.31 Task 10) ──
611
+ // Sentinel-routed handler for safari_dismiss_overlays. Same Option A shape
612
+ // as the scroll intercept above: success → result = {...}; break;
613
+ // failure → throw with error.name → daemon maps to error.code.
614
+ if (typeof params.script === 'string' && params.script.startsWith('__SP_DISMISS_OVERLAYS__:')) {
615
+ try {
616
+ const args = JSON.parse(params.script.slice('__SP_DISMISS_OVERLAYS__:'.length));
617
+ const { categories, patterns, killSwitchEngaged, paywallEnabled } = args;
618
+ const L = window.__SP_LOCATOR__;
619
+ if (!L) {
620
+ throw Object.assign(
621
+ new Error('locator.js not loaded in MAIN world'),
622
+ { name: 'NO_LOCATOR' },
623
+ );
624
+ }
625
+ if (killSwitchEngaged) {
626
+ result = {
627
+ dismissed: [],
628
+ skipped: [{ reason: 'kill_switch_engaged' }],
629
+ overlaysAtStart: 0,
630
+ overlaysAtEnd: 0,
631
+ };
632
+ break;
633
+ }
634
+ const dismissed = [];
635
+ const skipped = [];
636
+ let overlaysAtStart = 0;
637
+ const filtered = (patterns || []).filter((p) => !categories || categories.includes(p.category));
638
+ for (const pattern of filtered) {
639
+ // Paywall opt-in gate
640
+ if (pattern.category === 'paywall' && !paywallEnabled) {
641
+ const root = L.findPatternRoot(pattern);
642
+ if (root) {
643
+ const sel = pattern.signals.find((s) => s.type === 'selector');
644
+ skipped.push({
645
+ reason: 'paywall_opt_in_required',
646
+ candidate: { selector: sel ? sel.value : undefined, category: 'paywall' },
647
+ });
648
+ }
649
+ continue;
650
+ }
651
+ const root = L.findPatternRoot(pattern);
652
+ if (!root) {
653
+ const sel = pattern.signals.find((s) => s.type === 'selector');
654
+ skipped.push({
655
+ reason: 'allowlist_miss',
656
+ candidate: { selector: sel ? sel.value : undefined, category: pattern.category },
657
+ });
658
+ continue;
659
+ }
660
+ overlaysAtStart++;
661
+ try {
662
+ const verifyResult = await L.dismissPattern(pattern, root);
663
+ if (!verifyResult.verified) {
664
+ const sel = pattern.signals.find((s) => s.type === 'selector');
665
+ skipped.push({
666
+ reason: 'verify_failed_overlay_persists',
667
+ candidate: { selector: sel ? sel.value : undefined, hint: pattern.id },
668
+ });
669
+ } else {
670
+ const sel = pattern.signals.find((s) => s.type === 'selector');
671
+ dismissed.push({
672
+ category: pattern.category,
673
+ id: pattern.id,
674
+ selector: sel ? sel.value : '',
675
+ action: pattern.dismiss.action,
676
+ site: window.location.hostname,
677
+ verified: true,
678
+ });
679
+ }
680
+ } catch (e) {
681
+ skipped.push({
682
+ reason: 'click_failed',
683
+ candidate: { hint: String((e && e.message) || e) },
684
+ });
685
+ }
686
+ }
687
+ // Recount remaining
688
+ let remaining = 0;
689
+ for (const p of filtered) { if (L.findPatternRoot(p)) remaining++; }
690
+ result = {
691
+ dismissed,
692
+ skipped,
693
+ overlaysAtStart,
694
+ overlaysAtEnd: remaining,
695
+ };
696
+ break;
697
+ } catch (e) {
698
+ // Re-throw with NO_LOCATOR semantic (any unexpected failure surfaces here).
699
+ // If it already has a .name, preserve it; otherwise tag NO_LOCATOR.
700
+ if (e && e.name && e.name !== 'Error') throw e;
701
+ throw Object.assign(
702
+ new Error(String((e && e.message) || e)),
703
+ { name: 'NO_LOCATOR' },
704
+ );
705
+ }
706
+ }
707
+ // ── existing default execute_script path ──
553
708
  const commandId = params.commandId;
554
709
  if (commandId && window.__safariPilotExecutedCommands.has(commandId)) {
555
710
  const cached = window.__safariPilotExecutedCommands.get(commandId);
@@ -0,0 +1,294 @@
1
+ // extension/locator.js — MAIN-world helpers for safari_scroll_to_element
2
+ // (v0.1.31 Task 5) and safari_dismiss_overlays (v0.1.31 Task 10).
3
+ //
4
+ // This file is registered in manifest.json's MAIN-world content_scripts
5
+ // BEFORE content-main.js, so window.__SP_LOCATOR__ is guaranteed to exist
6
+ // by the time content-main.js processes any execute_script sentinel.
7
+ //
8
+ // Helpers:
9
+ // querySelectorWithShadow(selector, root) — single-element search through
10
+ // open shadow roots; returns first match or null.
11
+ // resolveScrollTargets({ selector, text, role, name, includeHidden }) —
12
+ // precedence selector > role+name > text; same-origin iframe traversal;
13
+ // visibility filter; returns [{ element, strategy }, ...].
14
+ // waitForScrollSettle(maxMs) — RAF-driven idle detection (50ms grace, capped
15
+ // at maxMs). Resolves when window.scrollY stops changing.
16
+ // serializeNode(el, shallow) — { tagName, role, text(80c), xpath, bbox }.
17
+
18
+ (function () {
19
+ 'use strict';
20
+
21
// ── querySelectorWithShadow: first match across light DOM + open shadow roots ──
// Looks for `selector` in `root` (defaults to the global document). When the
// light DOM has no match, every open shadow root reachable from `root` is
// searched, including shadow roots nested inside other shadow roots.
// Returns the first matching element, or null when nothing matches.
function querySelectorWithShadow(selector, root) {
  const scope = root || document;

  const lightHit = scope.querySelector(selector);
  if (lightHit) return lightHit;

  // Containers whose descendants still need to be scanned for shadow hosts.
  const pending = [scope];
  while (pending.length > 0) {
    const container = pending.pop();
    if (!container.querySelectorAll) continue;

    for (const host of container.querySelectorAll('*')) {
      const shadow = host.shadowRoot;
      if (!shadow || shadow.mode !== 'open') continue;

      const shadowHit = shadow.querySelector(selector);
      if (shadowHit) return shadowHit;
      pending.push(shadow);
    }
  }
  return null;
}
40
+
41
// ── resolveInDoc: run the locator against a single document ─────────────
// Applies exactly one strategy, chosen by precedence selector > role(+name) > text:
//   selector — every element matching the CSS selector.
//   role     — elements with a matching role attribute; when `name` is given,
//              only those whose aria-label (or, failing that, textContent)
//              contains `name`, case-insensitively.
//   text     — elements under <body> whose OWN text nodes (not descendants')
//              contain `text`, case-insensitively and whitespace-normalised.
// Returns a plain array of elements (empty when no strategy field is set).
function resolveInDoc(doc, opts) {
  const { selector, text, role, name } = opts;

  if (selector) {
    return Array.from(doc.querySelectorAll(selector));
  }

  if (role) {
    // Escape quotes/backslashes so an unusual role value cannot break out of
    // the attribute-selector string and raise a selector SyntaxError.
    const quotedRole = String(role).replace(/["\\]/g, '\\$&');
    const roleMatches = Array.from(doc.querySelectorAll('[role="' + quotedRole + '"]'));
    if (!name) return roleMatches;
    const needle = name.toLowerCase();
    return roleMatches.filter((el) => {
      const accName = (el.getAttribute('aria-label') || el.textContent || '').trim().toLowerCase();
      return accName.includes(needle);
    });
  }

  if (text) {
    const normalise = (s) => s.toLowerCase().replace(/\s+/g, ' ').trim();
    const needle = normalise(text);
    const all = doc.querySelectorAll('body *:not(script):not(style)');
    return Array.from(all).filter((el) => {
      const directText = Array.from(el.childNodes)
        .filter((n) => n.nodeType === 3) // text nodes only
        .map((n) => n.textContent || '')
        .join('');
      return normalise(directText).includes(needle);
    });
  }

  return [];
}

// ── resolveScrollTargets: precedence selector > role+name > text ─────────
// Resolves the locator in the top-level document and then in every
// same-origin iframe, and returns [{ element, strategy }, ...] where
// strategy is 'selector' | 'role' | 'text' (null when no locator field is set).
//   opts.includeHidden === true skips the visibility filter.
// Visibility is decided by the layout box alone (non-zero width and height).
// An offsetParent === null test is deliberately NOT used: offsetParent is also
// null for position:fixed elements, which would wrongly report visible sticky
// headers and banners as hidden.
// NOTE(review): open shadow roots are not traversed here — only light DOM and
// same-origin iframes.
function resolveScrollTargets(opts) {
  const { selector, text, role, name, includeHidden } = opts || {};
  const locator = { selector, text, role, name };
  const showHidden = includeHidden === true;

  let strategy = null;
  if (selector) strategy = 'selector';
  else if (role) strategy = 'role';
  else if (text) strategy = 'text';

  const candidates = resolveInDoc(document, locator);

  // Same-origin iframe traversal — cross-origin frames silently skip
  // (yields TARGET_NOT_FOUND when no frame matches, per spec).
  for (const frame of document.querySelectorAll('iframe')) {
    let frameDoc = null;
    try { frameDoc = frame.contentDocument; } catch (_e) { frameDoc = null; }
    if (!frameDoc) continue;
    candidates.push(...resolveInDoc(frameDoc, locator));
  }

  const hasLayoutBox = (el) => {
    const rect = el.getBoundingClientRect();
    return rect.height > 0 && rect.width > 0;
  };

  return candidates
    .filter((el) => el && el.nodeType === 1)
    .filter((el) => showHidden || hasLayoutBox(el))
    .map((element) => ({ element, strategy }));
}
136
+
137
// ── waitForScrollSettle: RAF sampling + 50ms grace, hard-capped at maxMs ──
// Resolves once window.scrollY is unchanged between two consecutive samples
// (followed by a short grace delay), or when maxMs has elapsed — whichever
// comes first.
//   maxMs — upper bound in milliseconds; a non-number or NaN falls back to 500.
// Returns a Promise that resolves with no value and never rejects.
function waitForScrollSettle(maxMs) {
  const cap = typeof maxMs === 'number' && !Number.isNaN(maxMs) ? maxMs : 500;
  const GRACE_MS = 50;

  return new Promise((resolve) => {
    let finished = false;
    let capTimer = null;
    const finish = () => {
      if (finished) return;
      finished = true;
      if (capTimer !== null) clearTimeout(capTimer);
      resolve();
    };

    // Wall-clock cap. requestAnimationFrame callbacks are paused while a tab
    // is in the background, so a cap checked only inside tick() would never
    // fire there and the caller's await would hang.
    if (Number.isFinite(cap)) capTimer = setTimeout(finish, Math.max(0, cap));

    const start = Date.now();
    let lastY = window.scrollY;

    function tick() {
      if (finished) return;
      const elapsed = Date.now() - start;
      if (elapsed >= cap) { finish(); return; }
      const currentY = window.scrollY;
      if (currentY === lastY) {
        // Position is stable: allow a short grace period, but never past the cap.
        setTimeout(finish, Math.min(GRACE_MS, cap - elapsed));
      } else {
        lastY = currentY;
        requestAnimationFrame(tick);
      }
    }
    requestAnimationFrame(tick);
  });
}
156
+
157
// ── computeXPath: minimal positional XPath ──────────────────────────────
// Returns an XPath string for `el`:
//   • '//*[@id="…"]' when the element has an id that can be quoted safely;
//   • otherwise a positional path ('/html/body/div[2]/span[1]') built by
//     walking up to the <body> of the element's OWN document, so elements
//     inside same-origin iframes get a path relative to their frame document
//     instead of a doubled '/html/body/html[1]/body[1]/…' prefix;
//   • '/html/body' for the body element itself (no trailing slash);
//   • for nodes with no <body> ancestor (e.g. <html>, or nodes whose parent
//     chain ends before reaching body) just the steps that were walked,
//     without a misleading '/html/body' prefix.
function computeXPath(el) {
  // An id containing a double quote cannot be embedded in the "…" literal
  // below without producing an invalid expression; use the positional form.
  if (el.id && !String(el.id).includes('"')) return '//*[@id="' + el.id + '"]';

  const body = el.ownerDocument ? el.ownerDocument.body : document.body;
  const parts = [];
  let cur = el;
  while (cur && cur.nodeType === 1 && cur !== body) {
    // 1-based index among preceding siblings with the same tag name.
    let idx = 1;
    let sib = cur.previousElementSibling;
    while (sib) {
      if (sib.tagName === cur.tagName) idx++;
      sib = sib.previousElementSibling;
    }
    parts.unshift(cur.tagName.toLowerCase() + '[' + idx + ']');
    cur = cur.parentElement;
  }

  const reachedBody = Boolean(body) && cur === body;
  if (reachedBody) return ['/html/body', ...parts].join('/');
  return '/' + parts.join('/');
}
174
+
175
// ── serializeNode: compact, JSON-safe summary of an element ─────────────
// Always includes tagName (lower-cased), role (the role attribute, or
// undefined when absent/empty) and text (trimmed textContent, at most 80
// characters).
//   shallow truthy  → xpath is the empty string and no bbox is measured.
//   shallow falsy   → xpath comes from computeXPath(el) and bbox carries the
//                     x / y / width / height of getBoundingClientRect().
function serializeNode(el, shallow) {
  const descriptor = {
    tagName: el.tagName.toLowerCase(),
    role: el.getAttribute('role') || undefined,
    text: (el.textContent || '').trim().slice(0, 80),
  };

  if (shallow) {
    descriptor.xpath = '';
    return descriptor;
  }

  const box = el.getBoundingClientRect();
  descriptor.xpath = computeXPath(el);
  descriptor.bbox = { x: box.x, y: box.y, width: box.width, height: box.height };
  return descriptor;
}
193
+
194
// ── matchSignal: does element satisfy a single signal? ──────────────────
// Supported signal.type values:
//   'selector'             — el.matches(signal.value). Element-level matching
//                            is used so shadow-encapsulated elements work too.
//   'aria-label-substring' — aria-label contains signal.value (lower-cased on
//                            both sides when signal.caseInsensitive is truthy).
//   'aria-role'            — role attribute equals signal.value exactly.
//   'fixed-position'       — computed position is 'fixed'.
//   'z-index-above'        — computed z-index is a finite integer greater than
//                            parseInt(signal.value, 10).
// Any other type yields false.
function matchSignal(el, signal) {
  const readAttr = (attrName) => el.getAttribute && el.getAttribute(attrName);
  const computedStyle = () => el.ownerDocument.defaultView.getComputedStyle(el);
  const kind = signal.type;

  if (kind === 'selector') {
    return Boolean(el.matches && el.matches(signal.value));
  }

  if (kind === 'aria-label-substring') {
    const rawLabel = readAttr('aria-label') || '';
    if (signal.caseInsensitive) {
      const wanted = signal.value.toLowerCase();
      return rawLabel.toLowerCase().includes(wanted);
    }
    return rawLabel.includes(signal.value);
  }

  if (kind === 'aria-role') {
    return readAttr('role') === signal.value;
  }

  if (kind === 'fixed-position') {
    return computedStyle().position === 'fixed';
  }

  if (kind === 'z-index-above') {
    const zIndex = parseInt(computedStyle().zIndex, 10);
    if (!Number.isFinite(zIndex)) return false; // e.g. 'auto'
    return zIndex > parseInt(signal.value, 10);
  }

  return false;
}
223
+
224
// ── findPatternRoot: finds the first element matching ALL signals ──────
// Candidates are every element matching the pattern's 'selector' signal
// ('*' when the pattern has none). Search order:
//   1. the top-level document's light DOM,
//   2. open shadow roots reachable from it (nested ones included),
//   3. same-origin iframe documents (cross-origin frames are skipped).
// Every candidate is tested, not just the first per document, so an earlier
// element that matches the selector but fails another signal does not hide a
// later element that satisfies the whole pattern.
// Returns the first element for which matchSignal() is true for every signal,
// or null.
function findPatternRoot(pattern) {
  // Primary signal is selector if present
  const primarySignal = pattern.signals.find((s) => s.type === 'selector');
  const primarySelector = primarySignal ? primarySignal.value : '*';

  const satisfiesAll = (el) => pattern.signals.every((s) => matchSignal(el, s));
  const firstSatisfying = (scope) => {
    for (const el of scope.querySelectorAll(primarySelector)) {
      if (satisfiesAll(el)) return el;
    }
    return null;
  };

  // Main document, then open shadow roots in breadth-first order.
  const pending = [document];
  while (pending.length > 0) {
    const scope = pending.shift();
    const hit = firstSatisfying(scope);
    if (hit) return hit;
    for (const host of scope.querySelectorAll('*')) {
      const shadow = host.shadowRoot;
      if (shadow && shadow.mode === 'open') pending.push(shadow);
    }
  }

  // Same-origin iframes
  for (const frame of document.querySelectorAll('iframe')) {
    let frameDoc = null;
    try { frameDoc = frame.contentDocument; } catch (_e) { continue; }
    if (!frameDoc) continue;
    const hit = firstSatisfying(frameDoc);
    if (hit) return hit;
  }
  return null;
}
248
+
249
// ── dismissPattern: execute the dismiss action, verify removal ─────────
// Runs pattern.dismiss.action against the overlay `root`:
//   'click'       — clicks the dismiss target.
//   'esc-key'     — dispatches an Escape keydown on the overlay's own document
//                   (so overlays inside same-origin iframes receive it).
//   'remove-node' — detaches the dismiss target from its parent.
// The dismiss target is `root` when pattern.dismiss.selector is unset.
// Otherwise the selector is resolved INSIDE the overlay root first, and only
// then document-wide / through open shadow roots, so a generic selector such
// as a close button does not hit an unrelated element elsewhere on the page.
// If the action throws or finds no target and pattern.dismiss.fallbackAction
// is set, the fallback (with fallbackSelector) is tried once instead.
// After the action, waits pattern.verify.stabilityMs (0 when absent) and
// re-runs findPatternRoot(); resolves to { verified } where verified is true
// when the pattern no longer matches. Rethrows the action's error when there
// is no fallback.
async function dismissPattern(pattern, root) {
  const { action, selector, fallbackAction, fallbackSelector } = pattern.dismiss;
  const ownerDoc = root.ownerDocument || document;

  const resolveTarget = () => {
    if (!selector) return root;
    const scoped = root.querySelector ? root.querySelector(selector) : null;
    return scoped || ownerDoc.querySelector(selector) || querySelectorWithShadow(selector);
  };
  // The fallback pattern carries no fallbackAction of its own, so this
  // recursion is at most one level deep.
  const runFallback = () => dismissPattern(
    { ...pattern, dismiss: { action: fallbackAction, selector: fallbackSelector } },
    root,
  );

  let actionExecuted = false;
  try {
    if (action === 'click') {
      const target = resolveTarget();
      if (target) { target.click(); actionExecuted = true; }
    } else if (action === 'esc-key') {
      ownerDoc.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape', bubbles: true }));
      actionExecuted = true;
    } else if (action === 'remove-node') {
      const target = resolveTarget();
      if (target && target.parentNode) { target.parentNode.removeChild(target); actionExecuted = true; }
    }
  } catch (e) {
    // try fallback if defined
    if (fallbackAction) return runFallback();
    throw e;
  }
  if (!actionExecuted && fallbackAction) return runFallback();

  // Verify after stabilityMs. Guarded so a pattern without a verify block does
  // not throw after the dismiss action has already been performed.
  const stabilityMs = (pattern.verify && pattern.verify.stabilityMs) || 0;
  await new Promise((r) => setTimeout(r, stabilityMs));
  const stillThere = findPatternRoot(pattern);
  return { verified: !stillThere };
}
283
+
284
+ // Publish the helper surface on window.__SP_LOCATOR__ for the content-main.js sentinel intercepts; resolveInDoc and computeXPath are not listed and stay private to this closure.
285
+ window.__SP_LOCATOR__ = {
286
+ querySelectorWithShadow,
287
+ resolveScrollTargets,
288
+ waitForScrollSettle,
289
+ serializeNode,
290
+ matchSignal,
291
+ findPatternRoot,
292
+ dismissPattern,
293
+ };
294
+ })();
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "manifest_version": 3,
3
3
  "name": "Safari Pilot",
4
- "version": "0.1.29",
4
+ "version": "0.1.33",
5
5
  "description": "Native Safari automation for AI agents",
6
6
  "permissions": [
7
7
  "activeTab",
@@ -44,6 +44,7 @@
44
44
  "<all_urls>"
45
45
  ],
46
46
  "js": [
47
+ "locator.js",
47
48
  "content-main.js"
48
49
  ],
49
50
  "run_at": "document_idle",