autokap 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278) hide show
  1. package/assets/cursors/macos.svg +4 -0
  2. package/assets/cursors/windows.svg +15 -0
  3. package/assets/skill/OPCODE-REFERENCE.md +607 -0
  4. package/assets/skill/README.md +39 -0
  5. package/assets/skill/SKILL.md +453 -468
  6. package/assets/skill/STUDIO-SKILL.md +476 -0
  7. package/assets/skill/references/examples.md +104 -0
  8. package/assets/skill/references/interactive-demo.md +225 -0
  9. package/assets/skill/references/mock-data.md +178 -0
  10. package/dist/action-verifier.d.ts +29 -0
  11. package/dist/action-verifier.js +133 -0
  12. package/dist/agent-action-recovery.d.ts +45 -0
  13. package/dist/agent-action-recovery.js +370 -0
  14. package/dist/agent-message-utils.d.ts +21 -0
  15. package/dist/agent-message-utils.js +77 -0
  16. package/dist/agent-url-utils.d.ts +30 -0
  17. package/dist/agent-url-utils.js +138 -0
  18. package/dist/agent.d.ts +92 -8
  19. package/dist/agent.js +2936 -781
  20. package/dist/ak-tree.d.ts +39 -0
  21. package/dist/ak-tree.js +368 -0
  22. package/dist/alt-text.d.ts +26 -0
  23. package/dist/alt-text.js +55 -0
  24. package/dist/auth-capture.d.ts +17 -0
  25. package/dist/auth-capture.js +164 -0
  26. package/dist/benchmark.d.ts +59 -0
  27. package/dist/benchmark.js +135 -0
  28. package/dist/browser-bar.d.ts +14 -6
  29. package/dist/browser-bar.js +145 -8
  30. package/dist/browser-pool.d.ts +7 -0
  31. package/dist/browser-pool.js +15 -5
  32. package/dist/browser-utils.d.ts +31 -0
  33. package/dist/browser-utils.js +97 -0
  34. package/dist/browser.d.ts +51 -1
  35. package/dist/browser.js +1481 -31
  36. package/dist/capture-alt-text.js +2 -1
  37. package/dist/capture-language-preflight.js +14 -0
  38. package/dist/capture-llm-page-identity.js +22 -10
  39. package/dist/capture-page-identity.d.ts +5 -7
  40. package/dist/capture-page-identity.js +211 -78
  41. package/dist/capture-preset-credentials.d.ts +50 -0
  42. package/dist/capture-preset-credentials.js +127 -0
  43. package/dist/capture-request-plan.d.ts +2 -2
  44. package/dist/capture-request-plan.js +64 -16
  45. package/dist/capture-run-optimizer.js +48 -33
  46. package/dist/capture-selector-memory.d.ts +5 -0
  47. package/dist/capture-selector-memory.js +18 -0
  48. package/dist/capture-strategy.d.ts +36 -0
  49. package/dist/capture-strategy.js +95 -0
  50. package/dist/capture-studio-sync.d.ts +1 -0
  51. package/dist/capture-studio-sync.js +9 -3
  52. package/dist/capture-surface-contract.d.ts +36 -0
  53. package/dist/capture-surface-contract.js +299 -0
  54. package/dist/capture-transition-engine.d.ts +28 -0
  55. package/dist/capture-transition-engine.js +292 -0
  56. package/dist/capture-variant-state.d.ts +2 -0
  57. package/dist/capture-variant-state.js +26 -0
  58. package/dist/capture-verification.d.ts +35 -0
  59. package/dist/capture-verification.js +95 -0
  60. package/dist/capture-viewport-lock.d.ts +48 -0
  61. package/dist/capture-viewport-lock.js +74 -0
  62. package/dist/circuit-breaker.d.ts +42 -0
  63. package/dist/circuit-breaker.js +119 -0
  64. package/dist/cli-config.d.ts +8 -1
  65. package/dist/cli-config.js +62 -6
  66. package/dist/cli-contract.d.ts +15 -0
  67. package/dist/cli-contract.js +167 -0
  68. package/dist/cli-runner-local.d.ts +12 -0
  69. package/dist/cli-runner-local.js +102 -0
  70. package/dist/cli-runner.d.ts +34 -0
  71. package/dist/cli-runner.js +433 -0
  72. package/dist/cli-utils.d.ts +0 -1
  73. package/dist/cli-utils.js +2 -5
  74. package/dist/cli.js +1005 -267
  75. package/dist/clip-orchestrator.js +9 -2
  76. package/dist/clip-postprocess.js +25 -16
  77. package/dist/cookie-dismiss.d.ts +2 -0
  78. package/dist/cookie-dismiss.js +48 -13
  79. package/dist/cost-logging.d.ts +8 -0
  80. package/dist/cost-logging.js +160 -46
  81. package/dist/cost-resolution-monitor.d.ts +16 -0
  82. package/dist/cost-resolution-monitor.js +34 -0
  83. package/dist/credential-templates.js +2 -2
  84. package/dist/cursor-overlay-script.d.ts +6 -0
  85. package/dist/cursor-overlay-script.js +169 -0
  86. package/dist/dom-css-purger.d.ts +65 -0
  87. package/dist/dom-css-purger.js +333 -0
  88. package/dist/dom-font-inliner.d.ts +45 -0
  89. package/dist/dom-font-inliner.js +148 -0
  90. package/dist/dom-patch-resolver.d.ts +52 -0
  91. package/dist/dom-patch-resolver.js +242 -0
  92. package/dist/dom-serializer.d.ts +82 -0
  93. package/dist/dom-serializer.js +378 -0
  94. package/dist/element-capture.d.ts +1 -41
  95. package/dist/element-capture.js +202 -446
  96. package/dist/env-validation.d.ts +5 -0
  97. package/dist/env-validation.js +29 -0
  98. package/dist/execution-schema.d.ts +4423 -0
  99. package/dist/execution-schema.js +507 -0
  100. package/dist/execution-types.d.ts +886 -0
  101. package/dist/execution-types.js +65 -0
  102. package/dist/fonts-loader.d.ts +14 -0
  103. package/dist/fonts-loader.js +55 -0
  104. package/dist/hybrid-navigator.js +12 -12
  105. package/dist/index.d.ts +9 -6
  106. package/dist/index.js +10 -4
  107. package/dist/legacy/agent-action-recovery.d.ts +45 -0
  108. package/dist/legacy/agent-action-recovery.js +370 -0
  109. package/dist/legacy/agent-message-utils.d.ts +21 -0
  110. package/dist/legacy/agent-message-utils.js +77 -0
  111. package/dist/legacy/agent-url-utils.d.ts +30 -0
  112. package/dist/legacy/agent-url-utils.js +138 -0
  113. package/dist/legacy/agent.d.ts +226 -0
  114. package/dist/legacy/agent.js +6666 -0
  115. package/dist/legacy/clip-orchestrator.d.ts +148 -0
  116. package/dist/legacy/clip-orchestrator.js +957 -0
  117. package/dist/legacy/credential-templates.d.ts +5 -0
  118. package/dist/legacy/credential-templates.js +60 -0
  119. package/dist/legacy/hybrid-navigator.d.ts +138 -0
  120. package/dist/legacy/hybrid-navigator.js +468 -0
  121. package/dist/legacy/llm-usage.d.ts +17 -0
  122. package/dist/legacy/llm-usage.js +45 -0
  123. package/dist/legacy/prompt-cache.d.ts +10 -0
  124. package/dist/legacy/prompt-cache.js +24 -0
  125. package/dist/legacy/prompts.d.ts +175 -0
  126. package/dist/legacy/prompts.js +1038 -0
  127. package/dist/legacy/tools.d.ts +4 -0
  128. package/dist/legacy/tools.js +216 -0
  129. package/dist/legacy/video-agent.d.ts +143 -0
  130. package/dist/legacy/video-agent.js +4788 -0
  131. package/dist/legacy/video-observation.d.ts +36 -0
  132. package/dist/legacy/video-observation.js +192 -0
  133. package/dist/legacy/video-planner.d.ts +12 -0
  134. package/dist/legacy/video-planner.js +501 -0
  135. package/dist/legacy/video-prompts.d.ts +37 -0
  136. package/dist/legacy/video-prompts.js +569 -0
  137. package/dist/legacy/video-tools.d.ts +3 -0
  138. package/dist/legacy/video-tools.js +59 -0
  139. package/dist/legacy/video-variant-state.d.ts +29 -0
  140. package/dist/legacy/video-variant-state.js +80 -0
  141. package/dist/legacy/vision-model.d.ts +17 -0
  142. package/dist/legacy/vision-model.js +74 -0
  143. package/dist/llm-healer.d.ts +63 -0
  144. package/dist/llm-healer.js +166 -0
  145. package/dist/llm-provider.d.ts +29 -0
  146. package/dist/llm-provider.js +80 -0
  147. package/dist/logger.d.ts +6 -2
  148. package/dist/logger.js +15 -1
  149. package/dist/mockup-html.js +35 -25
  150. package/dist/mockup.d.ts +95 -2
  151. package/dist/mockup.js +427 -166
  152. package/dist/mouse-animation.d.ts +2 -2
  153. package/dist/mouse-animation.js +34 -20
  154. package/dist/opcode-actions.d.ts +42 -0
  155. package/dist/opcode-actions.js +511 -0
  156. package/dist/opcode-runner.d.ts +51 -0
  157. package/dist/opcode-runner.js +770 -0
  158. package/dist/openrouter-client.d.ts +40 -0
  159. package/dist/openrouter-client.js +16 -0
  160. package/dist/overlay-engine.d.ts +24 -0
  161. package/dist/overlay-engine.js +176 -0
  162. package/dist/postcondition.d.ts +16 -0
  163. package/dist/postcondition.js +269 -0
  164. package/dist/program-patcher.d.ts +25 -0
  165. package/dist/program-patcher.js +44 -0
  166. package/dist/prompts.d.ts +13 -5
  167. package/dist/prompts.js +224 -351
  168. package/dist/provider-config.d.ts +12 -0
  169. package/dist/provider-config.js +15 -0
  170. package/dist/recovery-chain.d.ts +37 -0
  171. package/dist/recovery-chain.js +350 -0
  172. package/dist/remote-browser.d.ts +28 -4
  173. package/dist/remote-browser.js +60 -5
  174. package/dist/safari-browser-bar.d.ts +15 -0
  175. package/dist/safari-browser-bar.js +95 -0
  176. package/dist/safari-toolbar-asset.d.ts +15 -0
  177. package/dist/safari-toolbar-asset.js +12 -0
  178. package/dist/security.d.ts +2 -1
  179. package/dist/security.js +49 -10
  180. package/dist/selector-resolver.d.ts +34 -0
  181. package/dist/selector-resolver.js +181 -0
  182. package/dist/semantic-resolver.d.ts +35 -0
  183. package/dist/semantic-resolver.js +161 -0
  184. package/dist/server-capture-runtime.d.ts +5 -3
  185. package/dist/server-capture-runtime.js +42 -95
  186. package/dist/server-credit-usage.d.ts +2 -2
  187. package/dist/server-project-webhooks.d.ts +15 -1
  188. package/dist/server-project-webhooks.js +34 -8
  189. package/dist/server-screenshot-watermark.js +27 -5
  190. package/dist/session-profile.js +164 -1
  191. package/dist/sf-pro-symbols.d.ts +1 -0
  192. package/dist/sf-pro-symbols.js +55 -0
  193. package/dist/skill-packaging.d.ts +28 -0
  194. package/dist/skill-packaging.js +169 -0
  195. package/dist/smart-wait.d.ts +27 -0
  196. package/dist/smart-wait.js +81 -0
  197. package/dist/status-bar-render.d.ts +20 -0
  198. package/dist/status-bar-render.js +410 -0
  199. package/dist/status-bar.d.ts +9 -0
  200. package/dist/status-bar.js +298 -14
  201. package/dist/svg-browser-bar.d.ts +33 -0
  202. package/dist/svg-browser-bar.js +206 -0
  203. package/dist/svg-status-bar.d.ts +36 -0
  204. package/dist/svg-status-bar.js +597 -0
  205. package/dist/svg-text.d.ts +61 -0
  206. package/dist/svg-text.js +118 -0
  207. package/dist/tools.js +89 -451
  208. package/dist/types.d.ts +240 -5
  209. package/dist/types.js +23 -1
  210. package/dist/v2/action-verifier.d.ts +29 -0
  211. package/dist/v2/action-verifier.js +133 -0
  212. package/dist/v2/alt-text.d.ts +26 -0
  213. package/dist/v2/alt-text.js +55 -0
  214. package/dist/v2/benchmark.d.ts +59 -0
  215. package/dist/v2/benchmark.js +135 -0
  216. package/dist/v2/capture-strategy.d.ts +30 -0
  217. package/dist/v2/capture-strategy.js +67 -0
  218. package/dist/v2/capture-verification.d.ts +35 -0
  219. package/dist/v2/capture-verification.js +95 -0
  220. package/dist/v2/circuit-breaker.d.ts +42 -0
  221. package/dist/v2/circuit-breaker.js +119 -0
  222. package/dist/v2/cli-runner-local.d.ts +11 -0
  223. package/dist/v2/cli-runner-local.js +91 -0
  224. package/dist/v2/cli-runner.d.ts +34 -0
  225. package/dist/v2/cli-runner.js +300 -0
  226. package/dist/v2/compiler-prompts.d.ts +27 -0
  227. package/dist/v2/compiler-prompts.js +123 -0
  228. package/dist/v2/compiler.d.ts +37 -0
  229. package/dist/v2/compiler.js +147 -0
  230. package/dist/v2/explorer.d.ts +41 -0
  231. package/dist/v2/explorer.js +56 -0
  232. package/dist/v2/index.d.ts +37 -0
  233. package/dist/v2/index.js +31 -0
  234. package/dist/v2/llm-healer.d.ts +62 -0
  235. package/dist/v2/llm-healer.js +166 -0
  236. package/dist/v2/llm-provider.d.ts +29 -0
  237. package/dist/v2/llm-provider.js +80 -0
  238. package/dist/v2/opcode-runner.d.ts +47 -0
  239. package/dist/v2/opcode-runner.js +634 -0
  240. package/dist/v2/overlay-engine.d.ts +24 -0
  241. package/dist/v2/overlay-engine.js +150 -0
  242. package/dist/v2/postcondition.d.ts +16 -0
  243. package/dist/v2/postcondition.js +249 -0
  244. package/dist/v2/program-patcher.d.ts +25 -0
  245. package/dist/v2/program-patcher.js +44 -0
  246. package/dist/v2/recovery-chain.d.ts +30 -0
  247. package/dist/v2/recovery-chain.js +368 -0
  248. package/dist/v2/schema.d.ts +2580 -0
  249. package/dist/v2/schema.js +295 -0
  250. package/dist/v2/selector-resolver.d.ts +34 -0
  251. package/dist/v2/selector-resolver.js +181 -0
  252. package/dist/v2/semantic-resolver.d.ts +35 -0
  253. package/dist/v2/semantic-resolver.js +161 -0
  254. package/dist/v2/smart-wait.d.ts +27 -0
  255. package/dist/v2/smart-wait.js +81 -0
  256. package/dist/v2/types.d.ts +444 -0
  257. package/dist/v2/types.js +19 -0
  258. package/dist/v2/web-playwright-local.d.ts +69 -0
  259. package/dist/v2/web-playwright-local.js +392 -0
  260. package/dist/version.d.ts +1 -0
  261. package/dist/version.js +5 -0
  262. package/dist/video-agent.js +18 -13
  263. package/dist/video-planner.js +2 -1
  264. package/dist/video-prompts.js +3 -3
  265. package/dist/web-playwright-local.d.ts +126 -0
  266. package/dist/web-playwright-local.js +819 -0
  267. package/dist/ws-auth.js +4 -1
  268. package/dist/ws-broadcast.d.ts +34 -0
  269. package/dist/ws-broadcast.js +85 -0
  270. package/dist/ws-connection-limits.d.ts +12 -0
  271. package/dist/ws-connection-limits.js +44 -0
  272. package/dist/ws-handler-utils.d.ts +32 -0
  273. package/dist/ws-handler-utils.js +139 -0
  274. package/dist/ws-handler.js +294 -164
  275. package/dist/ws-metrics-server.d.ts +9 -0
  276. package/dist/ws-metrics-server.js +31 -0
  277. package/dist/ws-server.js +41 -1
  278. package/package.json +51 -34
package/dist/browser.js CHANGED
@@ -1,6 +1,7 @@
1
1
  import { chromium } from 'playwright';
2
2
  import sharp from 'sharp';
3
3
  import { createHash } from 'crypto';
4
+ import { buildAKNodeRuntimeIndex, deriveInteractiveElementsFromAKTree, disambiguateFingerprint, focusAKTree, fingerprintAKNode, serializeAKTree, } from './ak-tree.js';
4
5
  /**
5
6
  * Set-of-Marks (SoM) annotation: overlays colored [N] badges on each visible
6
7
  * interactive element so the vision model can reference elements by their badge index.
@@ -95,8 +96,9 @@ function resolveEffectivePadding(config, bbox) {
95
96
  left = config.paddingLeft;
96
97
  return { top, right, bottom, left };
97
98
  }
98
- import { dismissCookiesAndWidgets } from './cookie-dismiss.js';
99
+ import { dismissCookiesAndWidgets, ensureCaptureHideStyles } from './cookie-dismiss.js';
99
100
  import { CHROMIUM_ARGS, browserPool } from './browser-pool.js';
101
+ import { logger } from './logger.js';
100
102
  async function withHelperTimeout(label, timeoutMs, work) {
101
103
  if (!timeoutMs || timeoutMs <= 0) {
102
104
  return work();
@@ -272,6 +274,22 @@ export function describeObservationChange(before, after) {
272
274
  && !changes.some((entry) => entry.startsWith('URL ->'))) {
273
275
  changes.push(`visible text changed to "${after.textSample.slice(0, 80)}"`);
274
276
  }
277
+ if (after.overlaySurface !== before.overlaySurface
278
+ && !changes.some((entry) => entry.startsWith('visible text changed'))) {
279
+ changes.push(after.overlaySurface
280
+ ? `overlay surface -> "${after.overlaySurface.slice(0, 80)}"`
281
+ : 'overlay surface cleared');
282
+ }
283
+ if (after.configurationSurface !== before.configurationSurface
284
+ && !changes.some((entry) => entry.startsWith('visible text changed'))) {
285
+ changes.push(after.configurationSurface
286
+ ? `configuration surface -> "${after.configurationSurface.slice(0, 80)}"`
287
+ : 'configuration surface cleared');
288
+ }
289
+ if (after.strongSurfaceSignature !== before.strongSurfaceSignature
290
+ && !changes.some((entry) => entry.startsWith('visible text changed'))) {
291
+ changes.push('content surface changed');
292
+ }
275
293
  return {
276
294
  before,
277
295
  after,
@@ -287,6 +305,7 @@ export class Browser {
287
305
  context = null;
288
306
  page = null;
289
307
  elementMap = new Map();
308
+ akNodeIndex = new Map();
290
309
  poolContext = false;
291
310
  constructor(options) {
292
311
  this.options = options;
@@ -324,7 +343,7 @@ export class Browser {
324
343
  // Dedicated browser process for video — cannot use the pool because
325
344
  // `recordVideo` must be set at context creation time.
326
345
  instance.browser = await chromium.launch({
327
- headless: true,
346
+ headless: !options.headed,
328
347
  args: CHROMIUM_ARGS,
329
348
  });
330
349
  instance.context = await instance.browser.newContext({
@@ -360,14 +379,54 @@ export class Browser {
360
379
  }
361
380
  async launch() {
362
381
  // Pool mode: context and page are already initialized via fromPool()
363
- if (this.poolContext)
382
+ if (this.poolContext) {
383
+ this.attachDebugLifecycleListeners();
364
384
  return;
385
+ }
365
386
  this.browser = await chromium.launch({
366
387
  headless: !this.options.headed,
367
388
  args: CHROMIUM_ARGS,
368
389
  });
369
390
  this.context = await this.browser.newContext(this.buildContextOptions());
370
391
  this.page = await this.context.newPage();
392
+ this.attachDebugLifecycleListeners();
393
+ }
394
+ /** Wires debug-only lifecycle listeners on the page/context/browser to surface
395
+ * navigations, console errors, page crashes, and unexpected closures.
396
+ * No-op when debug logging is disabled. */
397
+ attachDebugLifecycleListeners() {
398
+ if (!this.page || !this.context)
399
+ return;
400
+ const page = this.page;
401
+ const context = this.context;
402
+ page.on('crash', () => {
403
+ logger.debug(`[page] CRASH on ${page.url()}`);
404
+ });
405
+ page.on('close', () => {
406
+ logger.debug(`[page] CLOSE event on ${page.url()}`);
407
+ });
408
+ page.on('framenavigated', (frame) => {
409
+ if (frame === page.mainFrame()) {
410
+ logger.debug(`[page] navigated to ${frame.url()}`);
411
+ }
412
+ });
413
+ page.on('pageerror', (err) => {
414
+ logger.debug(`[page] pageerror: ${err.message.split('\n')[0]}`);
415
+ });
416
+ page.on('console', (msg) => {
417
+ const type = msg.type();
418
+ if (type === 'error' || type === 'warning') {
419
+ logger.debug(`[page] console.${type}: ${msg.text().slice(0, 200)}`);
420
+ }
421
+ });
422
+ context.on('close', () => {
423
+ logger.debug(`[context] CLOSE event`);
424
+ });
425
+ if (this.browser) {
426
+ this.browser.on('disconnected', () => {
427
+ logger.debug(`[browser] DISCONNECTED`);
428
+ });
429
+ }
371
430
  }
372
431
  async recreateContext(options = {}) {
373
432
  if (this.poolContext) {
@@ -456,10 +515,42 @@ export class Browser {
456
515
  }
457
516
  async navigateTo(url) {
458
517
  const page = this.ensurePage();
518
+ // Reject empty / whitespace URLs early — without this guard the
519
+ // `'https://' + url` fallback below produces "https://" which Playwright
520
+ // accepts but then crashes on with a confusing "Cannot navigate to invalid URL".
521
+ const trimmedUrl = (url ?? '').trim();
522
+ if (!trimmedUrl) {
523
+ throw new Error('Browser.navigateTo: empty URL. Check that all `{{loginUrl}}` / `{{email}}` ' +
524
+ 'placeholders in your NAVIGATE opcodes are backed by valid credentials on the preset.');
525
+ }
526
+ url = trimmedUrl;
459
527
  // Fix common URL typos (htpps, hhtps, htps, etc.)
460
528
  url = url.replace(/^h+t+p+s?:\/\//i, (m) => /s/i.test(m) ? 'https://' : 'http://');
461
529
  if (!/^https?:\/\//i.test(url)) {
462
- url = 'https://' + url;
530
+ // Resolve relative URLs (e.g. "/home", "dashboard") against the current page origin
531
+ if (url.startsWith('/')) {
532
+ const currentUrl = page.url();
533
+ try {
534
+ const origin = new URL(currentUrl).origin;
535
+ url = `${origin}${url}`;
536
+ }
537
+ catch {
538
+ url = 'https://' + url;
539
+ }
540
+ }
541
+ else {
542
+ url = 'https://' + url;
543
+ }
544
+ }
545
+ // Final sanity check: must have a host after all the auto-prepend logic.
546
+ try {
547
+ const parsed = new URL(url);
548
+ if (!parsed.host) {
549
+ throw new Error(`Browser.navigateTo: resolved URL "${url}" has no host`);
550
+ }
551
+ }
552
+ catch (err) {
553
+ throw new Error(`Browser.navigateTo: invalid URL "${url}" — ${err instanceof Error ? err.message : String(err)}`);
463
554
  }
464
555
  try {
465
556
  await page.goto(url, { waitUntil: 'load', timeout: 20000 });
@@ -504,31 +595,26 @@ export class Browser {
504
595
  const page = this.ensurePage();
505
596
  // Move cursor off-screen to avoid hover effects in screenshots
506
597
  await page.mouse.move(0, 0);
598
+ await ensureCaptureHideStyles(page);
507
599
  return Buffer.from(await page.screenshot({ type: 'png', fullPage: false }));
508
600
  }
509
601
  async takeScreenshotForAI(options = {}) {
510
602
  const page = this.ensurePage();
511
603
  return withHelperTimeout('takeScreenshotForAI', options.timeoutMs, async () => {
512
604
  await page.mouse.move(0, 0);
513
- // Normalize the screenshot to viewport CSS pixels via browser canvas.
605
+ await ensureCaptureHideStyles(page);
606
+ // Normalize the screenshot to viewport CSS pixels via sharp (Node-side).
607
+ // This avoids the expensive round-trip of sending a full-res PNG base64
608
+ // to the browser via CDP for canvas resize.
514
609
  const viewport = page.viewportSize();
610
+ const fullBuf = await page.screenshot({ type: 'png', fullPage: false });
515
611
  if (viewport) {
516
- const fullBuf = await page.screenshot({ type: 'png', fullPage: false });
517
- const resized = await page.evaluate(async ({ pngBase64, w, h }) => {
518
- const img = new Image();
519
- img.src = `data:image/png;base64,${pngBase64}`;
520
- await new Promise((resolve) => { img.onload = () => resolve(); });
521
- const canvas = document.createElement('canvas');
522
- canvas.width = w;
523
- canvas.height = h;
524
- const ctx = canvas.getContext('2d');
525
- ctx.drawImage(img, 0, 0, w, h);
526
- // Return as JPEG base64 (strip the data:image/jpeg;base64, prefix)
527
- return canvas.toDataURL('image/jpeg', 0.70).split(',')[1];
528
- }, { pngBase64: fullBuf.toString('base64'), w: viewport.width, h: viewport.height });
529
- return Buffer.from(resized, 'base64');
530
- }
531
- return Buffer.from(await page.screenshot({ type: 'jpeg', quality: 70, fullPage: false }));
612
+ return await sharp(fullBuf)
613
+ .resize(viewport.width, viewport.height, { fit: 'fill' })
614
+ .jpeg({ quality: 70 })
615
+ .toBuffer();
616
+ }
617
+ return await sharp(fullBuf).jpeg({ quality: 70 }).toBuffer();
532
618
  });
533
619
  }
534
620
  async getAccessibilityTree(options = {}) {
@@ -852,6 +938,7 @@ export class Browser {
852
938
  tag: el.tag,
853
939
  text: el.text,
854
940
  ariaLabel: el.ariaLabel,
941
+ ariaHasPopup: el.ariaHasPopup,
855
942
  role: el.role,
856
943
  inputType: el.inputType,
857
944
  });
@@ -963,12 +1050,680 @@ export class Browser {
963
1050
  return ''; // Fallback: empty DOM, accessibility tree still available
964
1051
  }
965
1052
  }
1053
+ mapRawNodeType(raw) {
1054
+ const tag = raw.tagName.toLowerCase();
1055
+ const role = (raw.role || raw.attributes.role || '').toLowerCase();
1056
+ const inputType = (raw.attributes.type || '').toLowerCase();
1057
+ if (tag === 'a' || role === 'link')
1058
+ return 'link';
1059
+ if (tag === 'button' || role === 'button')
1060
+ return 'button';
1061
+ if (role === 'menuitem')
1062
+ return 'button';
1063
+ if (role === 'menuitemcheckbox')
1064
+ return 'checkbox';
1065
+ if (role === 'menuitemradio')
1066
+ return 'radio';
1067
+ if (role === 'option')
1068
+ return 'button';
1069
+ if (tag === 'input') {
1070
+ if (inputType === 'checkbox')
1071
+ return 'checkbox';
1072
+ if (inputType === 'radio')
1073
+ return 'radio';
1074
+ if (inputType === 'range')
1075
+ return 'slider';
1076
+ return 'input';
1077
+ }
1078
+ if (tag === 'select' || role === 'combobox' || role === 'listbox')
1079
+ return 'select';
1080
+ if (tag === 'textarea')
1081
+ return 'input';
1082
+ if (role === 'switch')
1083
+ return 'toggle';
1084
+ if (role === 'tab')
1085
+ return 'tab';
1086
+ if (tag === 'nav' || role === 'navigation')
1087
+ return 'nav';
1088
+ if (tag === 'form' || role === 'form')
1089
+ return 'form';
1090
+ if (tag === 'img' || role === 'img')
1091
+ return 'image';
1092
+ if (tag === 'svg')
1093
+ return raw.interactive || raw.label ? 'icon' : 'container';
1094
+ if (/^h[1-6]$/.test(tag) || role === 'heading')
1095
+ return 'heading';
1096
+ if (tag === 'ul' || tag === 'ol' || role === 'list')
1097
+ return 'list';
1098
+ if (tag === 'table' || role === 'table' || role === 'grid')
1099
+ return 'table';
1100
+ if (tag === 'video')
1101
+ return 'video';
1102
+ if (tag === 'audio')
1103
+ return 'audio';
1104
+ if (tag === 'iframe')
1105
+ return 'iframe';
1106
+ if (tag === '#text')
1107
+ return 'text';
1108
+ return 'container';
1109
+ }
1110
+ convertRawNode(raw) {
1111
+ return {
1112
+ id: '',
1113
+ type: this.mapRawNodeType(raw),
1114
+ label: raw.label,
1115
+ value: raw.value,
1116
+ bounds: raw.bounds,
1117
+ visible: raw.visible,
1118
+ interactive: raw.interactive,
1119
+ state: { ...raw.state },
1120
+ style: { ...raw.style },
1121
+ semantic: {
1122
+ confidence: 'medium',
1123
+ traits: [],
1124
+ },
1125
+ scroll: raw.scroll ? { ...raw.scroll } : undefined,
1126
+ attributes: { ...raw.attributes, tagName: raw.tagName, ...(raw.role ? { role: raw.role } : {}) },
1127
+ children: raw.children.map((child) => this.convertRawNode(child)),
1128
+ sourceRef: raw.sourceRef,
1129
+ };
1130
+ }
1131
+ shouldCollapseNode(node) {
1132
+ const ownText = (node.attributes.__ownText || '').trim();
1133
+ const semanticTag = node.attributes.__semanticTag === 'true';
1134
+ const hasBackground = node.attributes.__hasBackground === 'true';
1135
+ const hasBorder = node.attributes.__hasBorder === 'true';
1136
+ const hasShadow = node.attributes.__hasShadow === 'true';
1137
+ return !ownText
1138
+ && !node.interactive
1139
+ && !semanticTag
1140
+ && node.children.length === 1
1141
+ && !hasBackground
1142
+ && !hasBorder
1143
+ && !hasShadow
1144
+ && !node.scroll;
1145
+ }
1146
+ collapseNode(node) {
1147
+ const collapsedChildren = node.children.flatMap((child) => this.collapseNode(child));
1148
+ const nextNode = { ...node, children: collapsedChildren };
1149
+ if (this.shouldCollapseNode(nextNode)) {
1150
+ return collapsedChildren;
1151
+ }
1152
+ return [nextNode];
1153
+ }
1154
+ collectFingerprintEntries(node, currentHint, entries) {
1155
+ const tagName = node.attributes.tagName || node.type.toUpperCase();
1156
+ const input = {
1157
+ tagName,
1158
+ role: node.attributes.role || '',
1159
+ textContent: node.label,
1160
+ ariaLabel: node.attributes['aria-label'] || '',
1161
+ htmlId: node.attributes.id || '',
1162
+ name: node.attributes.name || '',
1163
+ inputType: node.attributes.type || '',
1164
+ href: node.attributes.href || '',
1165
+ placeholder: node.attributes.placeholder || '',
1166
+ bounds: node.bounds,
1167
+ };
1168
+ entries.push({ node, parentHint: currentHint || `${node.bounds.x},${node.bounds.y}`, input });
1169
+ const nextHint = (node.attributes.__semanticTag === 'true'
1170
+ || node.interactive
1171
+ || node.type !== 'container'
1172
+ || node.label)
1173
+ ? `${tagName}|${node.label.slice(0, 30)}`
1174
+ : currentHint;
1175
+ for (const child of node.children) {
1176
+ this.collectFingerprintEntries(child, nextHint, entries);
1177
+ }
1178
+ }
1179
+ assignFingerprints(root) {
1180
+ const entries = [];
1181
+ this.collectFingerprintEntries(root, '', entries);
1182
+ const baseCounts = new Map();
1183
+ for (const entry of entries) {
1184
+ const base = fingerprintAKNode(entry.input);
1185
+ baseCounts.set(base, (baseCounts.get(base) ?? 0) + 1);
1186
+ }
1187
+ const usedIds = new Map();
1188
+ for (const entry of entries) {
1189
+ const base = fingerprintAKNode(entry.input);
1190
+ let id = base;
1191
+ if ((baseCounts.get(base) ?? 0) > 1) {
1192
+ id = disambiguateFingerprint(entry.input, entry.parentHint);
1193
+ }
1194
+ if (usedIds.has(id)) {
1195
+ id = disambiguateFingerprint(entry.input, `${entry.parentHint}|${entry.input.bounds.x},${entry.input.bounds.y}`);
1196
+ }
1197
+ usedIds.set(id, 1);
1198
+ entry.node.id = id;
1199
+ }
1200
+ }
1201
+ applyTraitAnnotations(node, page) {
1202
+ const traits = new Set(node.semantic.traits);
1203
+ const position = (node.attributes.__position || '').toLowerCase();
1204
+ const zIndex = Number(node.attributes.__zIndex || '0');
1205
+ const coveragePercent = Number(node.attributes.__coveragePercent || '0');
1206
+ if (position === 'fixed')
1207
+ traits.add('fixed');
1208
+ if (position === 'sticky')
1209
+ traits.add('sticky');
1210
+ if ((position === 'fixed' || position === 'absolute') && zIndex > 10)
1211
+ traits.add('floating');
1212
+ if (coveragePercent > 50 && ((node.style.opacity ?? 1) < 1 || !!node.style.bgColor))
1213
+ traits.add('overlay');
1214
+ if (node.bounds.y + node.bounds.h < page.viewport.height)
1215
+ traits.add('above-fold');
1216
+ if (node.bounds.y > page.viewport.height)
1217
+ traits.add('below-fold');
1218
+ if (node.bounds.w >= page.viewport.width * 0.9)
1219
+ traits.add('full-width');
1220
+ if (node.scroll && (node.scroll.overflowX || node.scroll.overflowY))
1221
+ traits.add('scrollable');
1222
+ node.semantic.traits = [...traits];
1223
+ for (const child of node.children) {
1224
+ this.applyTraitAnnotations(child, page);
1225
+ }
1226
+ }
1227
+ findFirstPattern(root, pattern) {
1228
+ const nodes = [root];
1229
+ while (nodes.length > 0) {
1230
+ const current = nodes.shift();
1231
+ if (!current)
1232
+ continue;
1233
+ if (current.semantic.pattern === pattern)
1234
+ return current;
1235
+ nodes.unshift(...current.children);
1236
+ }
1237
+ return null;
1238
+ }
1239
+ flattenNodes(root) {
1240
+ const nodes = [];
1241
+ const stack = [root];
1242
+ while (stack.length > 0) {
1243
+ const current = stack.pop();
1244
+ if (!current)
1245
+ continue;
1246
+ nodes.push(current);
1247
+ stack.push(...current.children);
1248
+ }
1249
+ return nodes;
1250
+ }
1251
+ annotateSemanticPatterns(tree) {
1252
+ const nodes = this.flattenNodes(tree.root);
1253
+ const lowerLabel = (node) => `${node.label} ${node.value || ''}`.toLowerCase();
1254
+ const hasButtons = (node) => this.flattenNodes(node).filter((child) => child.type === 'button' || child.type === 'link').length;
1255
+ const hasLinks = (node) => this.flattenNodes(node).some((child) => child.type === 'link');
1256
+ const hasHeading = (node) => this.flattenNodes(node).some((child) => child.type === 'heading');
1257
+ for (const node of nodes) {
1258
+ const text = lowerLabel(node);
1259
+ const position = (node.attributes.__position || '').toLowerCase();
1260
+ const zIndex = Number(node.attributes.__zIndex || '0');
1261
+ const centered = Math.abs((node.bounds.x + node.bounds.w / 2) - tree.page.viewport.width / 2) <= Math.max(120, tree.page.viewport.width * 0.18);
1262
+ const nearTopOrBottom = node.bounds.y < 120 || node.bounds.y + node.bounds.h > tree.page.viewport.height - 140;
1263
+ if (!node.semantic.pattern) {
1264
+ if ((position === 'fixed' || position === 'sticky') && nearTopOrBottom && hasButtons(node) >= 1 && hasButtons(node) <= 3 && zIndex > 1000) {
1265
+ node.semantic.pattern = 'cookie-banner';
1266
+ node.semantic.confidence = /\b(cookie|consent|privacy|rgpd|gdpr)\b/i.test(text) ? 'high' : 'medium';
1267
+ }
1268
+ else if (node.attributes.role === 'dialog'
1269
+ || node.attributes['aria-modal'] === 'true'
1270
+ || (centered && zIndex > 100 && (position === 'fixed' || position === 'absolute'))) {
1271
+ node.semantic.pattern = 'modal';
1272
+ node.semantic.confidence = node.attributes.role === 'dialog' || node.attributes['aria-modal'] === 'true' ? 'high' : 'medium';
1273
+ }
1274
+ else if (node.attributes.role === 'menu'
1275
+ || node.attributes.role === 'listbox'
1276
+ || ((position === 'absolute' || position === 'fixed')
1277
+ && zIndex > 20
1278
+ && node.bounds.h < tree.page.viewport.height * 0.7
1279
+ && node.bounds.w < tree.page.viewport.width * 0.7
1280
+ && this.flattenNodes(node).some((child) => child.interactive
1281
+ && child.id !== node.id
1282
+ && ['button', 'link', 'checkbox', 'radio', 'select', 'toggle', 'tab'].includes(child.type)))) {
1283
+ node.semantic.pattern = 'dropdown';
1284
+ node.semantic.confidence =
1285
+ node.attributes.role === 'menu' || node.attributes.role === 'listbox'
1286
+ ? 'high'
1287
+ : 'medium';
1288
+ }
1289
+ else if ((node.type === 'nav' || node.attributes.role === 'navigation') && node.bounds.y < 100 && hasLinks(node)) {
1290
+ node.semantic.pattern = 'navbar';
1291
+ node.semantic.confidence = (position === 'sticky' || position === 'fixed') ? 'high' : 'medium';
1292
+ }
1293
+ else if ((node.attributes.tagName || '').toLowerCase() === 'footer' && hasLinks(node)) {
1294
+ node.semantic.pattern = 'footer';
1295
+ node.semantic.confidence = 'high';
1296
+ }
1297
+ else if (node.type === 'form') {
1298
+ node.semantic.pattern = 'form';
1299
+ node.semantic.confidence = 'high';
1300
+ }
1301
+ else if (node.bounds.h > 300 && hasHeading(node) && hasButtons(node) > 0 && node.bounds.y < tree.page.viewport.height) {
1302
+ node.semantic.pattern = 'hero';
1303
+ node.semantic.confidence = 'medium';
1304
+ }
1305
+ }
1306
+ }
1307
+ for (const node of nodes) {
1308
+ if (node.semantic.pattern)
1309
+ continue;
1310
+ const buttonChildren = node.children.filter((child) => child.type === 'button' || child.type === 'link');
1311
+ if (buttonChildren.length >= 1 && buttonChildren.length <= 3) {
1312
+ const sorted = [...buttonChildren].sort((a, b) => a.bounds.x - b.bounds.x);
1313
+ const closeEnough = sorted.every((child, index) => index === 0 || Math.abs(child.bounds.x - sorted[index - 1].bounds.x) < 220);
1314
+ if (closeEnough) {
1315
+ node.semantic.pattern = 'cta-group';
1316
+ node.semantic.confidence = 'medium';
1317
+ }
1318
+ }
1319
+ }
1320
+ const navbar = this.findFirstPattern(tree.root, 'navbar');
1321
+ if (navbar) {
1322
+ const heroCandidate = tree.root.children.find((child) => child.id !== navbar.id
1323
+ && child.type === 'container'
1324
+ && child.bounds.y >= navbar.bounds.y + navbar.bounds.h
1325
+ && child.bounds.h > 300
1326
+ && this.flattenNodes(child).some((descendant) => descendant.type === 'heading'));
1327
+ if (heroCandidate && !heroCandidate.semantic.pattern) {
1328
+ heroCandidate.semantic.pattern = 'hero';
1329
+ heroCandidate.semantic.confidence = 'medium';
1330
+ }
1331
+ }
1332
+ }
1333
+ computeOverlays(tree) {
1334
+ return this.flattenNodes(tree.root)
1335
+ .filter((node) => {
1336
+ const zIndex = Number(node.attributes.__zIndex || '0');
1337
+ const position = (node.attributes.__position || '').toLowerCase();
1338
+ return zIndex > 100 && (position === 'fixed' || position === 'absolute' || node.semantic.traits.includes('overlay'));
1339
+ })
1340
+ .map((node) => ({
1341
+ nodeId: node.id,
1342
+ zIndex: Number(node.attributes.__zIndex || '0'),
1343
+ coveragePercent: Number(node.attributes.__coveragePercent || '0'),
1344
+ blocksInteraction: node.attributes.__blocksInteraction === 'true',
1345
+ }))
1346
+ .sort((a, b) => b.zIndex - a.zIndex);
1347
+ }
1348
+ normalizeRawSnapshot(snapshot) {
1349
+ const rootNode = this.convertRawNode(snapshot.root);
1350
+ const collapsedRootChildren = rootNode.children.flatMap((child) => this.collapseNode(child));
1351
+ const normalizedRoot = {
1352
+ ...rootNode,
1353
+ children: collapsedRootChildren,
1354
+ };
1355
+ this.assignFingerprints(normalizedRoot);
1356
+ this.applyTraitAnnotations(normalizedRoot, snapshot.page);
1357
+ const tree = {
1358
+ root: normalizedRoot,
1359
+ page: snapshot.page,
1360
+ overlays: [],
1361
+ };
1362
+ this.annotateSemanticPatterns(tree);
1363
+ tree.overlays = this.computeOverlays(tree);
1364
+ this.akNodeIndex = buildAKNodeRuntimeIndex(tree);
1365
+ return tree;
1366
+ }
1367
+ async getAKTree() {
1368
+ const page = this.ensurePage();
1369
+ const snapshot = await page.evaluate(() => {
1370
+ const SKIP_TAGS = new Set(['STYLE', 'SCRIPT', 'LINK', 'META', 'NOSCRIPT', 'SOURCE', 'PATH', 'G', 'DEFS', 'CLIPPATH']);
1371
+ const SEMANTIC_TAGS = new Set(['NAV', 'MAIN', 'HEADER', 'FOOTER', 'SECTION', 'ARTICLE', 'ASIDE', 'FORM']);
1372
+ const TEST_ID_ATTRS = ['data-testid', 'data-test', 'data-qa', 'data-cy'];
1373
+ const quoteForSelector = (value) => value
1374
+ .replace(/\\/g, '\\\\')
1375
+ .replace(/"/g, '\\"');
1376
+ const quoteForHasText = (value) => value
1377
+ .replace(/\\/g, '\\\\')
1378
+ .replace(/"/g, '\\"');
1379
+ const cssEscape = (value) => {
1380
+ if (typeof CSS !== 'undefined' && typeof CSS.escape === 'function') {
1381
+ return CSS.escape(value);
1382
+ }
1383
+ return value.replace(/(^-?\d)|[^a-zA-Z0-9_-]/g, (match) => `\\${match}`);
1384
+ };
1385
+ const hasVisualChrome = (style) => {
1386
+ const background = style.backgroundColor !== 'rgba(0, 0, 0, 0)' && style.backgroundColor !== 'transparent';
1387
+ const border = ['Top', 'Right', 'Bottom', 'Left'].some((side) => {
1388
+ const width = style.getPropertyValue(`border-${side.toLowerCase()}-width`);
1389
+ const color = style.getPropertyValue(`border-${side.toLowerCase()}-color`);
1390
+ return parseFloat(width || '0') > 0 && color !== 'transparent' && color !== 'rgba(0, 0, 0, 0)';
1391
+ });
1392
+ const shadow = !!style.boxShadow && style.boxShadow !== 'none';
1393
+ return { background, border, shadow };
1394
+ };
1395
+ const isElementInteractive = (el, style) => {
1396
+ const tag = el.tagName.toLowerCase();
1397
+ const role = (el.getAttribute('role') || '').toLowerCase();
1398
+ if (['a', 'button', 'input', 'select', 'textarea', 'summary'].includes(tag))
1399
+ return true;
1400
+ if (['button', 'link', 'tab', 'menuitem', 'menuitemcheckbox', 'menuitemradio', 'checkbox', 'radio', 'switch', 'combobox', 'option', 'slider', 'spinbutton', 'listbox', 'treeitem', 'gridcell'].includes(role)) {
1401
+ return true;
1402
+ }
1403
+ return el.tabIndex >= 0
1404
+ || el.hasAttribute('onclick')
1405
+ || el.hasAttribute('aria-haspopup')
1406
+ || el.getAttribute('contenteditable') === 'true'
1407
+ || style.cursor === 'pointer';
1408
+ };
1409
+ const getOwnText = (el) => {
1410
+ let text = '';
1411
+ for (const child of Array.from(el.childNodes)) {
1412
+ if (child.nodeType === Node.TEXT_NODE) {
1413
+ const next = (child.textContent || '').replace(/\s+/g, ' ').trim();
1414
+ if (next)
1415
+ text += `${text ? ' ' : ''}${next}`;
1416
+ }
1417
+ }
1418
+ return text.trim();
1419
+ };
1420
+ const getShallowText = (el) => {
1421
+ const own = getOwnText(el);
1422
+ if (own)
1423
+ return own;
1424
+ let text = '';
1425
+ for (const child of Array.from(el.children)) {
1426
+ const next = getOwnText(child);
1427
+ if (next)
1428
+ text += `${text ? ' ' : ''}${next}`;
1429
+ }
1430
+ return text.trim();
1431
+ };
1432
+ const isCodeLikeText = (value) => {
1433
+ const normalized = value.replace(/\s+/g, ' ').trim();
1434
+ if (!normalized)
1435
+ return false;
1436
+ if (/(=>|function\s*\(|document\.|window\.|return\s+|const\s+|let\s+|var\s+)/i.test(normalized)) {
1437
+ return true;
1438
+ }
1439
+ const punctuationCount = (normalized.match(/[{}()[\];=<>]/g) ?? []).length;
1440
+ return punctuationCount / Math.max(normalized.length, 1) > 0.12;
1441
+ };
1442
+ const collectVisibleDescendantText = (el, maxLength = 160) => {
1443
+ const walker = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, {
1444
+ acceptNode(node) {
1445
+ const parent = node.parentElement;
1446
+ if (!parent)
1447
+ return NodeFilter.FILTER_REJECT;
1448
+ if (SKIP_TAGS.has(parent.tagName))
1449
+ return NodeFilter.FILTER_REJECT;
1450
+ const style = window.getComputedStyle(parent);
1451
+ if (style.display === 'none' || style.visibility === 'hidden' || parseFloat(style.opacity || '1') === 0) {
1452
+ return NodeFilter.FILTER_REJECT;
1453
+ }
1454
+ return NodeFilter.FILTER_ACCEPT;
1455
+ },
1456
+ });
1457
+ let text = '';
1458
+ while (walker.nextNode() && text.length < maxLength) {
1459
+ const next = (walker.currentNode.textContent || '').replace(/\s+/g, ' ').trim();
1460
+ if (!next || isCodeLikeText(next))
1461
+ continue;
1462
+ text += `${text ? ' ' : ''}${next}`;
1463
+ }
1464
+ return text.slice(0, maxLength).trim();
1465
+ };
1466
+ const buildStructuralSelector = (el) => {
1467
+ const segments = [];
1468
+ let current = el;
1469
+ while (current && current.tagName !== 'BODY' && segments.length < 6) {
1470
+ const tag = current.tagName.toLowerCase();
1471
+ const parent = current.parentElement;
1472
+ if (!parent)
1473
+ break;
1474
+ const sameTagSiblings = Array.from(parent.children)
1475
+ .filter((sibling) => sibling.tagName === current.tagName);
1476
+ const index = sameTagSiblings.indexOf(current) + 1;
1477
+ segments.unshift(`${tag}:nth-of-type(${Math.max(1, index)})`);
1478
+ current = parent;
1479
+ }
1480
+ return segments.join(' > ') || 'body';
1481
+ };
1482
+ const isUniqueSelector = (selector, expected) => {
1483
+ try {
1484
+ const matches = document.querySelectorAll(selector);
1485
+ return matches.length === 1 && matches[0] === expected;
1486
+ }
1487
+ catch {
1488
+ return false;
1489
+ }
1490
+ };
1491
+ const buildSourceRef = (el, label) => {
1492
+ // Reuse the same stable-selector strategy as getInteractiveElements():
1493
+ // generate candidate selectors from attributes and pick the first that
1494
+ // uniquely identifies the element in the current DOM.
1495
+ const tag = el.tagName.toLowerCase();
1496
+ const candidates = [];
1497
+ const push = (selector) => {
1498
+ if (selector && !candidates.includes(selector))
1499
+ candidates.push(selector);
1500
+ };
1501
+ const role = el.getAttribute('role');
1502
+ const type = el.getAttribute('type');
1503
+ const htmlId = el.getAttribute('id');
1504
+ if (htmlId)
1505
+ push(`#${cssEscape(htmlId)}`);
1506
+ for (const attr of TEST_ID_ATTRS) {
1507
+ const value = el.getAttribute(attr);
1508
+ if (!value)
1509
+ continue;
1510
+ push(`[${attr}="${quoteForSelector(value)}"]`);
1511
+ push(`${tag}[${attr}="${quoteForSelector(value)}"]`);
1512
+ }
1513
+ const name = el.getAttribute('name');
1514
+ if (name) {
1515
+ push(`${tag}[name="${quoteForSelector(name)}"]`);
1516
+ if (type)
1517
+ push(`${tag}[name="${quoteForSelector(name)}"][type="${quoteForSelector(type)}"]`);
1518
+ }
1519
+ const rawHref = el.getAttribute('href');
1520
+ if (tag === 'a' && rawHref) {
1521
+ push(`a[href="${quoteForSelector(rawHref)}"]`);
1522
+ }
1523
+ for (const attr of ['aria-label', 'title', 'placeholder']) {
1524
+ const value = el.getAttribute(attr);
1525
+ if (!value)
1526
+ continue;
1527
+ push(`${tag}[${attr}="${quoteForSelector(value)}"]`);
1528
+ if (role)
1529
+ push(`${tag}[role="${quoteForSelector(role)}"][${attr}="${quoteForSelector(value)}"]`);
1530
+ if (type)
1531
+ push(`${tag}[type="${quoteForSelector(type)}"][${attr}="${quoteForSelector(value)}"]`);
1532
+ }
1533
+ if (type)
1534
+ push(`${tag}[type="${quoteForSelector(type)}"]`);
1535
+ // Pick the first unique candidate
1536
+ for (const selector of candidates) {
1537
+ if (isUniqueSelector(selector, el))
1538
+ return selector;
1539
+ }
1540
+ // Fallback: :has-text() with role if available (for disambiguation)
1541
+ if (role && label) {
1542
+ const roleSelector = `[role="${quoteForSelector(role)}"]:has-text("${quoteForHasText(label.slice(0, 80))}")`;
1543
+ if (isUniqueSelector(roleSelector, el))
1544
+ return roleSelector;
1545
+ }
1546
+ if (label) {
1547
+ const textSelector = `${tag}:has-text("${quoteForHasText(label.slice(0, 80))}")`;
1548
+ if (isUniqueSelector(textSelector, el))
1549
+ return textSelector;
1550
+ }
1551
+ return buildStructuralSelector(el);
1552
+ };
1553
+ const collectScrollState = (el) => {
1554
+ const style = getComputedStyle(el);
1555
+ const overflowY = ['auto', 'scroll'].includes(style.overflowY) && el.scrollHeight > el.clientHeight;
1556
+ const overflowX = ['auto', 'scroll'].includes(style.overflowX) && el.scrollWidth > el.clientWidth;
1557
+ if (!overflowY && !overflowX)
1558
+ return undefined;
1559
+ return {
1560
+ scrollTop: Math.round(el.scrollTop),
1561
+ scrollLeft: Math.round(el.scrollLeft),
1562
+ scrollHeight: Math.round(el.scrollHeight),
1563
+ scrollWidth: Math.round(el.scrollWidth),
1564
+ clientHeight: Math.round(el.clientHeight),
1565
+ clientWidth: Math.round(el.clientWidth),
1566
+ overflowY,
1567
+ overflowX,
1568
+ };
1569
+ };
1570
+ const collectAttributes = (el) => {
1571
+ const attrs = {};
1572
+ const copyAttrs = [
1573
+ 'id', 'name', 'type', 'href', 'placeholder', 'title', 'role', 'aria-label',
1574
+ 'aria-controls', 'aria-expanded', 'aria-haspopup', 'aria-modal', 'data-testid',
1575
+ 'data-test', 'data-qa', 'data-cy', 'alt',
1576
+ ];
1577
+ for (const attr of copyAttrs) {
1578
+ const value = el.getAttribute(attr);
1579
+ if (value)
1580
+ attrs[attr] = value.slice(0, 180);
1581
+ }
1582
+ return attrs;
1583
+ };
1584
+ const parseNode = (element) => {
1585
+ if (!(element instanceof HTMLElement))
1586
+ return null;
1587
+ if (SKIP_TAGS.has(element.tagName))
1588
+ return null;
1589
+ const style = getComputedStyle(element);
1590
+ const rect = element.getBoundingClientRect();
1591
+ const visible = style.display !== 'none'
1592
+ && style.visibility !== 'hidden'
1593
+ && parseFloat(style.opacity || '1') !== 0
1594
+ && rect.width > 0
1595
+ && rect.height > 0;
1596
+ const ownText = getOwnText(element).slice(0, 160);
1597
+ const shallowText = getShallowText(element).slice(0, 160);
1598
+ const sanitizedDescendantText = collectVisibleDescendantText(element, 160);
1599
+ const fallbackLabel = (element.getAttribute('aria-label')
1600
+ || element.getAttribute('title')
1601
+ || (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement
1602
+ ? element.placeholder || element.value
1603
+ : '')
1604
+ || element.getAttribute('alt')
1605
+ || shallowText
1606
+ || sanitizedDescendantText).slice(0, 160);
1607
+ const interactive = isElementInteractive(element, style);
1608
+ const children = element.tagName === 'IFRAME'
1609
+ ? []
1610
+ : Array.from(element.children)
1611
+ .map((child) => parseNode(child))
1612
+ .filter((child) => child != null);
1613
+ const scroll = collectScrollState(element);
1614
+ const visualChrome = hasVisualChrome(style);
1615
+ const zIndexRaw = Number.parseInt(style.zIndex || '0', 10);
1616
+ const zIndex = Number.isFinite(zIndexRaw) ? zIndexRaw : 0;
1617
+ const coveragePercent = rect.width > 0 && rect.height > 0
1618
+ ? (Math.min(rect.width, window.innerWidth) * Math.min(rect.height, window.innerHeight) / (window.innerWidth * window.innerHeight)) * 100
1619
+ : 0;
1620
+ const attributes = collectAttributes(element);
1621
+ attributes.__ownText = ownText;
1622
+ attributes.__semanticTag = SEMANTIC_TAGS.has(element.tagName) ? 'true' : 'false';
1623
+ attributes.__position = style.position || '';
1624
+ attributes.__zIndex = String(zIndex);
1625
+ attributes.__hasBackground = visualChrome.background ? 'true' : 'false';
1626
+ attributes.__hasBorder = visualChrome.border ? 'true' : 'false';
1627
+ attributes.__hasShadow = visualChrome.shadow ? 'true' : 'false';
1628
+ attributes.__coveragePercent = String(Math.round(coveragePercent));
1629
+ attributes.__blocksInteraction = style.pointerEvents !== 'none' ? 'true' : 'false';
1630
+ return {
1631
+ tagName: element.tagName,
1632
+ role: element.getAttribute('role'),
1633
+ label: fallbackLabel,
1634
+ ownText,
1635
+ value: (element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || element instanceof HTMLSelectElement)
1636
+ ? String(element.value || '').slice(0, 160)
1637
+ : undefined,
1638
+ bounds: {
1639
+ x: Math.round(rect.x + window.scrollX),
1640
+ y: Math.round(rect.y + window.scrollY),
1641
+ w: Math.round(rect.width),
1642
+ h: Math.round(rect.height),
1643
+ },
1644
+ visible,
1645
+ interactive,
1646
+ state: {
1647
+ disabled: !!(element.disabled),
1648
+ focused: document.activeElement === element,
1649
+ checked: element instanceof HTMLInputElement && ['checkbox', 'radio'].includes(element.type) ? element.checked : undefined,
1650
+ expanded: element.hasAttribute('aria-expanded') ? element.getAttribute('aria-expanded') === 'true' : undefined,
1651
+ selected: element instanceof HTMLOptionElement
1652
+ ? element.selected
1653
+ : element.getAttribute('aria-selected') === 'true'
1654
+ ? true
1655
+ : undefined,
1656
+ },
1657
+ style: {
1658
+ bgColor: style.backgroundColor !== 'rgba(0, 0, 0, 0)' ? style.backgroundColor : undefined,
1659
+ fgColor: style.color || undefined,
1660
+ fontSize: Number.parseFloat(style.fontSize || '0') || undefined,
1661
+ opacity: Number.parseFloat(style.opacity || '1'),
1662
+ },
1663
+ scroll,
1664
+ attributes,
1665
+ sourceRef: buildSourceRef(element, fallbackLabel),
1666
+ children,
1667
+ };
1668
+ };
1669
+ const makePageScroll = () => ({
1670
+ scrollTop: Math.round(window.scrollY),
1671
+ scrollLeft: Math.round(window.scrollX),
1672
+ scrollHeight: Math.round(document.documentElement.scrollHeight),
1673
+ scrollWidth: Math.round(document.documentElement.scrollWidth),
1674
+ clientHeight: Math.round(window.innerHeight),
1675
+ clientWidth: Math.round(window.innerWidth),
1676
+ overflowY: document.documentElement.scrollHeight > window.innerHeight,
1677
+ overflowX: document.documentElement.scrollWidth > window.innerWidth,
1678
+ });
1679
+ return {
1680
+ root: parseNode(document.body) ?? {
1681
+ tagName: 'BODY',
1682
+ role: null,
1683
+ label: '',
1684
+ ownText: '',
1685
+ bounds: {
1686
+ x: 0,
1687
+ y: 0,
1688
+ w: Math.round(window.innerWidth),
1689
+ h: Math.round(window.innerHeight),
1690
+ },
1691
+ visible: true,
1692
+ interactive: false,
1693
+ state: { disabled: false, focused: false },
1694
+ style: { opacity: 1 },
1695
+ attributes: {
1696
+ __semanticTag: 'true',
1697
+ __position: 'static',
1698
+ __zIndex: '0',
1699
+ __hasBackground: 'false',
1700
+ __hasBorder: 'false',
1701
+ __hasShadow: 'false',
1702
+ __coveragePercent: '100',
1703
+ __blocksInteraction: 'false',
1704
+ },
1705
+ sourceRef: 'body',
1706
+ children: [],
1707
+ },
1708
+ page: {
1709
+ url: window.location.href,
1710
+ title: document.title,
1711
+ viewport: {
1712
+ width: Math.round(window.innerWidth),
1713
+ height: Math.round(window.innerHeight),
1714
+ },
1715
+ scroll: makePageScroll(),
1716
+ },
1717
+ };
1718
+ });
1719
+ return this.normalizeRawSnapshot(snapshot);
1720
+ }
966
1721
  async getPageState(opts) {
967
1722
  const page = this.ensurePage();
968
- const [cleanScreenshot, accessibilityTree, interactiveElements, scrollInfo, simplifiedDOM] = await Promise.all([
1723
+ const [cleanScreenshot, akTree, accessibilityTree, scrollInfo, simplifiedDOM] = await Promise.all([
969
1724
  this.takeScreenshotForAI(),
1725
+ this.getAKTree(),
970
1726
  this.getAccessibilityTree(),
971
- this.getInteractiveElements(),
972
1727
  page.evaluate(() => ({
973
1728
  scrollY: Math.round(window.scrollY),
974
1729
  scrollHeight: document.documentElement.scrollHeight,
@@ -976,12 +1731,16 @@ export class Browser {
976
1731
  })),
977
1732
  this.getSimplifiedDOM(),
978
1733
  ]);
1734
+ const interactiveElements = deriveInteractiveElementsFromAKTree(akTree);
979
1735
  const annotatedScreenshot = opts?.skipAnnotation
980
1736
  ? cleanScreenshot
981
1737
  : await annotateWithSetOfMarks(cleanScreenshot, interactiveElements);
1738
+ const serializedAKTree = serializeAKTree(akTree);
982
1739
  return {
983
1740
  cleanScreenshot,
984
1741
  screenshot: annotatedScreenshot,
1742
+ akTree,
1743
+ serializedAKTree,
985
1744
  accessibilityTree,
986
1745
  interactiveElements,
987
1746
  simplifiedDOM,
@@ -991,9 +1750,9 @@ export class Browser {
991
1750
  /** Lightweight page state without screenshots — skips takeScreenshotForAI + SoM annotation. */
992
1751
  async getPageStateLite() {
993
1752
  const page = this.ensurePage();
994
- const [accessibilityTree, interactiveElements, scrollInfo, simplifiedDOM] = await Promise.all([
1753
+ const [akTree, accessibilityTree, scrollInfo, simplifiedDOM] = await Promise.all([
1754
+ this.getAKTree(),
995
1755
  this.getAccessibilityTree(),
996
- this.getInteractiveElements(),
997
1756
  page.evaluate(() => ({
998
1757
  scrollY: Math.round(window.scrollY),
999
1758
  scrollHeight: document.documentElement.scrollHeight,
@@ -1001,7 +1760,15 @@ export class Browser {
1001
1760
  })),
1002
1761
  this.getSimplifiedDOM(),
1003
1762
  ]);
1004
- return { accessibilityTree, interactiveElements, simplifiedDOM, scrollInfo };
1763
+ const interactiveElements = deriveInteractiveElementsFromAKTree(akTree);
1764
+ return {
1765
+ akTree,
1766
+ serializedAKTree: serializeAKTree(akTree),
1767
+ accessibilityTree,
1768
+ interactiveElements,
1769
+ simplifiedDOM,
1770
+ scrollInfo,
1771
+ };
1005
1772
  }
1006
1773
  async exportStorageState() {
1007
1774
  const context = this.ensureContext();
@@ -1221,11 +1988,23 @@ export class Browser {
1221
1988
  const rect = node.getBoundingClientRect();
1222
1989
  return rect.width > 0 && rect.height > 0;
1223
1990
  };
1991
+ const isCodeLikeText = (value) => {
1992
+ const normalized = value.replace(/\s+/g, ' ').trim();
1993
+ if (!normalized)
1994
+ return false;
1995
+ if (/(=>|function\s*\(|document\.|window\.|return\s+|const\s+|let\s+|var\s+)/i.test(normalized)) {
1996
+ return true;
1997
+ }
1998
+ const punctuationCount = (normalized.match(/[{}()[\];=<>]/g) ?? []).length;
1999
+ return punctuationCount / Math.max(normalized.length, 1) > 0.12;
2000
+ };
1224
2001
  const textFor = (node) => {
1225
2002
  if (!(node instanceof HTMLElement))
1226
2003
  return '';
1227
2004
  const aria = node.getAttribute('aria-label') || node.getAttribute('title') || '';
1228
- const text = (node.innerText || node.textContent || '').replace(/\s+/g, ' ').trim();
2005
+ const text = (node.innerText || '').replace(/\s+/g, ' ').trim();
2006
+ if (isCodeLikeText(text))
2007
+ return aria.slice(0, 120);
1229
2008
  return (text || aria).slice(0, 120);
1230
2009
  };
1231
2010
  const quoteForSelector = (value) => value
@@ -1316,7 +2095,7 @@ export class Browser {
1316
2095
  if (!isVisible(parent))
1317
2096
  continue;
1318
2097
  const nextText = (walker.currentNode.textContent || '').replace(/\s+/g, ' ').trim();
1319
- if (!nextText)
2098
+ if (!nextText || isCodeLikeText(nextText))
1320
2099
  continue;
1321
2100
  visibleText += `${visibleText ? ' ' : ''}${nextText}`;
1322
2101
  }
@@ -1606,9 +2385,36 @@ export class Browser {
1606
2385
  const root = document.body ?? document.documentElement;
1607
2386
  const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
1608
2387
  let textSample = '';
2388
+ const dedupe = (values, max = 6) => {
2389
+ const seen = new Set();
2390
+ const result = [];
2391
+ for (const value of values) {
2392
+ const normalized = (value || '').replace(/\s+/g, ' ').trim();
2393
+ if (!normalized || seen.has(normalized))
2394
+ continue;
2395
+ seen.add(normalized);
2396
+ result.push(normalized.slice(0, 120));
2397
+ if (result.length >= max)
2398
+ break;
2399
+ }
2400
+ return result;
2401
+ };
2402
+ const isCodeLikeText = (value) => {
2403
+ const normalized = value.replace(/\s+/g, ' ').trim();
2404
+ if (!normalized)
2405
+ return false;
2406
+ if (/(=>|function\s*\(|document\.|window\.|return\s+|const\s+|let\s+|var\s+)/i.test(normalized)) {
2407
+ return true;
2408
+ }
2409
+ const punctuationCount = (normalized.match(/[{}()[\];=<>]/g) ?? []).length;
2410
+ return punctuationCount / Math.max(normalized.length, 1) > 0.12;
2411
+ };
1609
2412
  while (walker.nextNode() && textSample.length < 240) {
2413
+ const parent = walker.currentNode.parentElement;
2414
+ if (!parent)
2415
+ continue;
1610
2416
  const nextText = (walker.currentNode.textContent || '').replace(/\s+/g, ' ').trim();
1611
- if (!nextText)
2417
+ if (!nextText || isCodeLikeText(nextText))
1612
2418
  continue;
1613
2419
  textSample += `${textSample ? ' ' : ''}${nextText}`;
1614
2420
  }
@@ -1628,14 +2434,153 @@ export class Browser {
1628
2434
  const rect = node.getBoundingClientRect();
1629
2435
  return rect.width > 0 && rect.height > 0;
1630
2436
  };
1631
- const dialogCount = Array.from(document.querySelectorAll('dialog[open], [role="dialog"], [aria-modal="true"]')).filter(isVisible).length;
2437
+ const textFor = (node) => {
2438
+ if (!(node instanceof HTMLElement))
2439
+ return '';
2440
+ const label = [
2441
+ node.getAttribute('aria-label'),
2442
+ node.getAttribute('title'),
2443
+ node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement
2444
+ ? node.placeholder || node.value
2445
+ : '',
2446
+ node.innerText,
2447
+ ]
2448
+ .filter((value) => typeof value === 'string' && value.trim().length > 0)
2449
+ .join(' ')
2450
+ .replace(/\s+/g, ' ')
2451
+ .trim();
2452
+ if (isCodeLikeText(label)) {
2453
+ return [
2454
+ node.getAttribute('aria-label'),
2455
+ node.getAttribute('title'),
2456
+ node instanceof HTMLInputElement || node instanceof HTMLTextAreaElement
2457
+ ? node.placeholder || node.value
2458
+ : '',
2459
+ ]
2460
+ .filter((value) => typeof value === 'string' && value.trim().length > 0)
2461
+ .join(' ')
2462
+ .replace(/\s+/g, ' ')
2463
+ .trim()
2464
+ .slice(0, 160);
2465
+ }
2466
+ return label.slice(0, 160);
2467
+ };
2468
+ const collectVisibleTexts = (selector, max = 6) => dedupe(Array.from(document.querySelectorAll(selector))
2469
+ .filter(isVisible)
2470
+ .map((node) => textFor(node)), max);
2471
+ const isLikelyModalSurface = (node) => {
2472
+ if (!(node instanceof HTMLElement) || !isVisible(node))
2473
+ return false;
2474
+ const style = window.getComputedStyle(node);
2475
+ const rect = node.getBoundingClientRect();
2476
+ if (rect.width < 180 || rect.height < 80)
2477
+ return false;
2478
+ if (!['fixed', 'absolute', 'sticky'].includes(style.position))
2479
+ return false;
2480
+ const centeredEnough = Math.abs((rect.left + rect.width / 2) - window.innerWidth / 2) <= Math.max(120, window.innerWidth * 0.18)
2481
+ && Math.abs((rect.top + rect.height / 2) - window.innerHeight / 2) <= Math.max(120, window.innerHeight * 0.18);
2482
+ const coversLargeArea = rect.width >= window.innerWidth * 0.35 && rect.height >= window.innerHeight * 0.2;
2483
+ const overlayLikeSelector = [
2484
+ node.getAttribute('data-testid') || '',
2485
+ node.getAttribute('data-test') || '',
2486
+ node.className || '',
2487
+ node.id || '',
2488
+ ].join(' ').toLowerCase();
2489
+ const overlayLikeName = /\b(modal|dialog|drawer|sheet|popover|popup|overlay)\b/.test(overlayLikeSelector);
2490
+ return overlayLikeName && (centeredEnough || coversLargeArea);
2491
+ };
2492
+ const semanticDialogs = Array.from(document.querySelectorAll('dialog[open], [role="dialog"], [aria-modal="true"]')).filter(isVisible);
2493
+ const heuristicDialogs = Array.from(document.querySelectorAll([
2494
+ '[data-testid*="modal" i]',
2495
+ '[data-testid*="dialog" i]',
2496
+ '[data-test*="modal" i]',
2497
+ '[data-test*="dialog" i]',
2498
+ '[class*="modal" i]',
2499
+ '[class*="dialog" i]',
2500
+ '[class*="drawer" i]',
2501
+ '[class*="popover" i]',
2502
+ '[id*="modal" i]',
2503
+ '[id*="dialog" i]',
2504
+ '[data-radix-dialog-content]',
2505
+ '[data-radix-dialog-content][data-state="open"]',
2506
+ '[data-radix-popover-content][data-state="open"]',
2507
+ '[data-radix-portal] [data-state="open"]',
2508
+ '[data-reach-dialog-content]',
2509
+ '[data-reach-dialog-overlay]',
2510
+ '[data-headlessui-dialog-panel]',
2511
+ '[data-headlessui-dialog-backdrop]',
2512
+ '[data-headlessui-portal] [data-headlessui-state="open"]',
2513
+ '[data-headlessui-state="open"]',
2514
+ ].join(','))).filter(isLikelyModalSurface);
2515
+ const dialogCount = new Set([...semanticDialogs, ...heuristicDialogs]).size;
1632
2516
  const expandedCount = Array.from(document.querySelectorAll('[aria-expanded="true"], details[open]')).filter(isVisible).length;
2517
+ const visibleMenus = Array.from(document.querySelectorAll('[role="menu"], [role="listbox"], [data-headlessui-state="open"], [data-radix-popper-content-wrapper]')).filter(isVisible);
1633
2518
  const loadingSelectors = [
1634
2519
  '[aria-busy="true"]',
1635
2520
  '[class*="spinner"]', '[class*="loading"]', '[class*="skeleton"]',
1636
2521
  '[class*="Spinner"]', '[class*="Loading"]', '[class*="Skeleton"]',
1637
2522
  ];
1638
2523
  const loadingIndicatorCount = Array.from(document.querySelectorAll(loadingSelectors.join(','))).filter(isVisible).length;
2524
+ const headingTexts = collectVisibleTexts('main h1, main h2, [role="main"] h1, [role="main"] h2, h1, h2', 4);
2525
+ const breadcrumbTexts = collectVisibleTexts('[aria-label*="breadcrumb" i] a, [data-testid*="breadcrumb" i] a, nav[aria-label*="breadcrumb" i] a', 4);
2526
+ const navTexts = collectVisibleTexts('header a, header button, nav a, nav button, aside a, aside button, [role="navigation"] a, [role="navigation"] button', 8);
2527
+ const overlayTexts = dedupe([
2528
+ ...semanticDialogs.map((node) => textFor(node)),
2529
+ ...heuristicDialogs.map((node) => textFor(node)),
2530
+ ...visibleMenus.map((node) => textFor(node)),
2531
+ ], 6);
2532
+ const configurationTexts = collectVisibleTexts([
2533
+ 'dialog label',
2534
+ 'dialog legend',
2535
+ 'dialog textarea',
2536
+ 'dialog input',
2537
+ '[role="dialog"] label',
2538
+ '[role="dialog"] legend',
2539
+ '[role="dialog"] textarea',
2540
+ '[role="dialog"] input',
2541
+ '[role="dialog"] button',
2542
+ '[role="dialog"] [data-testid*="preview" i]',
2543
+ '[role="dialog"] [data-testid*="device" i]',
2544
+ '[role="dialog"] [data-testid*="mockup" i]',
2545
+ 'form label',
2546
+ 'form legend',
2547
+ 'textarea',
2548
+ '[data-testid*="settings" i]',
2549
+ '[data-testid*="config" i]',
2550
+ '[data-testid*="preview" i]',
2551
+ '[data-testid*="device" i]',
2552
+ '[data-testid*="mockup" i]',
2553
+ ].join(','), 6);
2554
+ const primarySurface = dedupe([
2555
+ ...headingTexts,
2556
+ ...breadcrumbTexts,
2557
+ textSample.slice(0, 140),
2558
+ ], 4).join(' | ');
2559
+ const navigationSurface = navTexts.join(' | ');
2560
+ const overlaySurface = overlayTexts.length > 0 ? overlayTexts.join(' | ') : null;
2561
+ const configurationSurface = configurationTexts.length > 0 ? configurationTexts.join(' | ') : null;
2562
+ const hasBlockingOverlay = dialogCount > 0 || (expandedCount > 0 && overlayTexts.length > 0);
2563
+ const strongSurfaceSignature = [
2564
+ window.location.pathname,
2565
+ document.title || '',
2566
+ primarySurface,
2567
+ navigationSurface,
2568
+ configurationSurface || '',
2569
+ overlaySurface || '',
2570
+ String(dialogCount),
2571
+ String(expandedCount),
2572
+ String(loadingIndicatorCount),
2573
+ ]
2574
+ .join('|')
2575
+ .slice(0, 900);
2576
+ const surfaceSignature = [
2577
+ window.location.pathname,
2578
+ document.title || '',
2579
+ String(dialogCount),
2580
+ String(expandedCount),
2581
+ String(loadingIndicatorCount),
2582
+ textSample.slice(0, 400),
2583
+ ].join('|');
1639
2584
  return {
1640
2585
  url: window.location.href,
1641
2586
  title: document.title || '',
@@ -1648,6 +2593,13 @@ export class Browser {
1648
2593
  expandedCount,
1649
2594
  loadingIndicatorCount,
1650
2595
  textSample: textSample.slice(0, 240),
2596
+ surfaceSignature,
2597
+ primarySurface,
2598
+ overlaySurface,
2599
+ navigationSurface,
2600
+ configurationSurface,
2601
+ hasBlockingOverlay,
2602
+ strongSurfaceSignature,
1651
2603
  };
1652
2604
  });
1653
2605
  }
@@ -1820,6 +2772,50 @@ export class Browser {
1820
2772
  y: Math.round((top + bottom) / 2),
1821
2773
  };
1822
2774
  }
2775
+ async runClickHitTest(x, y, selector) {
2776
+ const page = this.ensurePage();
2777
+ try {
2778
+ return await page.evaluate(({ pointX, pointY, targetSelector }) => {
2779
+ const hitEl = document.elementFromPoint(pointX, pointY);
2780
+ if (!hitEl)
2781
+ return true;
2782
+ if (!targetSelector)
2783
+ return true;
2784
+ const target = document.querySelector(targetSelector);
2785
+ if (!target)
2786
+ return true;
2787
+ // Direct containment or identity
2788
+ if (target.contains(hitEl) || hitEl.contains(target) || hitEl === target)
2789
+ return true;
2790
+ // Label↔input relationship: clicking a label that wraps or references
2791
+ // the target input is valid — the browser forwards the click.
2792
+ if (target instanceof HTMLInputElement) {
2793
+ // hitEl is inside a <label> that wraps the target input
2794
+ const parentLabel = hitEl.closest('label');
2795
+ if (parentLabel && parentLabel.contains(target))
2796
+ return true;
2797
+ // hitEl is inside a <label for="id"> that references the target input
2798
+ if (parentLabel?.htmlFor && parentLabel.htmlFor === target.id)
2799
+ return true;
2800
+ }
2801
+ if (hitEl instanceof HTMLLabelElement || hitEl.closest('label')) {
2802
+ const label = hitEl instanceof HTMLLabelElement ? hitEl : hitEl.closest('label');
2803
+ if (label.contains(target))
2804
+ return true;
2805
+ if (label.htmlFor && target instanceof HTMLElement && label.htmlFor === target.id)
2806
+ return true;
2807
+ }
2808
+ return false;
2809
+ }, {
2810
+ pointX: x,
2811
+ pointY: y,
2812
+ targetSelector: selector || null,
2813
+ });
2814
+ }
2815
+ catch {
2816
+ return null;
2817
+ }
2818
+ }
1823
2819
  async withResolvedLocator(element, fn) {
1824
2820
  const page = this.ensurePage();
1825
2821
  if (!element.selector)
@@ -2122,19 +3118,333 @@ export class Browser {
2122
3118
  const viewport = page.viewportSize();
2123
3119
  if (!viewport)
2124
3120
  throw new Error('Viewport unavailable during click');
3121
+ // For form controls (radio, checkbox, select) and popup triggers,
3122
+ // prefer Playwright's locator.click() which handles actionability checks,
3123
+ // label→input forwarding, and React event dispatch more reliably than
3124
+ // raw coordinate clicks.
3125
+ const tag = element.tag?.toLowerCase();
3126
+ const role = element.role?.toLowerCase();
3127
+ const inputType = element.inputType?.toLowerCase();
3128
+ const isFormControl = tag === 'input' || tag === 'select' || tag === 'textarea';
3129
+ const isPopupTrigger = element.ariaHasPopup != null && element.ariaHasPopup !== 'false';
3130
+ const isLabelLike = tag === 'label' || role === 'label';
3131
+ if ((isFormControl || isPopupTrigger || isLabelLike) && element.selector) {
3132
+ const clickedViaLocator = await this.activateSelectorTarget(element.selector);
3133
+ if (clickedViaLocator)
3134
+ return;
3135
+ // Locator failed — fall through to coordinate click as last resort.
3136
+ }
2125
3137
  // Prefer coordinate click — it's always precise after re-centering and
2126
3138
  // avoids ambiguous selectors when multiple elements share the same classes.
2127
3139
  const point = this.getVisiblePoint(measured.boundingBox, viewport);
3140
+ // Hit-test: verify no overlay intercepts the click target.
3141
+ // Uses elementFromPoint to check if the element at click coordinates matches
3142
+ // the intended target (or is a descendant/ancestor of it).
3143
+ const hitTestPassed = await this.runClickHitTest(point.x, point.y, element.selector);
3144
+ if (hitTestPassed === false) {
3145
+ // An overlay is intercepting the click. Try dismissing it with Escape and retry.
3146
+ await page.keyboard.press('Escape');
3147
+ await page.waitForTimeout(400);
3148
+ // Re-measure after dismiss
3149
+ const remeasured = await this.ensureElementInView(index, 32);
3150
+ const retryPoint = this.getVisiblePoint(remeasured.boundingBox, viewport);
3151
+ // Second hit-test after dismiss
3152
+ const retryHitTest = await this.runClickHitTest(retryPoint.x, retryPoint.y, element.selector);
3153
+ if (!retryHitTest) {
3154
+ // Still intercepted after dismiss — fall back to Playwright's click with selector
3155
+ // which handles actionability checks natively.
3156
+ const clickedViaLocator = await this.withResolvedLocator(element, async (locator) => {
3157
+ await locator.click({ timeout: 5000 });
3158
+ return true;
3159
+ });
3160
+ if (!clickedViaLocator) {
3161
+ if (element.selector) {
3162
+ throw new Error(`Click target ${index} remained intercepted after dismiss and could not be re-resolved by selector ${element.selector}`);
3163
+ }
3164
+ throw new Error(`Click target ${index} remained intercepted after dismiss and has no reusable selector`);
3165
+ }
3166
+ return;
3167
+ }
3168
+ // Click with updated coordinates after dismiss
3169
+ await page.mouse.click(retryPoint.x, retryPoint.y);
3170
+ return;
3171
+ }
3172
+ if (hitTestPassed === null && element.selector) {
3173
+ const clickedViaLocator = await this.withResolvedLocator(element, async (locator) => {
3174
+ await locator.click({ timeout: 5000 });
3175
+ return true;
3176
+ });
3177
+ if (!clickedViaLocator) {
3178
+ throw new Error(`Hit-test failed for click target ${index} and selector ${element.selector} could not be re-resolved`);
3179
+ }
3180
+ return;
3181
+ }
2128
3182
  await page.mouse.click(point.x, point.y);
2129
3183
  }
2130
3184
  async clickBySelector(selector, options) {
2131
3185
  const page = this.ensurePage();
2132
- await page.click(selector, { timeout: 5000, force: options?.force });
3186
+ const t0 = Date.now();
3187
+ try {
3188
+ const existence = await page.evaluate((sel) => {
3189
+ const matches = document.querySelectorAll(sel);
3190
+ return {
3191
+ url: window.location.href,
3192
+ count: matches.length,
3193
+ firstVisible: matches.length > 0 ? matches[0].offsetParent !== null : null,
3194
+ docState: document.readyState,
3195
+ };
3196
+ }, selector);
3197
+ logger.debug(`[clickBySelector] "${selector}" — DOM check: url=${existence.url}, matches=${existence.count}, firstVisible=${existence.firstVisible}, docState=${existence.docState}`);
3198
+ }
3199
+ catch (probeErr) {
3200
+ logger.debug(`[clickBySelector] "${selector}" — DOM probe failed: ${probeErr instanceof Error ? probeErr.message.split('\n')[0] : String(probeErr)}`);
3201
+ }
3202
+ logger.debug(`[clickBySelector] "${selector}" — trying activateSelectorTarget`);
3203
+ const clickedViaLocator = await this.activateSelectorTarget(selector, options);
3204
+ logger.debug(`[clickBySelector] activateSelectorTarget returned ${clickedViaLocator} after ${Date.now() - t0}ms`);
3205
+ if (clickedViaLocator)
3206
+ return;
3207
+ logger.debug(`[clickBySelector] falling back to page.click("${selector}", timeout=5000)`);
3208
+ const t1 = Date.now();
3209
+ try {
3210
+ await page.click(selector, { timeout: 5000, force: options?.force });
3211
+ logger.debug(`[clickBySelector] page.click succeeded after ${Date.now() - t1}ms`);
3212
+ }
3213
+ catch (err) {
3214
+ logger.debug(`[clickBySelector] page.click FAILED after ${Date.now() - t1}ms — ${err instanceof Error ? err.message.split('\n')[0] : String(err)}`);
3215
+ throw err;
3216
+ }
3217
+ }
3218
+ async resolveActivationTarget(selector) {
3219
+ const page = this.ensurePage();
3220
+ try {
3221
+ return await page.locator(selector).first().evaluate((node) => {
3222
+ const ACTIONABLE_SELECTOR = [
3223
+ 'button',
3224
+ 'a[href]',
3225
+ 'input',
3226
+ 'select',
3227
+ 'textarea',
3228
+ '[role="button"]',
3229
+ '[role="link"]',
3230
+ '[role="menuitem"]',
3231
+ '[role="option"]',
3232
+ '[role="radio"]',
3233
+ '[role="switch"]',
3234
+ '[role="tab"]',
3235
+ ].join(', ');
3236
+ const isActionable = (candidate) => {
3237
+ if (!(candidate instanceof HTMLElement))
3238
+ return false;
3239
+ if (candidate instanceof HTMLLabelElement)
3240
+ return true;
3241
+ if (candidate instanceof HTMLButtonElement)
3242
+ return true;
3243
+ if (candidate instanceof HTMLAnchorElement && !!candidate.href)
3244
+ return true;
3245
+ if (candidate instanceof HTMLInputElement
3246
+ || candidate instanceof HTMLSelectElement
3247
+ || candidate instanceof HTMLTextAreaElement) {
3248
+ return true;
3249
+ }
3250
+ const role = (candidate.getAttribute('role') || '').toLowerCase();
3251
+ return ['button', 'link', 'menuitem', 'option', 'radio', 'switch', 'tab'].includes(role);
3252
+ };
3253
+ const findActivationNode = (start) => {
3254
+ let current = start;
3255
+ while (current) {
3256
+ if (current instanceof HTMLLabelElement) {
3257
+ return current.control instanceof HTMLElement ? current.control : current;
3258
+ }
3259
+ if (isActionable(current))
3260
+ return current;
3261
+ current = current.parentElement;
3262
+ }
3263
+ if (!(start instanceof HTMLElement))
3264
+ return null;
3265
+ const labelAncestor = start.closest('label');
3266
+ if (labelAncestor instanceof HTMLLabelElement) {
3267
+ return labelAncestor.control instanceof HTMLElement ? labelAncestor.control : labelAncestor;
3268
+ }
3269
+ const nested = start.querySelector(ACTIONABLE_SELECTOR);
3270
+ if (nested instanceof HTMLElement)
3271
+ return nested;
3272
+ return start;
3273
+ };
3274
+ const target = findActivationNode(node);
3275
+ if (!target)
3276
+ return null;
3277
+ const attrName = 'data-ak-activation-target';
3278
+ const win = window;
3279
+ if (typeof win.__akActivationTargetCounter === 'undefined') {
3280
+ win.__akActivationTargetCounter = 0;
3281
+ }
3282
+ let attrValue = target.getAttribute(attrName);
3283
+ if (!attrValue) {
3284
+ win.__akActivationTargetCounter += 1;
3285
+ attrValue = String(win.__akActivationTargetCounter);
3286
+ target.setAttribute(attrName, attrValue);
3287
+ }
3288
+ return {
3289
+ selector: `[${attrName}="${attrValue}"]`,
3290
+ tag: target.tagName.toLowerCase(),
3291
+ role: target.getAttribute('role') || '',
3292
+ inputType: target instanceof HTMLInputElement ? target.type : target.getAttribute('type'),
3293
+ };
3294
+ });
3295
+ }
3296
+ catch {
3297
+ return null;
3298
+ }
3299
+ }
3300
+ async activateSelectorTarget(selector, options) {
3301
+ const page = this.ensurePage();
3302
+ const tResolve = Date.now();
3303
+ const target = await this.resolveActivationTarget(selector);
3304
+ logger.debug(`[activateSelectorTarget] resolveActivationTarget took ${Date.now() - tResolve}ms — target=${target ? `tag=${target.tag},role=${target.role}` : 'null'}`);
3305
+ const activationSelector = target?.selector ?? selector;
3306
+ const activationLocator = page.locator(activationSelector).first();
3307
+ const targetTag = target?.tag.toLowerCase() ?? '';
3308
+ const targetRole = target?.role.toLowerCase() ?? '';
3309
+ const inputType = target?.inputType?.toLowerCase() ?? null;
3310
+ const noWaitAfter = (targetTag === 'label'
3311
+ || targetRole === 'radio'
3312
+ || targetRole === 'switch'
3313
+ || inputType === 'radio'
3314
+ || inputType === 'checkbox');
3315
+ try {
3316
+ const tScroll = Date.now();
3317
+ await activationLocator.scrollIntoViewIfNeeded({ timeout: 3000 }).catch((err) => {
3318
+ logger.debug(`[activateSelectorTarget] scrollIntoViewIfNeeded swallowed (${Date.now() - tScroll}ms): ${err instanceof Error ? err.message.split('\n')[0] : String(err)}`);
3319
+ });
3320
+ logger.debug(`[activateSelectorTarget] scrollIntoViewIfNeeded done in ${Date.now() - tScroll}ms`);
3321
+ if (inputType === 'radio' || inputType === 'checkbox') {
3322
+ await activationLocator.check({ timeout: 5000, force: options?.force, noWaitAfter: true });
3323
+ return true;
3324
+ }
3325
+ const tClick = Date.now();
3326
+ await activationLocator.click({ timeout: 5000, force: options?.force, noWaitAfter });
3327
+ logger.debug(`[activateSelectorTarget] primary click succeeded in ${Date.now() - tClick}ms`);
3328
+ return true;
3329
+ }
3330
+ catch (err) {
3331
+ logger.debug(`[activateSelectorTarget] primary click failed: ${err instanceof Error ? err.message.split('\n')[0] : String(err)}`);
3332
+ if (!target || activationSelector === selector) {
3333
+ return false;
3334
+ }
3335
+ try {
3336
+ const fallbackLocator = page.locator(selector).first();
3337
+ await fallbackLocator.scrollIntoViewIfNeeded({ timeout: 3000 }).catch(() => undefined);
3338
+ const tFb = Date.now();
3339
+ await fallbackLocator.click({ timeout: 5000, force: options?.force, noWaitAfter });
3340
+ logger.debug(`[activateSelectorTarget] fallback click succeeded in ${Date.now() - tFb}ms`);
3341
+ return true;
3342
+ }
3343
+ catch (err2) {
3344
+ logger.debug(`[activateSelectorTarget] fallback click failed: ${err2 instanceof Error ? err2.message.split('\n')[0] : String(err2)}`);
3345
+ return false;
3346
+ }
3347
+ }
2133
3348
  }
2134
3349
  async clickByCoordinates(x, y) {
2135
3350
  const page = this.ensurePage();
2136
3351
  await page.mouse.click(x, y);
2137
3352
  }
3353
+ buildElementSnapshotFromAKEntry(entry) {
3354
+ return {
3355
+ selector: entry.sourceRef,
3356
+ boundingBox: null,
3357
+ pageBox: {
3358
+ x: entry.bounds.x,
3359
+ y: entry.bounds.y,
3360
+ width: entry.bounds.w,
3361
+ height: entry.bounds.h,
3362
+ },
3363
+ scrollPos: { x: 0, y: 0 },
3364
+ tag: (entry.attributes.tagName || entry.type || 'div').toLowerCase(),
3365
+ text: entry.label || entry.attributes.__ownText || '',
3366
+ ariaLabel: entry.attributes['aria-label'] || null,
3367
+ ariaHasPopup: entry.attributes['aria-haspopup'] || null,
3368
+ role: entry.attributes.role || '',
3369
+ inputType: entry.attributes.type || null,
3370
+ };
3371
+ }
3372
+ async activateAKNodeEntry(entry, options) {
3373
+ const page = this.ensurePage();
3374
+ const snapshot = this.buildElementSnapshotFromAKEntry(entry);
3375
+ const targetTag = snapshot.tag.toLowerCase();
3376
+ const targetRole = snapshot.role.toLowerCase();
3377
+ const inputType = snapshot.inputType?.toLowerCase() ?? null;
3378
+ const noWaitAfter = (targetTag === 'label'
3379
+ || targetRole === 'label'
3380
+ || targetRole === 'radio'
3381
+ || targetRole === 'switch'
3382
+ || targetRole === 'option'
3383
+ || targetRole.startsWith('menuitem')
3384
+ || inputType === 'radio'
3385
+ || inputType === 'checkbox');
3386
+ const activated = await this.withResolvedLocator(snapshot, async (locator) => {
3387
+ await locator.scrollIntoViewIfNeeded().catch(() => undefined);
3388
+ if (targetRole.startsWith('menuitem')) {
3389
+ await locator.evaluate((node) => {
3390
+ if (!(node instanceof HTMLElement))
3391
+ return false;
3392
+ node.click();
3393
+ return true;
3394
+ });
3395
+ return true;
3396
+ }
3397
+ if (inputType === 'radio' || inputType === 'checkbox') {
3398
+ await locator.check({ timeout: 3000, force: options?.force, noWaitAfter: true });
3399
+ return true;
3400
+ }
3401
+ await locator.click({ timeout: 3000, force: options?.force, noWaitAfter });
3402
+ return true;
3403
+ });
3404
+ if (activated)
3405
+ return true;
3406
+ const clickedViaSelector = await this.activateSelectorTarget(entry.sourceRef, options);
3407
+ if (clickedViaSelector)
3408
+ return true;
3409
+ try {
3410
+ const locator = page.locator(entry.sourceRef).first();
3411
+ await locator.scrollIntoViewIfNeeded().catch(() => undefined);
3412
+ await locator.click({ timeout: 1500, force: options?.force, noWaitAfter });
3413
+ return true;
3414
+ }
3415
+ catch {
3416
+ return false;
3417
+ }
3418
+ }
3419
+ async clickAKNodeEntryByBounds(entry) {
3420
+ const page = this.ensurePage();
3421
+ const viewport = page.viewportSize();
3422
+ if (!viewport)
3423
+ throw new Error('Viewport unavailable during AK node click');
3424
+ const maxScroll = await page.evaluate(() => {
3425
+ const root = document.scrollingElement || document.documentElement;
3426
+ return {
3427
+ x: Math.max(0, root.scrollWidth - window.innerWidth),
3428
+ y: Math.max(0, root.scrollHeight - window.innerHeight),
3429
+ };
3430
+ });
3431
+ const targetScrollX = Math.max(0, Math.min(Math.round(entry.bounds.x + entry.bounds.w / 2 - viewport.width / 2), maxScroll.x));
3432
+ const targetScrollY = Math.max(0, Math.min(Math.round(entry.bounds.y + entry.bounds.h / 2 - viewport.height / 2), maxScroll.y));
3433
+ await page.evaluate(({ x, y }) => window.scrollTo(x, y), { x: targetScrollX, y: targetScrollY });
3434
+ await page.waitForTimeout(150);
3435
+ const scroll = await page.evaluate(() => ({ x: window.scrollX, y: window.scrollY }));
3436
+ const point = this.getVisiblePoint({
3437
+ x: entry.bounds.x - scroll.x,
3438
+ y: entry.bounds.y - scroll.y,
3439
+ width: entry.bounds.w,
3440
+ height: entry.bounds.h,
3441
+ }, viewport);
3442
+ const hitTestPassed = await this.runClickHitTest(point.x, point.y, entry.sourceRef);
3443
+ if (hitTestPassed === false) {
3444
+ throw new Error(`AK node "${entry.id}" remained intercepted during coordinate fallback`);
3445
+ }
3446
+ await page.mouse.click(point.x, point.y);
3447
+ }
2138
3448
  async typeText(text, options) {
2139
3449
  const page = this.ensurePage();
2140
3450
  if (options?.index !== undefined) {
@@ -2208,6 +3518,57 @@ export class Browser {
2208
3518
  await page.evaluate(({ deltaX, deltaY }) => window.scrollBy(deltaX, deltaY), { deltaX: dx, deltaY: dy });
2209
3519
  }
2210
3520
  }
3521
+ async centerNodeInView(centerNodeId, options = {}) {
3522
+ const page = this.ensurePage();
3523
+ const centerEntry = await this.resolveAKNode(centerNodeId);
3524
+ if (!centerEntry?.sourceRef) {
3525
+ throw new Error(`Unable to resolve center node "${centerNodeId}"`);
3526
+ }
3527
+ const centerLocator = page.locator(centerEntry.sourceRef).first();
3528
+ const centerRect = await centerLocator.evaluate((el) => {
3529
+ const rect = el.getBoundingClientRect();
3530
+ return {
3531
+ top: rect.top,
3532
+ left: rect.left,
3533
+ width: rect.width,
3534
+ height: rect.height,
3535
+ };
3536
+ });
3537
+ const bias = options.offset ?? 0;
3538
+ if (options.containerNodeId) {
3539
+ const containerEntry = await this.resolveAKNode(options.containerNodeId);
3540
+ if (!containerEntry?.sourceRef) {
3541
+ throw new Error(`Unable to resolve scroll container "${options.containerNodeId}"`);
3542
+ }
3543
+ const containerLocator = page.locator(containerEntry.sourceRef).first();
3544
+ const containerRect = await containerLocator.evaluate((el) => {
3545
+ const rect = el.getBoundingClientRect();
3546
+ return {
3547
+ top: rect.top,
3548
+ left: rect.left,
3549
+ width: rect.width,
3550
+ height: rect.height,
3551
+ };
3552
+ });
3553
+ const deltaX = Math.round((centerRect.left + centerRect.width / 2)
3554
+ - (containerRect.left + containerRect.width / 2)
3555
+ - bias);
3556
+ const deltaY = Math.round((centerRect.top + centerRect.height / 2)
3557
+ - (containerRect.top + containerRect.height / 2)
3558
+ - bias);
3559
+ await containerLocator.evaluate((el, deltas) => {
3560
+ el.scrollBy(deltas.deltaX, deltas.deltaY);
3561
+ }, { deltaX, deltaY });
3562
+ return;
3563
+ }
3564
+ const viewport = page.viewportSize();
3565
+ if (!viewport) {
3566
+ throw new Error('Viewport unavailable during centered scroll');
3567
+ }
3568
+ const deltaX = Math.round((centerRect.left + centerRect.width / 2) - (viewport.width / 2) - bias);
3569
+ const deltaY = Math.round((centerRect.top + centerRect.height / 2) - (viewport.height / 2) - bias);
3570
+ await page.evaluate(({ dx, dy }) => window.scrollBy(dx, dy), { dx: deltaX, dy: deltaY });
3571
+ }
2211
3572
  async pressKey(key) {
2212
3573
  const page = this.ensurePage();
2213
3574
  await page.keyboard.press(key);
@@ -2541,6 +3902,7 @@ export class Browser {
2541
3902
  /** Temporarily hide all fixed/sticky overlays (navbars, banners, FABs, etc.) */
2542
3903
  async hideFixedOverlays() {
2543
3904
  const page = this.ensurePage();
3905
+ await ensureCaptureHideStyles(page);
2544
3906
  await page.evaluate(() => {
2545
3907
  document.querySelectorAll('*').forEach(el => {
2546
3908
  const pos = getComputedStyle(el).position;
@@ -2832,6 +4194,94 @@ export class Browser {
2832
4194
  await this.restoreFixedOverlays();
2833
4195
  }
2834
4196
  }
4197
+ async resolveAKNode(nodeId, options = {}) {
4198
+ if (options.reparse === true) {
4199
+ // Force a fresh tree parse to get updated sourceRefs
4200
+ this.akNodeIndex.clear();
4201
+ await this.getAKTree();
4202
+ return this.akNodeIndex.get(nodeId) ?? null;
4203
+ }
4204
+ const cached = this.akNodeIndex.get(nodeId);
4205
+ if (cached)
4206
+ return cached;
4207
+ if (options.reparse === false)
4208
+ return null;
4209
+ await this.getAKTree();
4210
+ return this.akNodeIndex.get(nodeId) ?? null;
4211
+ }
4212
+ async tapNode(nodeId, options) {
4213
+ let entry = await this.resolveAKNode(nodeId);
4214
+ if (!entry) {
4215
+ throw new Error(`Unable to resolve node "${nodeId}" to a live sourceRef`);
4216
+ }
4217
+ const tryTapEntry = async (candidate) => {
4218
+ const activated = await this.activateAKNodeEntry(candidate, options);
4219
+ if (activated)
4220
+ return;
4221
+ await this.clickAKNodeEntryByBounds(candidate);
4222
+ };
4223
+ try {
4224
+ await tryTapEntry(entry);
4225
+ }
4226
+ catch (err) {
4227
+ // sourceRef may be stale — re-parse the tree and retry once
4228
+ entry = await this.resolveAKNode(nodeId, { reparse: true });
4229
+ if (!entry)
4230
+ throw err;
4231
+ await tryTapEntry(entry);
4232
+ }
4233
+ }
4234
+ async typeIntoNode(nodeId, text, options) {
4235
+ // Default to clearFirst: false → uses page.click(selector) + keyboard.type(text)
4236
+ // which types character-by-character and triggers all React onChange/onInput events.
4237
+ // page.fill() (clearFirst: true) sets the value atomically and can miss controlled
4238
+ // component handlers in frameworks like React/Supabase Auth.
4239
+ const clearFirst = options?.clearFirst ?? false;
4240
+ let entry = await this.resolveAKNode(nodeId);
4241
+ if (!entry || !entry.sourceRef) {
4242
+ throw new Error(`Unable to resolve node "${nodeId}" to a live sourceRef`);
4243
+ }
4244
+ try {
4245
+ await this.typeText(text, {
4246
+ selector: entry.sourceRef,
4247
+ clearFirst,
4248
+ });
4249
+ }
4250
+ catch (err) {
4251
+ // sourceRef may be stale — re-parse the tree and retry once
4252
+ entry = await this.resolveAKNode(nodeId, { reparse: true });
4253
+ if (!entry || !entry.sourceRef)
4254
+ throw err;
4255
+ await this.typeText(text, {
4256
+ selector: entry.sourceRef,
4257
+ clearFirst,
4258
+ });
4259
+ }
4260
+ }
4261
+ async captureNode(nodeId) {
4262
+ if (!nodeId) {
4263
+ return this.takeScreenshot();
4264
+ }
4265
+ const entry = await this.resolveAKNode(nodeId);
4266
+ if (!entry) {
4267
+ throw new Error(`Unable to resolve node "${nodeId}" for capture`);
4268
+ }
4269
+ return this.screenshotByRegion({
4270
+ x: entry.bounds.x,
4271
+ y: entry.bounds.y,
4272
+ width: entry.bounds.w,
4273
+ height: entry.bounds.h,
4274
+ });
4275
+ }
4276
+ async focusTree(query) {
4277
+ const akTree = await this.getAKTree();
4278
+ const result = focusAKTree(akTree, query);
4279
+ return {
4280
+ tree: result.tree,
4281
+ serialized: result.serialized,
4282
+ matches: result.matches,
4283
+ };
4284
+ }
2835
4285
  async wait(ms) {
2836
4286
  const page = this.ensurePage();
2837
4287
  await page.waitForTimeout(Math.min(ms, 5000));