@webhands/core 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/README.md +69 -6
  2. package/dist/errors.d.ts +112 -1
  3. package/dist/errors.d.ts.map +1 -1
  4. package/dist/errors.js +121 -0
  5. package/dist/errors.js.map +1 -1
  6. package/dist/hand-host.d.ts +198 -5
  7. package/dist/hand-host.d.ts.map +1 -1
  8. package/dist/hand-host.js +664 -21
  9. package/dist/hand-host.js.map +1 -1
  10. package/dist/index.d.ts +5 -4
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +4 -3
  13. package/dist/index.js.map +1 -1
  14. package/dist/playwright-attach-transport.d.ts +8 -1
  15. package/dist/playwright-attach-transport.d.ts.map +1 -1
  16. package/dist/playwright-attach-transport.js +19 -4
  17. package/dist/playwright-attach-transport.js.map +1 -1
  18. package/dist/playwright-launch-transport.d.ts +23 -0
  19. package/dist/playwright-launch-transport.d.ts.map +1 -1
  20. package/dist/playwright-launch-transport.js +40 -6
  21. package/dist/playwright-launch-transport.js.map +1 -1
  22. package/dist/profile-location.d.ts +19 -0
  23. package/dist/profile-location.d.ts.map +1 -1
  24. package/dist/profile-location.js +21 -0
  25. package/dist/profile-location.js.map +1 -1
  26. package/dist/seam.d.ts +501 -7
  27. package/dist/seam.d.ts.map +1 -1
  28. package/dist/seam.js +31 -0
  29. package/dist/seam.js.map +1 -1
  30. package/dist/session-rpc.d.ts +63 -1
  31. package/dist/session-rpc.d.ts.map +1 -1
  32. package/dist/session-rpc.js +174 -11
  33. package/dist/session-rpc.js.map +1 -1
  34. package/dist/socks-proxy.d.ts +61 -0
  35. package/dist/socks-proxy.d.ts.map +1 -0
  36. package/dist/socks-proxy.js +84 -0
  37. package/dist/socks-proxy.js.map +1 -0
  38. package/dist/stub-transport.d.ts.map +1 -1
  39. package/dist/stub-transport.js +74 -6
  40. package/dist/stub-transport.js.map +1 -1
  41. package/dist/test-fixtures/fixture-pages.d.ts.map +1 -1
  42. package/dist/test-fixtures/fixture-pages.js +994 -0
  43. package/dist/test-fixtures/fixture-pages.js.map +1 -1
  44. package/dist/test-fixtures/fixture-server.d.ts.map +1 -1
  45. package/dist/test-fixtures/fixture-server.js +33 -3
  46. package/dist/test-fixtures/fixture-server.js.map +1 -1
  47. package/package.json +1 -1
  48. package/src/errors.ts +164 -1
  49. package/src/hand-host.ts +797 -21
  50. package/src/index.ts +27 -1
  51. package/src/playwright-attach-transport.ts +25 -3
  52. package/src/playwright-launch-transport.ts +63 -4
  53. package/src/profile-location.ts +25 -0
  54. package/src/seam.ts +535 -7
  55. package/src/session-rpc.ts +276 -14
  56. package/src/socks-proxy.ts +127 -0
  57. package/src/stub-transport.ts +83 -6
  58. package/src/test-fixtures/fixture-pages.ts +1010 -0
  59. package/src/test-fixtures/fixture-server.ts +32 -3
package/src/hand-host.ts CHANGED
@@ -1,11 +1,35 @@
1
- import {errors as pwErrors, type BrowserContext, type Page} from 'playwright';
1
+ import {
2
+ errors as pwErrors,
3
+ type BrowserContext,
4
+ type Frame,
5
+ type Locator,
6
+ type Page,
7
+ } from 'playwright';
2
8
  import type {
9
+ ActionOptions,
10
+ BoundingBox,
3
11
  Cookie,
12
+ EvalOptions,
4
13
  WebHandsPage,
14
+ MouseInput,
15
+ QueryOptions,
16
+ QueryRow,
17
+ Screenshot,
18
+ ScreenshotOptions,
19
+ ScrollTarget,
20
+ SelectChoice,
5
21
  Snapshot,
6
22
  SnapshotOptions,
7
23
  WaitCondition,
8
24
  } from './seam.js';
25
+ import {validateSnapshotOptions} from './seam.js';
26
+ import {
27
+ CrossOriginFrameError,
28
+ ScreenshotPathError,
29
+ StaleRefError,
30
+ } from './errors.js';
31
+ import {mkdir} from 'node:fs/promises';
32
+ import {isAbsolute, join, relative, resolve as resolvePath} from 'node:path';
9
33
 
10
34
  /**
11
35
  * The hand-host primitive (Phase 1 of the "hands" prd,
@@ -65,6 +89,17 @@ export interface HandContext {
65
89
  readonly pwPage: Page;
66
90
  readonly context: BrowserContext;
67
91
  readonly ensureOpen: () => void;
92
+ /**
93
+ * The managed SCREENSHOTS directory the `screenshot` verb mints PNGs under
94
+ * (Tier-4, prd `broaden-agent-verb-surface`, R3). Resolved by each transport
95
+ * from its home root (`<homeRoot>/screenshots`, beside `profiles/`) via
96
+ * {@link resolveScreenshotsDir}, so the same `root`/`WEBHANDS_HOME` override
97
+ * that isolates profiles in a test isolates screenshots too. The verb creates
98
+ * it lazily on first write and validates any caller `out` override stays under
99
+ * it ({@link ScreenshotPathError}). Carried HERE (not on the seam) so no path
100
+ * policy leaks into the public {@link WebHandsPage} surface (ADR-0003).
101
+ */
102
+ readonly screenshotsDir: string;
68
103
  }
69
104
 
70
105
  /**
@@ -171,6 +206,18 @@ const REQUIRED_VERBS = [
171
206
  'wait',
172
207
  'cookies',
173
208
  'setCookies',
209
+ 'query',
210
+ 'count',
211
+ 'exists',
212
+ 'isVisible',
213
+ 'getAttribute',
214
+ 'press',
215
+ 'hover',
216
+ 'select',
217
+ 'scroll',
218
+ 'drag',
219
+ 'mouse',
220
+ 'screenshot',
174
221
  ] as const satisfies ReadonlyArray<keyof WebHandsPage>;
175
222
 
176
223
  /**
@@ -203,6 +250,17 @@ function assertCompletePage(verbs: Partial<WebHandsPage>): WebHandsPage {
203
250
  */
204
251
  const NORMAL_CLICK_TIMEOUT_MS = 1_000;
205
252
 
253
+ /**
254
+ * How long {@link resolveSameOriginFrame} waits for the `frame` selector to
255
+ * resolve to an iframe element before treating it as "no such frame". Short on
256
+ * purpose: a frame-scoped `eval` against a bad selector should fail LOUD fast,
257
+ * not burn Playwright's 30s default auto-wait (the same reasoning as
258
+ * {@link NORMAL_CLICK_TIMEOUT_MS}). An iframe present in the markup resolves
259
+ * immediately and never approaches this bound; it is the latency cost paid ONLY
260
+ * on the no-such-frame path.
261
+ */
262
+ const FRAME_RESOLVE_TIMEOUT_MS = 1_000;
263
+
206
264
  // ---------------------------------------------------------------------------
207
265
  // Built-in hands: webhands' OWN built-in verbs, each a hand over the host.
208
266
  //
@@ -236,6 +294,10 @@ export const snapshotHand: Hand = ({pwPage, ensureOpen}) => ({
236
294
  verbs: {
237
295
  async snapshot(options?: SnapshotOptions): Promise<Snapshot> {
238
296
  ensureOpen();
297
+ // Reject an unknown/misshapen option LOUDLY (e.g. `{view: 'full'}`)
298
+ // rather than silently returning the wrong view. Single source of
299
+ // truth in the seam, shared with the RPC server dispatch.
300
+ validateSnapshotOptions(options);
239
301
  const url = pwPage.url();
240
302
  if (options?.full === true) {
241
303
  // `--full`: the raw DOM. `documentElement.outerHTML` is the serialized
@@ -259,15 +321,29 @@ export const snapshotHand: Hand = ({pwPage, ensureOpen}) => ({
259
321
  },
260
322
  });
261
323
 
262
- /** The `click` + `type` verbs: page interaction by raw locator (ADR-0004). */
324
+ /**
325
+ * The `click` + `type` verbs: page interaction by raw locator (ADR-0004).
326
+ *
327
+ * With `{byRef: true}` the target is a durable `query` {@link QueryRow.ref}: it
328
+ * is resolved through the SAME {@link resolveLocator} but FIRST asserted to match
329
+ * EXACTLY ONE element ({@link assertRefResolvesToOne}), so a stale (zero) or
330
+ * ambiguous (many) ref fails LOUD with a {@link StaleRefError} instead of
331
+ * silently acting on the wrong element — the safety the durable ref exists for.
332
+ */
263
333
  export const interactionHand: Hand = ({pwPage, ensureOpen}) => ({
264
334
  verbs: {
265
- async click(t): Promise<void> {
335
+ async click(t, options?: ActionOptions): Promise<void> {
266
336
  ensureOpen();
337
+ if (options?.byRef === true) {
338
+ await assertRefResolvesToOne(pwPage, t, 'click');
339
+ }
267
340
  await clickLocator(pwPage, t);
268
341
  },
269
- async type(t, text): Promise<void> {
342
+ async type(t, text, options?: ActionOptions): Promise<void> {
270
343
  ensureOpen();
344
+ if (options?.byRef === true) {
345
+ await assertRefResolvesToOne(pwPage, t, 'type');
346
+ }
271
347
  await resolveLocator(pwPage, t).fill(text);
272
348
  },
273
349
  },
@@ -276,22 +352,9 @@ export const interactionHand: Hand = ({pwPage, ensureOpen}) => ({
276
352
  /** The `eval` escape hatch: run a JS EXPRESSION in the page, return by value. */
277
353
  export const evalHand: Hand = ({pwPage, ensureOpen}) => ({
278
354
  verbs: {
279
- async eval(expression: string): Promise<unknown> {
355
+ async eval(expression: string, options?: EvalOptions): Promise<unknown> {
280
356
  ensureOpen();
281
- // The `eval` escape hatch (PRD story 9): run the raw JS EXPRESSION in the
282
- // page and return its serializable result. Playwright's `evaluate`
283
- // already IS the seam's serialization contract (see {@link WebHandsPage.eval}):
284
- // it passes a string as an expression, awaits a returned Promise, and
285
- // structurally clones the result out of the page by VALUE. That clone is
286
- // richer than JSON: it preserves NaN/Infinity/BigInt and circular
287
- // structures (back-refs become a `[Circular]` marker), yields `undefined`
288
- // for functions/symbols, and returns an opaque preview string for a live
289
- // host object (a DOM node never crosses the process boundary). A page-side
290
- // throw rejects. We pass it straight through rather than re-encode it:
291
- // wrapping the value in a transport-specific envelope would invent a
292
- // dialect the seam deliberately avoids. The thrown error is a plain
293
- // `Error`, so no Playwright/CDP type leaks across the seam (ADR-0003).
294
- return pwPage.evaluate(expression);
357
+ return evalExpression(pwPage, expression, options);
295
358
  },
296
359
  },
297
360
  });
@@ -326,7 +389,147 @@ export const cookiesHand: Hand = ({context, ensureOpen}) => ({
326
389
  });
327
390
 
328
391
  /**
329
- * webhands' eight built-in verbs as built-in hands, in composition order. Both
392
+ * The Tier-1 read verbs (prd `broaden-agent-verb-surface`, R2): the `query`
393
+ * extraction verb plus the thin state shorthands `count` / `exists` /
394
+ * `isVisible` / `getAttribute`. All five address element(s) by the SAME raw
395
+ * Playwright locator expression the other verbs use, resolved through the ONE
396
+ * existing {@link resolveLocator} (so a `frameLocator(...)` same-origin frame
397
+ * hop in the string Just Works, and there is no parallel addressing scheme —
398
+ * R1). They are pure READS: no page mutation.
399
+ *
400
+ * `query` returns one row per match carrying EXACTLY the requested fields (R2);
401
+ * the state verbs are computed over the same machinery (see {@link queryRows}
402
+ * and the per-verb bodies). Read values cross by structured clone, the same
403
+ * contract as `eval` (ADR-0003).
404
+ */
405
+ export const queryHand: Hand = ({pwPage, ensureOpen}) => ({
406
+ verbs: {
407
+ async query(target, options?: QueryOptions): Promise<QueryRow[]> {
408
+ ensureOpen();
409
+ return queryRows(pwPage, target, options);
410
+ },
411
+ async count(target): Promise<number> {
412
+ ensureOpen();
413
+ return resolveLocator(pwPage, target).count();
414
+ },
415
+ async exists(target): Promise<boolean> {
416
+ ensureOpen();
417
+ return (await resolveLocator(pwPage, target).count()) > 0;
418
+ },
419
+ async isVisible(target): Promise<boolean> {
420
+ ensureOpen();
421
+ // The FIRST match's actionability-grade visibility. `.first().isVisible()`
422
+ // returns `false` for an ABSENT element too (no match cannot be visible),
423
+ // which is the loud, correct answer for the absent case.
424
+ return resolveLocator(pwPage, target).first().isVisible();
425
+ },
426
+ async getAttribute(target, name: string): Promise<string | null> {
427
+ ensureOpen();
428
+ // The FIRST match's DOM attribute. `.first().getAttribute()` resolves to
429
+ // `null` for an absent attribute, but for an absent ELEMENT it would
430
+ // auto-wait and then time out. So we check `count()` first and treat "no
431
+ // element" as `null` (there is no attribute value to read) rather than hanging.
432
+ if ((await resolveLocator(pwPage, target).count()) === 0) {
433
+ return null;
434
+ }
435
+ return resolveLocator(pwPage, target).first().getAttribute(name);
436
+ },
437
+ },
438
+ });
439
+
440
+ /**
441
+ * The Tier-2 rich INPUT verbs (prd `broaden-agent-verb-surface`, stories 8-12):
442
+ * `press` / `hover` / `select` / `scroll` / `drag`. These lift page-level
443
+ * Playwright actions a hand already has on `pwPage` (`keyboard.press`,
444
+ * `hover`, `selectOption`, `mouse.wheel`/`scrollIntoViewIfNeeded`, `dragTo`) up
445
+ * to the agent verb seam so a seam-only agent can drive a browser game or a
446
+ * richer form, not just `click`/`type`.
447
+ *
448
+ * Every locator-addressing form resolves through the SAME single
449
+ * {@link resolveLocator} the other verbs use (so a same-origin `frameLocator(...)`
450
+ * hop in the string Just Works — no parallel addressing scheme, R1). Keys are
451
+ * strings, offsets are numbers, locators are strings: nothing Playwright-shaped
452
+ * crosses the seam (ADR-0003).
453
+ */
454
+ export const inputHand: Hand = ({pwPage, ensureOpen}) => ({
455
+ verbs: {
456
+ async press(key, target): Promise<void> {
457
+ ensureOpen();
458
+ if (target !== undefined) {
459
+ // At a locator: Playwright focuses the element first, then presses
460
+ // (the `locator.press` semantics).
461
+ await resolveLocator(pwPage, target).press(key);
462
+ return;
463
+ }
464
+ // No locator: the page's currently focused element receives the key.
465
+ await pwPage.keyboard.press(key);
466
+ },
467
+ async hover(target): Promise<void> {
468
+ ensureOpen();
469
+ await resolveLocator(pwPage, target).hover();
470
+ },
471
+ async select(target, choice: SelectChoice): Promise<void> {
472
+ ensureOpen();
473
+ // EXACTLY ONE of value/label (the seam type enforces it); map to
474
+ // Playwright's `selectOption({value})` / `selectOption({label})`.
475
+ const option =
476
+ 'value' in choice ? {value: choice.value} : {label: choice.label};
477
+ await resolveLocator(pwPage, target).selectOption(option);
478
+ },
479
+ async scroll(target: ScrollTarget): Promise<void> {
480
+ ensureOpen();
481
+ if ('to' in target) {
482
+ // Reach an off-viewport element by scrolling it into view.
483
+ await resolveLocator(pwPage, target.to).scrollIntoViewIfNeeded();
484
+ return;
485
+ }
486
+ // Scroll the page by a pixel delta (the wheel convention: positive dy
487
+ // scrolls DOWN).
488
+ await pwPage.mouse.wheel(target.by.dx, target.by.dy);
489
+ },
490
+ async drag(source, target): Promise<void> {
491
+ ensureOpen();
492
+ await resolveLocator(pwPage, source).dragTo(
493
+ resolveLocator(pwPage, target),
494
+ );
495
+ },
496
+ },
497
+ });
498
+
499
+ /**
500
+ * The Tier-4 COORDINATE + SCREENSHOT hand (prd `broaden-agent-verb-surface`,
501
+ * R3; stories 17-19): the `mouse` coordinate-input verb and the `screenshot`
502
+ * path-returning verb, the look-then-click pair that lets a seam-only agent
503
+ * handle the VISION/TILE captcha family and any visual task.
504
+ *
505
+ * The seam stays ADR-0003-clean (as amended by the Tier-4 ADR) by passing ONLY
506
+ * numbers + a string enum (`mouse`) and returning ONLY a file PATH + dimensions
507
+ * (`screenshot`): NO image bytes and NO Playwright/CDP type cross the seam.
508
+ *
509
+ * - `mouse` drives Playwright `page.mouse` at VIEWPORT CSS-pixels (NOT OS-level
510
+ * input). A VIEWPORT screenshot's pixels map directly to these coordinates
511
+ * (the look-then-click contract); a FULL-PAGE shot does not.
512
+ * - `screenshot` MINTS a PNG under the managed {@link HandContext.screenshotsDir}
513
+ * and returns its path. The `element` scope clips to a locator (resolved
514
+ * through the SAME {@link resolveLocator}, so a cross-origin `frameLocator(...)`
515
+ * widget shot Just Works). A caller `out` override is validated to stay under
516
+ * the managed dir ({@link ScreenshotPathError}).
517
+ */
518
+ export const coordinateHand: Hand = ({pwPage, ensureOpen, screenshotsDir}) => ({
519
+ verbs: {
520
+ async mouse(input: MouseInput): Promise<void> {
521
+ ensureOpen();
522
+ await doMouse(pwPage, input);
523
+ },
524
+ async screenshot(options?: ScreenshotOptions): Promise<Screenshot> {
525
+ ensureOpen();
526
+ return takeScreenshot(pwPage, screenshotsDir, options);
527
+ },
528
+ },
529
+ });
530
+
531
+ /**
532
+ * webhands' built-in verbs as built-in hands, in composition order. Both
330
533
  * Playwright transports compose THIS exact set, so the verb surface is
331
534
  * identical across launch and attach (the only legitimate difference is the
332
535
  * per-transport SESSION LIFECYCLE, which is not a hand's concern).
@@ -338,6 +541,9 @@ export const BUILT_IN_HANDS: readonly Hand[] = [
338
541
  evalHand,
339
542
  waitHand,
340
543
  cookiesHand,
544
+ queryHand,
545
+ inputHand,
546
+ coordinateHand,
341
547
  ];
342
548
 
343
549
  /**
@@ -416,6 +622,144 @@ export async function waitFor(
416
622
  }
417
623
  }
418
624
 
625
+ /**
626
+ * Run the `eval` verb against a Playwright page (PRD story 9; frame scope from
627
+ * prd `broaden-agent-verb-surface`, Tier-3), shared by both Playwright
628
+ * transports (via the built-in eval hand) so the verb behaves identically (no
629
+ * parallel second implementation).
630
+ *
631
+ * With no `frame`, this is the top-document escape hatch: Playwright's
632
+ * `evaluate` IS the seam's serialization contract (see {@link WebHandsPage.eval}):
633
+ * it passes a string as an expression, awaits a returned Promise, and
634
+ * structurally clones the result out of the page by VALUE. That clone is richer
635
+ * than JSON: it preserves NaN/Infinity/BigInt and circular structures (back-refs
636
+ * become a `[Circular]` marker), yields `undefined` for functions/symbols, and
637
+ * returns an opaque preview string for a live host object (a DOM node never
638
+ * crosses the process boundary). A page-side throw rejects. We pass it straight
639
+ * through rather than re-encode it: wrapping the value in a transport-specific
640
+ * envelope would invent a dialect the seam deliberately avoids. The thrown error
641
+ * is a plain `Error`, so no Playwright/CDP type leaks across the seam (ADR-0003).
642
+ *
643
+ * With a `frame` selector, the SAME structured-clone contract holds, but the
644
+ * expression runs in the named SAME-ORIGIN child frame (resolved through the
645
+ * single {@link resolveSameOriginFrame}, which reuses the same
646
+ * {@link resolveLocator} the locator-taking verbs use). A cross-origin frame
647
+ * REJECTS with a typed {@link CrossOriginFrameError} (see that resolver).
648
+ */
649
+ export async function evalExpression(
650
+ page: Page,
651
+ expression: string,
652
+ options?: EvalOptions,
653
+ ): Promise<unknown> {
654
+ if (options?.frame === undefined) {
655
+ return page.evaluate(expression);
656
+ }
657
+ const frame = await resolveSameOriginFrame(page, options.frame);
658
+ // `frame.evaluate` honours the SAME structured-clone contract as
659
+ // `page.evaluate` (it is the same Playwright serialization), so the
660
+ // frame-scoped result crosses the seam by value exactly as the top-document
661
+ // `eval` does.
662
+ return frame.evaluate(expression);
663
+ }
664
+
665
+ /**
666
+ * Resolve a `frame` SELECTOR string to a live, SAME-ORIGIN Playwright
667
+ * {@link Frame} for a frame-scoped `eval` (prd `broaden-agent-verb-surface`,
668
+ * Tier-3, R1). This is the SINGLE frame resolver: it reuses the very same
669
+ * {@link resolveLocator} the locator-taking verbs use (a `frameLocator(...)`
670
+ * over the selector), then walks the iframe element handle to its content
671
+ * frame — there is no parallel frame-addressing scheme.
672
+ *
673
+ * SAME-ORIGIN ONLY, enforced LOUD. Playwright will happily `evaluate` inside a
674
+ * CROSS-ORIGIN OOPIF (it attaches out-of-band), so a cross-origin frame would
675
+ * NOT throw on its own — it would silently succeed, which is exactly the
676
+ * contract violation this verb forbids (page-world JS cannot cross a security
677
+ * boundary; the seam is same-origin only). So we DETECT cross-origin by
678
+ * comparing the frame's origin to the page's main-frame origin and reject with a
679
+ * typed {@link CrossOriginFrameError} when they differ, never returning a frame
680
+ * the page world could not legitimately reach.
681
+ *
682
+ * Failure modes are loud/typed: a selector that matches NO iframe element
683
+ * rejects (the locator resolves nothing); a matched frame with no content frame
684
+ * rejects; a cross-origin frame rejects with {@link CrossOriginFrameError}.
685
+ */
686
+ export async function resolveSameOriginFrame(
687
+ page: Page,
688
+ selector: string,
689
+ ): Promise<Frame> {
690
+ // Reuse the ONE resolver: treat the selector as the argument to
691
+ // `frameLocator(...)`, exactly how a locator-taking verb would frame-hop. We
692
+ // build the expression with a JSON-encoded selector so an arbitrary CSS
693
+ // selector cannot break out of the call.
694
+ const frameLocator = resolveLocator(
695
+ page,
696
+ `p.frameLocator(${JSON.stringify(selector)})`,
697
+ ) as unknown as {owner(): Locator};
698
+ // Bound the resolve: a selector that matches NO iframe must fail LOUD quickly
699
+ // rather than burn Playwright's 30s default auto-wait (mirrors the short
700
+ // bound `clickLocator` uses for a non-actionable element). `elementHandle`
701
+ // throws a TimeoutError on no match within the bound; we map it to a clear
702
+ // "no iframe matched" error.
703
+ let handle: Awaited<ReturnType<Locator['elementHandle']>>;
704
+ try {
705
+ handle = await frameLocator
706
+ .owner()
707
+ .elementHandle({timeout: FRAME_RESOLVE_TIMEOUT_MS});
708
+ } catch (cause) {
709
+ if (cause instanceof pwErrors.TimeoutError) {
710
+ throw new Error(
711
+ `eval --frame: no iframe element matched selector ${JSON.stringify(
712
+ selector,
713
+ )}.`,
714
+ );
715
+ }
716
+ throw cause;
717
+ }
718
+ if (handle === null) {
719
+ throw new Error(
720
+ `eval --frame: no iframe element matched selector ${JSON.stringify(
721
+ selector,
722
+ )}.`,
723
+ );
724
+ }
725
+ try {
726
+ const frame = await handle.contentFrame();
727
+ if (frame === null) {
728
+ throw new Error(
729
+ `eval --frame: the element matched by selector ${JSON.stringify(
730
+ selector,
731
+ )} is not a frame.`,
732
+ );
733
+ }
734
+ const pageOrigin = originOf(page.mainFrame().url());
735
+ const frameOrigin = originOf(frame.url());
736
+ if (frameOrigin === null || frameOrigin !== pageOrigin) {
737
+ throw new CrossOriginFrameError(selector, {
738
+ frameOrigin: frameOrigin ?? undefined,
739
+ pageOrigin: pageOrigin ?? undefined,
740
+ });
741
+ }
742
+ return frame;
743
+ } finally {
744
+ await handle.dispose();
745
+ }
746
+ }
747
+
748
+ /**
749
+ * The origin (`scheme://host:port`) of a frame/page URL, or `null` when the URL
750
+ * cannot be parsed at all. Used to compare a child frame's origin against the
751
+ * page's, the same-origin check the frame-scoped `eval` enforces. An unparseable
752
+ * URL reads as NOT same-origin (loud over silent): the frame is not provably
753
+ * reachable, so we treat it as cross-origin. NOTE: an opaque-origin URL (e.g. `about:blank`, `data:`) still parses and its `URL.origin` is the STRING `'null'`, not `null`; it never equals a real origin, but two opaque origins compare equal to each other.
754
+ */
755
+ function originOf(url: string): string | null {
756
+ try {
757
+ return new URL(url).origin;
758
+ } catch {
759
+ return null;
760
+ }
761
+ }
762
+
419
763
  /**
420
764
  * Resolve a raw Playwright locator EXPRESSION (ADR-0004) against the page. The
421
765
  * verb surface passes locator expressions like `getByRole('button', …)`; we
@@ -455,6 +799,19 @@ export function resolveLocator(page: Page, expression: string) {
455
799
  * bad locator) surfaces its timeout quickly instead of hanging the dispatch on
456
800
  * Playwright's 30s default — the dispatch escape is for elements that EXIST but
457
801
  * are not actionable (hidden custom inputs), not for absent ones.
802
+ *
803
+ * The happy-path click passes `noWaitAfter: true` on purpose. Playwright's
804
+ * `Locator.click()` normally clicks AND THEN auto-waits for any navigation the
805
+ * click scheduled to finish, and that post-click wait counts against the same
806
+ * timeout. A real submit button whose navigation takes longer than
807
+ * {@link NORMAL_CLICK_TIMEOUT_MS} would therefore have its (already-performed)
808
+ * click reported as a `TimeoutError` and be wrongly routed to the dispatch
809
+ * escape, which then re-clicks a page that is already navigating away. We only
810
+ * want the short budget to measure ACTIONABILITY (can we click it?), not how
811
+ * long the resulting navigation takes — `noWaitAfter` returns as soon as the
812
+ * click is performed, so a slow-but-successful submit no longer trips the
813
+ * fallback. A genuinely non-actionable hidden input still cannot be clicked
814
+ * within the budget and still falls through to `dispatchEvent` as before.
458
815
  */
459
816
  export async function clickLocator(
460
817
  page: Page,
@@ -462,7 +819,7 @@ export async function clickLocator(
462
819
  ): Promise<void> {
463
820
  const target = resolveLocator(page, expression);
464
821
  try {
465
- await target.click({timeout: NORMAL_CLICK_TIMEOUT_MS});
822
+ await target.click({timeout: NORMAL_CLICK_TIMEOUT_MS, noWaitAfter: true});
466
823
  } catch (cause) {
467
824
  if (!(cause instanceof pwErrors.TimeoutError)) {
468
825
  throw cause;
@@ -473,6 +830,425 @@ export async function clickLocator(
473
830
  }
474
831
  }
475
832
 
833
/**
 * Run the `query` verb against a Playwright page: resolve the locator
 * EXPRESSION through the shared {@link resolveLocator}, then return ONE ROW
 * PER MATCH carrying only the requested field families.
 *
 * - `attrs[name]` is the element's `getAttribute(name)` (`null` if absent).
 * - `props[name]` is the live `el[name]` JS property, read in page-world.
 * - `pw.visible` / `pw.bbox` come from the locator's `isVisible()` /
 *   `boundingBox()`.
 * - `refs: true` additionally attaches a durable `ref` to every row.
 *
 * `limit` bounds the row count. It is truncated to a whole number first, so a
 * fractional limit can never yield more rows than asked for (previously
 * `limit: 2.5` produced 3 rows); a negative or NaN limit yields no rows. With
 * no fields requested every row is an empty object.
 *
 * @param page - The Playwright page to query.
 * @param expression - The locator expression handed to {@link resolveLocator}.
 * @param options - Requested field families, `limit`, and the `refs` opt-in.
 * @returns One row per matched element, in match order, capped by `limit`.
 */
export async function queryRows(
  page: Page,
  expression: string,
  options?: QueryOptions,
): Promise<QueryRow[]> {
  const attrs = options?.attrs ?? [];
  const props = options?.props ?? [];
  const pw = options?.pw ?? [];
  const withRefs = options?.refs === true;
  const base = resolveLocator(page, expression);

  // Refs are single-`query`-scoped: sweep the PRIOR query's minted attributes
  // before anything is counted. Sweeping first keeps `count()` and the
  // per-row reads looking at the same DOM; if the expression itself addresses
  // a previously minted attribute, counting before the sweep would make the
  // row reads wait on elements the sweep just un-addressed.
  if (withRefs) {
    await sweepPriorMints(page);
  }

  const total = await base.count();
  const requested = options?.limit;
  const cap =
    requested === undefined ? total : Math.max(0, Math.floor(requested));
  // `Math.max(0, NaN)` is NaN; make the "no rows" outcome explicit.
  const rowCount = Number.isNaN(cap) ? 0 : Math.min(total, cap);

  // Rows are read one at a time, in match order; a failing read rejects the
  // whole query rather than being swallowed.
  const rows: QueryRow[] = [];
  for (let i = 0; i < rowCount; i++) {
    rows.push(await readRow(base.nth(i), attrs, props, pw, withRefs));
  }
  return rows;
}
885
+
886
/**
 * Name of the attribute the mint fallback stamps on an element that has no
 * stable unique address of its own. Every `query({refs: true})` clears it
 * again (see {@link sweepPriorMints}) before new refs are minted.
 */
const REF_MINT_ATTR = 'data-webhands-ref';

/**
 * Strip the {@link REF_MINT_ATTR} attribute from every element that currently
 * carries it. Only that one attribute is removed; no other attribute is
 * touched.
 *
 * NOTE(review): the sweep runs `document.querySelectorAll` inside
 * `page.evaluate`, so it presumably reaches only the main document's light
 * DOM — confirm whether mints inside frames / shadow roots need sweeping too.
 *
 * @param page - The Playwright page whose document is swept.
 */
async function sweepPriorMints(page: Page): Promise<void> {
  await page.evaluate((mintAttr) => {
    const selector = '[' + mintAttr + ']';
    // `querySelectorAll` returns a static list, so removing the attribute
    // while walking it is safe.
    for (const stamped of Array.from(document.querySelectorAll(selector))) {
      stamped.removeAttribute(mintAttr);
    }
  }, REF_MINT_ATTR);
}
907
+
908
/**
 * Compute the durable `ref` for ONE matched element, in page-world, and
 * return it as a LOCATOR EXPRESSION (`p.locator('<css>')`) so it is fed back
 * through the same {@link resolveLocator} as any other locator.
 *
 * Preference ladder:
 *  1. REUSE an existing attribute whose selector is VERIFIED to match exactly
 *     this element and nothing else, in priority `id` >
 *     `data-testid`/`data-test`/`data-id`/`name` > an anchor's `href` >
 *     `aria-label`. No DOM mutation.
 *  2. Otherwise MINT a {@link REF_MINT_ATTR} attribute on the element and
 *     address it as `[data-webhands-ref="<id>"]`.
 *
 * Attribute values are embedded as properly escaped CSS strings (backslash,
 * double quote and line breaks), so an unusual value can no longer build an
 * invalid selector. A candidate selector the engine refuses to parse, or one
 * whose single match is not this element, is treated as "not unique" and the
 * ladder falls through to the next rung instead of throwing out of the query.
 *
 * The minted-id counter lives on `window`, so minted ids are never reused
 * within a document.
 *
 * @param cell - Locator for the single element to address.
 * @returns A `p.locator(...)` expression resolving to that element.
 */
async function computeRef(cell: Locator): Promise<string> {
  const css = await cell.evaluate((el: Element, attr: string): string => {
    // Identifier escaping for `#id`: native `CSS.escape` when available,
    // else a backslash-every-special-character fallback.
    const cssEscape = (v: string): string =>
      typeof (window as {CSS?: {escape?: (s: string) => string}}).CSS
        ?.escape === 'function'
        ? (
            window as unknown as {CSS: {escape: (s: string) => string}}
          ).CSS.escape(v)
        : v.replace(/[^a-zA-Z0-9_-]/g, (c) => '\\' + c);

    // Quote a value as a CSS string. Backslash and the closing quote are
    // backslash-escaped; raw line breaks (illegal inside a CSS string) become
    // hex escapes. Ordinary values come out unchanged, so the selector stays
    // human-legible.
    const cssString = (v: string): string =>
      '"' +
      v
        .replace(/["\\]/g, (c) => '\\' + c)
        .replace(/[\n\r\f]/g, (c) => '\\' + c.charCodeAt(0).toString(16) + ' ') +
      '"';

    // A selector is usable only if it parses AND its one match is `el`.
    const uniq = (selector: string): boolean => {
      try {
        const hits = document.querySelectorAll(selector);
        return hits.length === 1 && hits[0] === el;
      } catch {
        // Unparseable selector: not a usable address, try the next rung.
        return false;
      }
    };

    // Ladder step 1: reuse a stable, VERIFIED-UNIQUE existing attribute.
    const id = el.getAttribute('id');
    if (id !== null && id !== '') {
      const sel = '#' + cssEscape(id);
      if (uniq(sel)) return sel;
    }
    for (const name of ['data-testid', 'data-test', 'data-id', 'name']) {
      const value = el.getAttribute(name);
      if (value !== null && value !== '') {
        const sel = '[' + name + '=' + cssString(value) + ']';
        if (uniq(sel)) return sel;
      }
    }
    // A link's href (only meaningful on an anchor).
    if (el.tagName === 'A') {
      const href = el.getAttribute('href');
      if (href !== null && href !== '') {
        const sel = 'a[href=' + cssString(href) + ']';
        if (uniq(sel)) return sel;
      }
    }
    // A unique aria-label.
    const aria = el.getAttribute('aria-label');
    if (aria !== null && aria !== '') {
      const sel = '[aria-label=' + cssString(aria) + ']';
      if (uniq(sel)) return sel;
    }

    // Ladder step 2: MINT the namespaced attribute (the fallback).
    const w = window as unknown as {__webhandsRefSeq?: number};
    w.__webhandsRefSeq = (w.__webhandsRefSeq ?? 0) + 1;
    const mintedId = 'wr' + w.__webhandsRefSeq;
    el.setAttribute(attr, mintedId);
    return '[' + attr + '="' + mintedId + '"]';
  }, REF_MINT_ATTR);
  // JSON-encode the CSS so a quote/backslash inside it cannot break out of
  // the `p.locator(...)` call expression.
  return `p.locator(${JSON.stringify(css)})`;
}
988
+
989
/**
 * Guard for acting on a `query` ref: resolve `ref` through the shared
 * {@link resolveLocator} and require that it matches EXACTLY ONE element.
 *
 * @param page - The Playwright page the ref is resolved against.
 * @param ref - The ref (a locator expression) to check.
 * @param verb - Name of the calling verb, passed on to the error.
 * @throws {StaleRefError} When the ref matches zero elements or more than one;
 *   the error is constructed with the ref, the match count and the verb.
 */
export async function assertRefResolvesToOne(
  page: Page,
  ref: string,
  verb: string,
): Promise<void> {
  const hits = await resolveLocator(page, ref).count();
  if (hits === 1) {
    return;
  }
  throw new StaleRefError(ref, hits, verb);
}
1008
+
1009
/**
 * Turn ONE matched element into a {@link QueryRow} holding only the families
 * that were asked for.
 *
 * `attrs` and `props` are gathered together in a single page-world
 * `evaluate` on the element; the `pw` extras go through the locator API
 * (`isVisible` / `boundingBox`); the optional `ref` is computed last, after
 * all reads.
 *
 * @param cell - Locator for the single element to read.
 * @param attrs - Attribute names to read via `getAttribute`.
 * @param props - Property names to read via `el[name]`.
 * @param pw - Locator extras to include (`'visible'`, `'bbox'`).
 * @param withRef - Whether to attach a durable `ref`.
 * @returns The row; a family that was not requested is left off entirely.
 */
async function readRow(
  cell: Locator,
  attrs: readonly string[],
  props: readonly string[],
  pw: readonly string[],
  withRef: boolean,
): Promise<QueryRow> {
  const result: {
    attrs?: Record<string, string | null>;
    props?: Record<string, unknown>;
    pw?: {visible?: boolean; bbox?: BoundingBox | null};
    ref?: string;
  } = {};

  const wantsPageRead = !(attrs.length === 0 && props.length === 0);
  if (wantsPageRead) {
    // Single round-trip into the page: markup attributes via `getAttribute`,
    // live properties via plain member access. The callback must stay
    // self-contained because it executes in the page, not in Node.
    const snapshot = await cell.evaluate(
      (
        node: Element,
        wanted: {attrNames: readonly string[]; propNames: readonly string[]},
      ) => {
        const collected: {
          attrs?: Record<string, string | null>;
          props?: Record<string, unknown>;
        } = {};
        if (wanted.attrNames.length > 0) {
          const markup: Record<string, string | null> = {};
          wanted.attrNames.forEach((key) => {
            markup[key] = node.getAttribute(key);
          });
          collected.attrs = markup;
        }
        if (wanted.propNames.length > 0) {
          const live: Record<string, unknown> = {};
          const bag = node as unknown as Record<string, unknown>;
          wanted.propNames.forEach((key) => {
            live[key] = bag[key];
          });
          collected.props = live;
        }
        return collected;
      },
      {attrNames: [...attrs], propNames: [...props]},
    );
    if (snapshot.attrs !== undefined) {
      result.attrs = snapshot.attrs;
    }
    if (snapshot.props !== undefined) {
      result.props = snapshot.props;
    }
  }

  if (pw.length > 0) {
    const locatorFacts: {visible?: boolean; bbox?: BoundingBox | null} = {};
    if (pw.includes('visible')) {
      locatorFacts.visible = await cell.isVisible();
    }
    if (pw.includes('bbox')) {
      locatorFacts.bbox = await cell.boundingBox();
    }
    result.pw = locatorFacts;
  }

  // The ref comes last: computing it may stamp an attribute on the element,
  // and that must not be visible to this row's own attr/prop reads.
  if (withRef) {
    result.ref = await computeRef(cell);
  }

  return result;
}
1092
+
1093
/**
 * Run the `mouse` verb against a Playwright page by driving `page.mouse` at
 * the given (x, y) coordinate.
 *
 * - `move` moves the pointer and takes no button.
 * - `click` clicks at (x, y) with the chosen button.
 * - `down` / `up` first move to (x, y), then press / release the button
 *   there, because Playwright's `down`/`up` act at the CURRENT pointer
 *   position.
 *
 * @param page - The Playwright page whose mouse is driven.
 * @param input - The action, the coordinate, and an optional button
 *   (defaults to `'left'`).
 * @throws {Error} When `input.action` is not one of the four known actions.
 *   Previously such a call resolved without doing anything.
 */
export async function doMouse(page: Page, input: MouseInput): Promise<void> {
  const button = input.button ?? 'left';
  switch (input.action) {
    case 'move':
      // A bare move takes no button (it is a pointer move, not a press).
      await page.mouse.move(input.x, input.y);
      return;
    case 'click':
      await page.mouse.click(input.x, input.y, {button});
      return;
    case 'down':
      // Move first so the press lands at the (x, y) the caller named.
      await page.mouse.move(input.x, input.y);
      await page.mouse.down({button});
      return;
    case 'up':
      // Same for the release half of a manual drag.
      await page.mouse.move(input.x, input.y);
      await page.mouse.up({button});
      return;
    default:
      // Unreachable for well-typed callers; reached only when an untyped
      // caller passes an action outside the enum. Fail loudly, not silently.
      throw new Error(
        `mouse: unknown action ${JSON.stringify(
          (input as {action?: unknown}).action,
        )} (expected "move", "click", "down" or "up").`,
      );
  }
}
1124
+
1125
/**
 * Run the `screenshot` verb against a Playwright page: capture the requested
 * scope as a PNG file and return `{path, width, height}` — never the image
 * bytes.
 *
 * Scopes:
 *  - `viewport` (the default) — `page.screenshot` without `fullPage`.
 *  - `full` — `page.screenshot` with `fullPage: true`.
 *  - `element` — the FIRST element matched by `options.locator`, resolved
 *    through the shared {@link resolveLocator}.
 *
 * `element` requires a locator and the other scopes forbid one; either
 * mismatch throws before anything is captured. The output path comes from
 * {@link resolveScreenshotPath}, and `screenshotsDir` is created if missing.
 * Width and height are read back from the PNG data itself.
 *
 * @param page - The Playwright page to capture.
 * @param screenshotsDir - Managed directory the PNG is written under.
 * @param options - Optional `scope`, `locator` and `out` override.
 * @returns The written file's path and its pixel dimensions.
 * @throws {Error} On a scope/locator mismatch.
 */
export async function takeScreenshot(
  page: Page,
  screenshotsDir: string,
  options?: ScreenshotOptions,
): Promise<Screenshot> {
  const scope = options?.scope ?? 'viewport';
  const locator = options?.locator;

  // Scope and locator must agree: exactly the `element` scope carries one.
  if (scope === 'element') {
    if (locator === undefined) {
      throw new Error(
        'screenshot --scope element requires --locator <expr> (the element to clip to).',
      );
    }
  } else if (locator !== undefined) {
    throw new Error(
      `screenshot --locator is only valid with --scope element (got scope ${JSON.stringify(
        scope,
      )}).`,
    );
  }

  const path = await resolveScreenshotPath(screenshotsDir, options?.out);
  await mkdir(screenshotsDir, {recursive: true});

  // After the validation above, a defined locator means scope `element`.
  const buffer: Buffer =
    locator !== undefined
      ? await resolveLocator(page, locator)
          .first()
          .screenshot({path, type: 'png'})
      : await page.screenshot({
          path,
          type: 'png',
          fullPage: scope === 'full',
        });

  const size = pngDimensions(buffer, path);
  return {path, width: size.width, height: size.height};
}
1189
+
1190
/**
 * Read a PNG's pixel dimensions straight from its header, without decoding
 * the image.
 *
 * Layout relied on: an 8-byte signature, then the first chunk — a 4-byte
 * length, the 4-byte type `IHDR`, and the width and height as big-endian
 * uint32s at byte offsets 16 and 20.
 *
 * @param buffer - The PNG bytes.
 * @param path - Where the PNG was written; used only in error messages.
 * @returns The image's width and height in pixels.
 * @throws {Error} When the buffer is shorter than 24 bytes, lacks the PNG
 *   signature, or its first chunk is not `IHDR` (so offsets 16/20 would not
 *   hold the dimensions).
 */
function pngDimensions(
  buffer: Buffer,
  path: string,
): {width: number; height: number} {
  const PNG_SIGNATURE = '89504e470d0a1a0a';
  if (
    buffer.length < 24 ||
    buffer.subarray(0, 8).toString('hex') !== PNG_SIGNATURE
  ) {
    throw new Error(
      `screenshot: the file written at ${path} is not a valid PNG (no PNG signature).`,
    );
  }
  // Bytes 12..16 are the first chunk's type; only an IHDR chunk puts the
  // width/height where they are read below.
  if (buffer.subarray(12, 16).toString('latin1') !== 'IHDR') {
    throw new Error(
      `screenshot: the file written at ${path} is not a valid PNG (first chunk is not IHDR).`,
    );
  }
  return {
    width: buffer.readUInt32BE(16),
    height: buffer.readUInt32BE(20),
  };
}
1214
+
1215
/**
 * Decide where a screenshot PNG is written.
 *
 * Without an `out` override (undefined or empty) a freshly minted file name
 * is joined onto `screenshotsDir`. With an override, a relative `out` is
 * resolved against the managed directory and an absolute one is taken as
 * given; the result must land strictly INSIDE the managed directory.
 *
 * Escape detection looks at the path from the managed root to the candidate:
 * it is refused when that path is empty (the root itself), is `..` or starts
 * with a `..` path segment, or is absolute (a different root). Only a real
 * `..` SEGMENT counts — an in-root name that merely begins with two dots,
 * such as `..thumb.png`, is accepted (it used to be refused).
 *
 * NOTE(review): both `/` and `\` are treated as segment separators after a
 * leading `..`, on every platform.
 *
 * @param screenshotsDir - The managed screenshots directory.
 * @param out - Optional caller-chosen output path.
 * @returns The path to write the PNG to.
 * @throws {ScreenshotPathError} When `out` resolves outside the managed
 *   directory (or to the directory itself).
 */
async function resolveScreenshotPath(
  screenshotsDir: string,
  out?: string,
): Promise<string> {
  if (out === undefined || out === '') {
    return join(screenshotsDir, mintScreenshotName());
  }
  const managedRoot = resolvePath(screenshotsDir);
  const candidate = isAbsolute(out)
    ? resolvePath(out)
    : resolvePath(managedRoot, out);
  const rel = relative(managedRoot, candidate);
  // `..` alone or `..` followed by a separator means "climbs out of the root".
  const climbsOut = /^\.\.(?:[\\/]|$)/.test(rel);
  if (rel === '' || climbsOut || isAbsolute(rel)) {
    throw new ScreenshotPathError(out, managedRoot);
  }
  return candidate;
}
1241
+
1242
/**
 * Mint a PNG file name of the form `webhands-<timestamp>-<suffix>.png`.
 *
 * The timestamp is the current ISO-8601 time with `:` and `.` replaced by
 * `-`. The suffix is base-36 text derived from `Math.random()`, padded with
 * `0` to a fixed 8 characters — the raw base-36 expansion can be shorter than
 * 8 (or empty, for a random value of exactly 0), which used to produce
 * ragged names.
 *
 * @returns The minted file name (no directory part).
 */
function mintScreenshotName(): string {
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  // slice(2) drops the leading "0."; padEnd keeps the width constant.
  const rand = Math.random().toString(36).slice(2, 10).padEnd(8, '0');
  return `webhands-${stamp}-${rand}.png`;
}
1251
+
476
1252
  /** Map a Playwright cookie to the transport-neutral seam {@link Cookie}. */
477
1253
  function toSeamCookie(c: {
478
1254
  name: string;