@webhands/core 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +69 -6
  2. package/dist/errors.d.ts +112 -1
  3. package/dist/errors.d.ts.map +1 -1
  4. package/dist/errors.js +121 -0
  5. package/dist/errors.js.map +1 -1
  6. package/dist/hand-host.d.ts +198 -5
  7. package/dist/hand-host.d.ts.map +1 -1
  8. package/dist/hand-host.js +664 -21
  9. package/dist/hand-host.js.map +1 -1
  10. package/dist/index.d.ts +5 -4
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +4 -3
  13. package/dist/index.js.map +1 -1
  14. package/dist/playwright-attach-transport.d.ts +8 -1
  15. package/dist/playwright-attach-transport.d.ts.map +1 -1
  16. package/dist/playwright-attach-transport.js +19 -4
  17. package/dist/playwright-attach-transport.js.map +1 -1
  18. package/dist/playwright-launch-transport.d.ts +23 -0
  19. package/dist/playwright-launch-transport.d.ts.map +1 -1
  20. package/dist/playwright-launch-transport.js +40 -6
  21. package/dist/playwright-launch-transport.js.map +1 -1
  22. package/dist/profile-location.d.ts +19 -0
  23. package/dist/profile-location.d.ts.map +1 -1
  24. package/dist/profile-location.js +21 -0
  25. package/dist/profile-location.js.map +1 -1
  26. package/dist/seam.d.ts +501 -7
  27. package/dist/seam.d.ts.map +1 -1
  28. package/dist/seam.js +31 -0
  29. package/dist/seam.js.map +1 -1
  30. package/dist/session-rpc.d.ts +63 -1
  31. package/dist/session-rpc.d.ts.map +1 -1
  32. package/dist/session-rpc.js +174 -11
  33. package/dist/session-rpc.js.map +1 -1
  34. package/dist/socks-proxy.d.ts +61 -0
  35. package/dist/socks-proxy.d.ts.map +1 -0
  36. package/dist/socks-proxy.js +84 -0
  37. package/dist/socks-proxy.js.map +1 -0
  38. package/dist/stub-transport.d.ts.map +1 -1
  39. package/dist/stub-transport.js +74 -6
  40. package/dist/stub-transport.js.map +1 -1
  41. package/dist/test-fixtures/fixture-pages.d.ts.map +1 -1
  42. package/dist/test-fixtures/fixture-pages.js +994 -0
  43. package/dist/test-fixtures/fixture-pages.js.map +1 -1
  44. package/dist/test-fixtures/fixture-server.d.ts.map +1 -1
  45. package/dist/test-fixtures/fixture-server.js +33 -3
  46. package/dist/test-fixtures/fixture-server.js.map +1 -1
  47. package/package.json +1 -1
  48. package/src/errors.ts +164 -1
  49. package/src/hand-host.ts +797 -21
  50. package/src/index.ts +27 -1
  51. package/src/playwright-attach-transport.ts +25 -3
  52. package/src/playwright-launch-transport.ts +63 -4
  53. package/src/profile-location.ts +25 -0
  54. package/src/seam.ts +535 -7
  55. package/src/session-rpc.ts +276 -14
  56. package/src/socks-proxy.ts +127 -0
  57. package/src/stub-transport.ts +83 -6
  58. package/src/test-fixtures/fixture-pages.ts +1010 -0
  59. package/src/test-fixtures/fixture-server.ts +32 -3
package/dist/hand-host.js CHANGED
@@ -1,4 +1,8 @@
1
- import { errors as pwErrors } from 'playwright';
1
+ import { errors as pwErrors, } from 'playwright';
2
+ import { validateSnapshotOptions } from './seam.js';
3
+ import { CrossOriginFrameError, ScreenshotPathError, StaleRefError, } from './errors.js';
4
+ import { mkdir } from 'node:fs/promises';
5
+ import { isAbsolute, join, relative, resolve as resolvePath } from 'node:path';
2
6
  /**
3
7
  * Compose a set of hands over one live {@link HandContext} into a single
4
8
  * {@link WebHandsPage}. This is the host primitive both Playwright transports call to
@@ -54,6 +58,18 @@ const REQUIRED_VERBS = [
54
58
  'wait',
55
59
  'cookies',
56
60
  'setCookies',
61
+ 'query',
62
+ 'count',
63
+ 'exists',
64
+ 'isVisible',
65
+ 'getAttribute',
66
+ 'press',
67
+ 'hover',
68
+ 'select',
69
+ 'scroll',
70
+ 'drag',
71
+ 'mouse',
72
+ 'screenshot',
57
73
  ];
58
74
  /**
59
75
  * Assert the composed verbs cover the whole seam {@link WebHandsPage}, then return it
@@ -79,6 +95,16 @@ function assertCompletePage(verbs) {
79
95
  * (animations, late layout) before deciding to dispatch.
80
96
  */
81
97
  const NORMAL_CLICK_TIMEOUT_MS = 1_000;
98
+ /**
99
+ * How long {@link resolveSameOriginFrame} waits for the `frame` selector to
100
+ * resolve to an iframe element before treating it as "no such frame". Short on
101
+ * purpose: a frame-scoped `eval` against a bad selector should fail LOUD fast,
102
+ * not burn Playwright's 30s default auto-wait (the same reasoning as
103
+ * {@link NORMAL_CLICK_TIMEOUT_MS}). An iframe present in the markup resolves
104
+ * immediately and never approaches this bound; it is the latency cost paid ONLY
105
+ * on the no-such-frame path.
106
+ */
107
+ const FRAME_RESOLVE_TIMEOUT_MS = 1_000;
82
108
  // ---------------------------------------------------------------------------
83
109
  // Built-in hands: webhands' OWN built-in verbs, each a hand over the host.
84
110
  //
@@ -110,6 +136,10 @@ export const snapshotHand = ({ pwPage, ensureOpen }) => ({
110
136
  verbs: {
111
137
  async snapshot(options) {
112
138
  ensureOpen();
139
+ // Reject an unknown/misshapen option LOUDLY (e.g. `{view: 'full'}`)
140
+ // rather than silently returning the wrong view. Single source of
141
+ // truth in the seam, shared with the RPC server dispatch.
142
+ validateSnapshotOptions(options);
113
143
  const url = pwPage.url();
114
144
  if (options?.full === true) {
115
145
  // `--full`: the raw DOM. `documentElement.outerHTML` is the serialized
@@ -130,15 +160,29 @@ export const snapshotHand = ({ pwPage, ensureOpen }) => ({
130
160
  },
131
161
  },
132
162
  });
133
- /** The `click` + `type` verbs: page interaction by raw locator (ADR-0004). */
163
+ /**
164
+ * The `click` + `type` verbs: page interaction by raw locator (ADR-0004).
165
+ *
166
+ * With `{byRef: true}` the target is a durable `query` {@link QueryRow.ref}: it
167
+ * is resolved through the SAME {@link resolveLocator} but FIRST asserted to match
168
+ * EXACTLY ONE element ({@link assertRefResolvesToOne}), so a stale (zero) or
169
+ * ambiguous (many) ref fails LOUD with a {@link StaleRefError} instead of
170
+ * silently acting on the wrong element — the safety the durable ref exists for.
171
+ */
134
172
  export const interactionHand = ({ pwPage, ensureOpen }) => ({
135
173
  verbs: {
136
- async click(t) {
174
+ async click(t, options) {
137
175
  ensureOpen();
176
+ if (options?.byRef === true) {
177
+ await assertRefResolvesToOne(pwPage, t, 'click');
178
+ }
138
179
  await clickLocator(pwPage, t);
139
180
  },
140
- async type(t, text) {
181
+ async type(t, text, options) {
141
182
  ensureOpen();
183
+ if (options?.byRef === true) {
184
+ await assertRefResolvesToOne(pwPage, t, 'type');
185
+ }
142
186
  await resolveLocator(pwPage, t).fill(text);
143
187
  },
144
188
  },
@@ -146,22 +190,9 @@ export const interactionHand = ({ pwPage, ensureOpen }) => ({
146
190
  /** The `eval` escape hatch: run a JS EXPRESSION in the page, return by value. */
147
191
  export const evalHand = ({ pwPage, ensureOpen }) => ({
148
192
  verbs: {
149
- async eval(expression) {
193
+ async eval(expression, options) {
150
194
  ensureOpen();
151
- // The `eval` escape hatch (PRD story 9): run the raw JS EXPRESSION in the
152
- // page and return its serializable result. Playwright's `evaluate`
153
- // already IS the seam's serialization contract (see {@link WebHandsPage.eval}):
154
- // it passes a string as an expression, awaits a returned Promise, and
155
- // structurally clones the result out of the page by VALUE. That clone is
156
- // richer than JSON: it preserves NaN/Infinity/BigInt and circular
157
- // structures (back-refs become a `[Circular]` marker), yields `undefined`
158
- // for functions/symbols, and returns an opaque preview string for a live
159
- // host object (a DOM node never crosses the process boundary). A page-side
160
- // throw rejects. We pass it straight through rather than re-encode it:
161
- // wrapping the value in a transport-specific envelope would invent a
162
- // dialect the seam deliberately avoids. The thrown error is a plain
163
- // `Error`, so no Playwright/CDP type leaks across the seam (ADR-0003).
164
- return pwPage.evaluate(expression);
195
+ return evalExpression(pwPage, expression, options);
165
196
  },
166
197
  },
167
198
  });
@@ -193,7 +224,141 @@ export const cookiesHand = ({ context, ensureOpen }) => ({
193
224
  },
194
225
  });
195
226
  /**
196
- * webhands' eight built-in verbs as built-in hands, in composition order. Both
227
+ * The Tier-1 read verbs (prd `broaden-agent-verb-surface`, R2): the `query`
228
+ * extraction verb plus the thin state shorthands `count` / `exists` /
229
+ * `isVisible` / `getAttribute`. All five address element(s) by the SAME raw
230
+ * Playwright locator expression the other verbs use, resolved through the ONE
231
+ * existing {@link resolveLocator} (so a `frameLocator(...)` same-origin frame
232
+ * hop in the string Just Works, and there is no parallel addressing scheme —
233
+ * R1). They are pure READS: no page mutation.
234
+ *
235
+ * `query` returns one row per match carrying EXACTLY the requested fields (R2);
236
+ * the state verbs are computed over the same machinery (see {@link queryRows}
237
+ * and the per-verb bodies). Read values cross by structured clone, the same
238
+ * contract as `eval` (ADR-0003).
239
+ */
240
export const queryHand = ({ pwPage, ensureOpen }) => {
    // Every verb addresses its element(s) through the one shared resolver.
    const locate = (target) => resolveLocator(pwPage, target);
    return {
        verbs: {
            async query(target, options) {
                ensureOpen();
                return queryRows(pwPage, target, options);
            },
            async count(target) {
                ensureOpen();
                return locate(target).count();
            },
            async exists(target) {
                ensureOpen();
                const matches = await locate(target).count();
                return matches > 0;
            },
            async isVisible(target) {
                ensureOpen();
                // Only the FIRST match is inspected.
                return locate(target).first().isVisible();
            },
            async getAttribute(target, name) {
                ensureOpen();
                // A target that matches nothing short-circuits to `null` before the
                // attribute read is attempted, so the read never waits on an
                // element that is not there.
                const matches = await locate(target).count();
                if (matches === 0) {
                    return null;
                }
                return locate(target).first().getAttribute(name);
            },
        },
    };
};
274
/**
 * The Tier-2 rich INPUT verbs (prd `broaden-agent-verb-surface`, stories 8-12):
 * `press`, `hover`, `select`, `scroll` and `drag`.
 *
 * Each verb forwards to the matching Playwright action (`keyboard.press` /
 * `locator.press`, `hover`, `selectOption`, `mouse.wheel` /
 * `scrollIntoViewIfNeeded`, `dragTo`). Every locator string is resolved through
 * the SAME single {@link resolveLocator} the other verbs use, so there is no
 * parallel addressing scheme (R1). Only strings and numbers cross the seam
 * (ADR-0003).
 */
export const inputHand = ({ pwPage, ensureOpen }) => {
    const locate = (expression) => resolveLocator(pwPage, expression);
    return {
        verbs: {
            async press(key, target) {
                ensureOpen();
                if (target === undefined) {
                    // No locator given: send the key through the page keyboard.
                    await pwPage.keyboard.press(key);
                    return;
                }
                // Locator given: press the key on that element.
                await locate(target).press(key);
            },
            async hover(target) {
                ensureOpen();
                await locate(target).hover();
            },
            async select(target, choice) {
                ensureOpen();
                // `choice` carries either `value` or `label`; forward whichever
                // key is present as the Playwright option descriptor.
                let option;
                if ('value' in choice) {
                    option = { value: choice.value };
                }
                else {
                    option = { label: choice.label };
                }
                await locate(target).selectOption(option);
            },
            async scroll(target) {
                ensureOpen();
                if (!('to' in target)) {
                    // Pixel-delta form: wheel the page by (dx, dy).
                    await pwPage.mouse.wheel(target.by.dx, target.by.dy);
                    return;
                }
                // Locator form: bring the element into view.
                await locate(target.to).scrollIntoViewIfNeeded();
            },
            async drag(source, target) {
                ensureOpen();
                const from = locate(source);
                const onto = locate(target);
                await from.dragTo(onto);
            },
        },
    };
};
329
/**
 * The Tier-4 COORDINATE + SCREENSHOT hand (prd `broaden-agent-verb-surface`,
 * R3; stories 17-19): the `mouse` coordinate-input verb and the `screenshot`
 * path-returning verb — the look-then-click pair for visual tasks.
 *
 * Both verbs are thin guards: after checking the session is open, `mouse`
 * delegates to {@link doMouse} and `screenshot` delegates to
 * {@link takeScreenshot}, which is handed the managed
 * {@link HandContext.screenshotsDir}. Per the seam contract (ADR-0003 as
 * amended by the Tier-4 ADR) only numbers and a string enum go in, and only a
 * file path plus dimensions come out: no image bytes and no Playwright/CDP
 * type cross the seam.
 */
export const coordinateHand = ({ pwPage, ensureOpen, screenshotsDir }) => {
    const verbs = {
        async mouse(input) {
            ensureOpen();
            await doMouse(pwPage, input);
        },
        async screenshot(options) {
            ensureOpen();
            return takeScreenshot(pwPage, screenshotsDir, options);
        },
    };
    return { verbs };
};
360
+ /**
361
+ * webhands' built-in verbs as built-in hands, in composition order. Both
197
362
  * Playwright transports compose THIS exact set, so the verb surface is
198
363
  * identical across launch and attach (the only legitimate difference is the
199
364
  * per-transport SESSION LIFECYCLE, which is not a hand's concern).
@@ -205,6 +370,9 @@ export const BUILT_IN_HANDS = [
205
370
  evalHand,
206
371
  waitHand,
207
372
  cookiesHand,
373
+ queryHand,
374
+ inputHand,
375
+ coordinateHand,
208
376
  ];
209
377
  /**
210
378
  * Compose webhands' built-in hands over a live context into the seam's
@@ -272,6 +440,122 @@ export async function waitFor(page, condition) {
272
440
  return;
273
441
  }
274
442
  }
443
/**
 * Run the `eval` verb against a Playwright page (PRD story 9; frame scope from
 * prd `broaden-agent-verb-surface`, Tier-3). Shared by both Playwright
 * transports through the built-in eval hand, so there is one implementation.
 *
 * Without `options.frame` the expression is evaluated in the top document via
 * `page.evaluate`. Playwright's `evaluate` IS the seam's serialization contract
 * (see {@link WebHandsPage.eval}): the result is returned by VALUE and is passed
 * straight through rather than re-encoded, so no transport-specific envelope is
 * invented and no Playwright/CDP type leaks across the seam (ADR-0003).
 *
 * With `options.frame` the selector is first resolved by
 * {@link resolveSameOriginFrame} — which rejects a cross-origin frame with a
 * typed {@link CrossOriginFrameError} — and the expression is evaluated in that
 * frame through the same `evaluate` contract.
 */
export async function evalExpression(page, expression, options) {
    const frameSelector = options?.frame;
    if (frameSelector !== undefined) {
        // Frame-scoped: resolve (and same-origin check) the frame, then evaluate
        // inside it.
        const frame = await resolveSameOriginFrame(page, frameSelector);
        return frame.evaluate(expression);
    }
    // Top-document escape hatch.
    return page.evaluate(expression);
}
478
/**
 * Resolve a `frame` SELECTOR string to a live, SAME-ORIGIN Playwright
 * {@link Frame} for a frame-scoped `eval` (prd `broaden-agent-verb-surface`,
 * Tier-3, R1).
 *
 * This is the SINGLE frame resolver: the selector is wrapped in a
 * `p.frameLocator(...)` expression and handed to the very same
 * {@link resolveLocator} the locator-taking verbs use, then the owning iframe
 * element handle is walked to its content frame.
 *
 * SAME-ORIGIN ONLY, enforced LOUD. Playwright will evaluate inside a
 * cross-origin frame without complaint, so the boundary is checked here by
 * comparing the frame's origin with the main frame's origin; a mismatch (or a
 * frame origin that cannot be determined) rejects with a typed
 * {@link CrossOriginFrameError}.
 *
 * Failure modes:
 * - no iframe element matches within {@link FRAME_RESOLVE_TIMEOUT_MS} → `Error`;
 * - the matched element has no content frame → `Error`;
 * - the frame is not same-origin → {@link CrossOriginFrameError};
 * - any other failure while resolving the handle is rethrown unchanged.
 */
export async function resolveSameOriginFrame(page, selector) {
    // JSON-encode the selector once: it is used both inside the locator
    // expression (so arbitrary CSS cannot break out of the call) and in the
    // error messages.
    const quoted = JSON.stringify(selector);
    const noMatchMessage = `eval --frame: no iframe element matched selector ${quoted}.`;
    const frameLocator = resolveLocator(page, `p.frameLocator(${quoted})`);
    let handle;
    try {
        // Short bound so a bad selector fails fast instead of waiting out
        // Playwright's default auto-wait.
        handle = await frameLocator
            .owner()
            .elementHandle({ timeout: FRAME_RESOLVE_TIMEOUT_MS });
    }
    catch (cause) {
        if (!(cause instanceof pwErrors.TimeoutError)) {
            throw cause;
        }
        throw new Error(noMatchMessage);
    }
    if (handle === null) {
        throw new Error(noMatchMessage);
    }
    try {
        const frame = await handle.contentFrame();
        if (frame === null) {
            throw new Error(`eval --frame: the element matched by selector ${quoted} is not a frame.`);
        }
        const pageOrigin = originOf(page.mainFrame().url());
        const frameOrigin = originOf(frame.url());
        const sameOrigin = frameOrigin !== null && frameOrigin === pageOrigin;
        if (!sameOrigin) {
            throw new CrossOriginFrameError(selector, {
                frameOrigin: frameOrigin ?? undefined,
                pageOrigin: pageOrigin ?? undefined,
            });
        }
        return frame;
    }
    finally {
        // The element handle is only a stepping stone to the frame; always
        // release it, on success and on every rejection above.
        await handle.dispose();
    }
}
544
/**
 * The origin (`scheme://host:port`) of a frame/page URL, or `null` when the URL
 * has no usable origin.
 *
 * `null` is returned in two cases:
 * - the string is not a parseable URL (`new URL` throws);
 * - the URL parses but its origin is OPAQUE (e.g. `about:blank`, `data:` URLs).
 *   The URL API serializes an opaque origin as the literal string `'null'`
 *   rather than throwing, so it must be mapped explicitly. Without that, two
 *   opaque origins would compare equal as strings and be mistaken for
 *   same-origin.
 *
 * Used for the same-origin check the frame-scoped `eval` enforces: an
 * unparseable / opaque origin reads as NOT same-origin (loud over silent).
 *
 * @param {string} url The URL to inspect.
 * @returns {string | null} The serialized origin, or `null` if there is none.
 */
function originOf(url) {
    let origin;
    try {
        origin = new URL(url).origin;
    }
    catch {
        return null;
    }
    // An opaque origin serializes as the string 'null'; report it as "no origin".
    return origin === 'null' ? null : origin;
}
275
559
  /**
276
560
  * Resolve a raw Playwright locator EXPRESSION (ADR-0004) against the page. The
277
561
  * verb surface passes locator expressions like `getByRole('button', …)`; we
@@ -307,11 +591,24 @@ export function resolveLocator(page, expression) {
307
591
  * bad locator) surfaces its timeout quickly instead of hanging the dispatch on
308
592
  * Playwright's 30s default — the dispatch escape is for elements that EXIST but
309
593
  * are not actionable (hidden custom inputs), not for absent ones.
594
+ *
595
+ * The happy-path click passes `noWaitAfter: true` on purpose. Playwright's
596
+ * `Locator.click()` normally clicks AND THEN auto-waits for any navigation the
597
+ * click scheduled to finish, and that post-click wait counts against the same
598
+ * timeout. A real submit button whose navigation takes longer than
599
+ * {@link NORMAL_CLICK_TIMEOUT_MS} would therefore have its (already-performed)
600
+ * click reported as a `TimeoutError` and be wrongly routed to the dispatch
601
+ * escape, which then re-clicks a page that is already navigating away. We only
602
+ * want the short budget to measure ACTIONABILITY (can we click it?), not how
603
+ * long the resulting navigation takes — `noWaitAfter` returns as soon as the
604
+ * click is performed, so a slow-but-successful submit no longer trips the
605
+ * fallback. A genuinely non-actionable hidden input still cannot be clicked
606
+ * within the budget and still falls through to `dispatchEvent` as before.
310
607
  */
311
608
  export async function clickLocator(page, expression) {
312
609
  const target = resolveLocator(page, expression);
313
610
  try {
314
- await target.click({ timeout: NORMAL_CLICK_TIMEOUT_MS });
611
+ await target.click({ timeout: NORMAL_CLICK_TIMEOUT_MS, noWaitAfter: true });
315
612
  }
316
613
  catch (cause) {
317
614
  if (!(cause instanceof pwErrors.TimeoutError)) {
@@ -322,6 +619,352 @@ export async function clickLocator(page, expression) {
322
619
  await target.dispatchEvent('click', { timeout: NORMAL_CLICK_TIMEOUT_MS });
323
620
  }
324
621
  }
622
/**
 * Run the `query` verb (prd `broaden-agent-verb-surface`, R2) against a
 * Playwright page: resolve the locator EXPRESSION through the SINGLE existing
 * {@link resolveLocator}, then return ONE ROW PER MATCH carrying EXACTLY the
 * requested field families.
 *
 * Requested families (each defaults to "none"):
 * - `options.attrs` — attribute names, read with `getAttribute`;
 * - `options.props` — live JS property names, read as `el[name]`;
 * - `options.pw` — the Playwright extras (`visible`, `bbox`);
 * - `options.refs === true` — also compute a durable ref per row.
 *
 * `options.limit` caps the row count (a negative limit is clamped to 0); with
 * no limit every match is returned. Rows are read one at a time, in match
 * order, by {@link readRow}; a failing read rejects the whole call rather than
 * being swallowed.
 *
 * When refs are requested, the PRIOR query's minted attributes are swept first
 * ({@link sweepPriorMints}), once, before any row is read, so minted refs stay
 * scoped to a single query.
 */
export async function queryRows(page, expression, options) {
    const attrNames = options?.attrs ?? [];
    const propNames = options?.props ?? [];
    const extras = options?.pw ?? [];
    const wantRefs = options?.refs === true;
    const matches = resolveLocator(page, expression);
    const total = await matches.count();
    const cap = options?.limit === undefined ? total : Math.max(0, options.limit);
    const rowCount = Math.min(total, cap);
    if (wantRefs) {
        await sweepPriorMints(page);
    }
    const rows = [];
    let index = 0;
    while (index < rowCount) {
        const row = await readRow(matches.nth(index), attrNames, propNames, extras, wantRefs);
        rows.push(row);
        index += 1;
    }
    return rows;
}
667
+ /**
668
+ * The namespaced attribute the MINT fallback (ladder step 2) stamps on an
669
+ * anonymous element. A `query({refs: true})` sweeps every node carrying it
670
+ * before re-minting, so mints stay single-query-scoped.
671
+ */
672
+ const REF_MINT_ATTR = 'data-webhands-ref';
673
/**
 * Remove EVERY {@link REF_MINT_ATTR} attribute currently in the document: the
 * single-`query`-scope sweep run at the start of each `refs: true` query.
 *
 * Only webhands' own minted attribute is selected and removed, so attributes
 * owned by the page itself are never touched.
 */
async function sweepPriorMints(page) {
    // Runs in the page: strip the attribute from each element that carries it.
    const stripAttribute = (attr) => {
        for (const el of document.querySelectorAll('[' + attr + ']')) {
            el.removeAttribute(attr);
        }
    };
    await page.evaluate(stripAttribute, REF_MINT_ATTR);
}
687
/**
 * Compute the durable {@link QueryRow.ref} for ONE matched element by the R4
 * PREFERENCE LADDER, in page-world (the finding
 * `query-ref-mint-mechanism-attribute-beats-weakmap` settled the mechanism: a
 * `data-webhands-ref` ATTRIBUTE, not a WeakMap).
 *
 * Returns the ref as a LOCATOR EXPRESSION the ONE existing {@link resolveLocator}
 * resolves — `p.locator('<css>')` — NOT a bare CSS string, so `click`/`type`
 * feed it back through the same resolver path as any other locator (R1).
 *
 * Ladder:
 *  1. REUSE the element's own stable attribute, in priority `id` >
 *     `data-testid`/`data-test`/`data-id`/`name` > a link's `href` >
 *     `aria-label`. A candidate is accepted only when the selector matches
 *     EXACTLY ONE element in the document AND that element is this one;
 *     anything else (duplicate, mismatch, unparseable selector) FALLS THROUGH
 *     to the next rung. Zero DOM mutation.
 *  2. MINT a namespaced {@link REF_MINT_ATTR} for an element with no unique
 *     address, addressed by `[data-webhands-ref="<id>"]`.
 *
 * Attribute values are embedded as properly escaped CSS strings (backslash,
 * double quote and line breaks), so a value such as `a\` or `x"y` yields a
 * valid selector for the intended element instead of a `SyntaxError` or a
 * selector that matches something else.
 *
 * The minted-id counter lives on `window`, so ids are not reused within the
 * life of the document even after a sweep clears stale attributes.
 *
 * @param cell A locator for exactly the element to address.
 * @returns {Promise<string>} A `p.locator("<css>")` expression.
 */
async function computeRef(cell) {
    // NOTE: the callback is serialized into the page, so it must be
    // self-contained — every helper it needs is declared inside it.
    const css = await cell.evaluate((el, attr) => {
        const cssEscape = (v) => typeof window.CSS?.escape === 'function'
            ? window.CSS.escape(v)
            : v.replace(/[^a-zA-Z0-9_-]/g, (c) => '\\' + c);
        // Quote a value as a CSS string: escape `\` and `"`, and hex-escape line
        // breaks (a raw newline inside a CSS string is a parse error).
        const cssString = (v) => '"' + v
            .replace(/["\\]/g, '\\$&')
            .replace(/[\n\r\f]/g, (c) => '\\' + c.charCodeAt(0).toString(16) + ' ') + '"';
        // Unique means: exactly one match, and that match is THIS element.
        const uniq = (selector) => {
            try {
                const found = document.querySelectorAll(selector);
                return found.length === 1 && found[0] === el;
            }
            catch (err) {
                // A selector the engine cannot parse is simply not usable as a
                // ref; fall through to the next rung instead of failing the query.
                if (err?.name === 'SyntaxError') {
                    return false;
                }
                throw err;
            }
        };
        const nonEmpty = (name) => {
            const value = el.getAttribute(name);
            return value !== null && value !== '' ? value : null;
        };
        // Ladder step 1: reuse a stable, VERIFIED-UNIQUE existing attribute.
        const candidates = [];
        const id = nonEmpty('id');
        if (id !== null) {
            candidates.push('#' + cssEscape(id));
        }
        for (const name of ['data-testid', 'data-test', 'data-id', 'name']) {
            const value = nonEmpty(name);
            if (value !== null) {
                candidates.push('[' + name + '=' + cssString(value) + ']');
            }
        }
        // A link's href (only meaningful on an anchor).
        if (el.tagName === 'A') {
            const href = nonEmpty('href');
            if (href !== null) {
                candidates.push('a[href=' + cssString(href) + ']');
            }
        }
        const aria = nonEmpty('aria-label');
        if (aria !== null) {
            candidates.push('[aria-label=' + cssString(aria) + ']');
        }
        for (const selector of candidates) {
            if (uniq(selector)) {
                return selector;
            }
        }
        // Ladder step 2: MINT the namespaced attribute (the fallback).
        const w = window;
        w.__webhandsRefSeq = (w.__webhandsRefSeq ?? 0) + 1;
        const mintedId = 'wr' + w.__webhandsRefSeq;
        el.setAttribute(attr, mintedId);
        return '[' + attr + '="' + mintedId + '"]';
    }, REF_MINT_ATTR);
    // Wrap the chosen CSS in a `p.locator(...)` expression so the ref resolves
    // through the SAME resolver as every other locator. JSON-encode the CSS so a
    // quote/backslash in it cannot break out of the call.
    return `p.locator(${JSON.stringify(css)})`;
}
765
/**
 * Resolve a durable `query` `ref` and assert it matches EXACTLY ONE element,
 * otherwise throw a typed {@link StaleRefError} carrying the ref, the actual
 * match count and the calling verb.
 *
 * This is the guard `click`/`type` run BEFORE acting when `{byRef: true}`, so a
 * ref that now matches zero or several elements never silently acts on the
 * wrong one. The ref is resolved through the SAME {@link resolveLocator} the
 * verbs already use — no parallel addressing path.
 */
export async function assertRefResolvesToOne(page, ref, verb) {
    const locator = resolveLocator(page, ref);
    const matched = await locator.count();
    if (matched === 1) {
        return;
    }
    throw new StaleRefError(ref, matched, verb);
}
780
/**
 * Read ONE matched element into a {@link QueryRow}, carrying only the requested
 * families.
 *
 * - `attrs` / `props` are read together in a SINGLE page-world `evaluate` on the
 *   element (skipped entirely when neither is requested): `getAttribute(name)`
 *   for each attribute name, `el[name]` for each property name.
 * - `pw` extras come from the locator API: `visible` via `isVisible()`, `bbox`
 *   via `boundingBox()`.
 * - `withRef` adds the durable ref from {@link computeRef}. It runs LAST so a
 *   minted attribute can never affect this row's own attr/prop read.
 *
 * A family that was not requested is absent from the row.
 */
async function readRow(cell, attrs, props, pw, withRef) {
    const row = {};
    const wantsDomRead = attrs.length > 0 || props.length > 0;
    if (wantsDomRead) {
        // One round-trip into the page; the result object is returned by value.
        const read = await cell.evaluate((el, { attrNames, propNames }) => {
            const out = {};
            if (attrNames.length > 0) {
                out.attrs = {};
                for (const name of attrNames) {
                    out.attrs[name] = el.getAttribute(name);
                }
            }
            if (propNames.length > 0) {
                out.props = {};
                for (const name of propNames) {
                    out.props[name] = el[name];
                }
            }
            return out;
        }, { attrNames: [...attrs], propNames: [...props] });
        if (read.attrs !== undefined) {
            row.attrs = read.attrs;
        }
        if (read.props !== undefined) {
            row.props = read.props;
        }
    }
    if (pw.length > 0) {
        const extras = {};
        const wantsVisible = pw.includes('visible');
        const wantsBbox = pw.includes('bbox');
        if (wantsVisible) {
            extras.visible = await cell.isVisible();
        }
        if (wantsBbox) {
            extras.bbox = await cell.boundingBox();
        }
        row.pw = extras;
    }
    if (withRef) {
        row.ref = await computeRef(cell);
    }
    return row;
}
836
/**
 * Run the `mouse` verb against a page by driving `page.mouse` at the given
 * (x, y) coordinate.
 *
 * Supported actions:
 * - `move`  — move the pointer to (x, y); no button is involved.
 * - `click` — click at (x, y) with the chosen button.
 * - `down`  — move to (x, y), then press the chosen button.
 * - `up`    — move to (x, y), then release the chosen button.
 *
 * `down` and `up` act at the current pointer position, which is why each one
 * moves to (x, y) first: the two halves of a manual drag then land where the
 * caller asked.
 *
 * An unrecognised `action` is rejected with an `Error` instead of resolving as
 * a silent no-op, so a mistyped action can never look like a successful
 * gesture.
 *
 * @param page - object exposing a `mouse` with `move`/`click`/`down`/`up`.
 * @param input - `{ action, x, y, button? }`; `button` defaults to `'left'`.
 * @returns {Promise<void>} resolves once the gesture has been issued.
 * @throws {Error} when `input.action` is not one of the supported actions.
 */
export async function doMouse(page, input) {
    const button = input.button ?? 'left';
    switch (input.action) {
        case 'move':
            // A bare move takes no button (it is a pointer move, not a press).
            await page.mouse.move(input.x, input.y);
            return;
        case 'click':
            await page.mouse.click(input.x, input.y, { button });
            return;
        case 'down':
            await page.mouse.move(input.x, input.y);
            await page.mouse.down({ button });
            return;
        case 'up':
            await page.mouse.move(input.x, input.y);
            await page.mouse.up({ button });
            return;
        default:
            // Fail loudly before touching the pointer at all.
            throw new Error(`mouse: unknown action ${JSON.stringify(input.action)} (expected one of "move", "click", "down", "up").`);
    }
}
867
/**
 * Run the `screenshot` verb against a page: capture the requested scope to a
 * PNG file and return `{ path, width, height }` — the image bytes themselves
 * are never returned.
 *
 * Scopes (`options.scope`, default `'viewport'`):
 * - `viewport` — `page.screenshot` without `fullPage`.
 * - `full`     — `page.screenshot` with `fullPage: true`.
 * - `element`  — screenshot of the first element matched by `options.locator`,
 *   resolved through the shared `resolveLocator`.
 *
 * Scope and locator are validated together before anything is written:
 * `element` requires a locator, and every other scope refuses one.
 *
 * The output path comes from `resolveScreenshotPath` (a minted name, or the
 * validated `options.out` override); the managed directory is created if
 * needed. The reported width/height are read back from the captured PNG by
 * `pngDimensions`, not assumed from the viewport.
 *
 * @param page - the page to capture.
 * @param screenshotsDir - managed directory screenshots are written under.
 * @param options - optional `{ scope, locator, out }`.
 * @returns {Promise<{path: string, width: number, height: number}>}
 * @throws {Error} on a scope/locator mismatch, or when the capture is not a PNG.
 */
export async function takeScreenshot(page, screenshotsDir, options) {
    const scope = options?.scope ?? 'viewport';
    const wantsElement = scope === 'element';
    const hasLocator = options?.locator !== undefined;
    // Exactly-one-of style validation: the two mistakes are reported separately.
    if (wantsElement && !hasLocator) {
        throw new Error('screenshot --scope element requires --locator <expr> (the element to clip to).');
    }
    if (!wantsElement && hasLocator) {
        throw new Error(`screenshot --locator is only valid with --scope element (got scope ${JSON.stringify(scope)}).`);
    }
    const path = await resolveScreenshotPath(screenshotsDir, options?.out);
    await mkdir(screenshotsDir, { recursive: true });
    const buffer = wantsElement
        ? await resolveLocator(page, options.locator)
            .first()
            .screenshot({ path, type: 'png' })
        : await page.screenshot({ path, type: 'png', fullPage: scope === 'full' });
    const { width, height } = pngDimensions(buffer, path);
    return { path, width, height };
}
919
/**
 * Read the pixel dimensions of a PNG straight from its header, without
 * decoding the image.
 *
 * Layout relied on: an 8-byte signature, then the first chunk — a 4-byte
 * length, the 4-byte chunk type `IHDR`, and the IHDR data whose first two
 * fields are the width and height as big-endian uint32s (byte offsets 16 and
 * 20 of the file).
 *
 * Both the signature and the `IHDR` chunk type are verified before the two
 * fields are read, so a buffer that merely starts with the PNG magic but has a
 * different first chunk is rejected instead of yielding garbage dimensions.
 *
 * @param buffer - Node `Buffer` holding the PNG bytes.
 * @param path - where the PNG was written; used only in error messages.
 * @returns {{width: number, height: number}} the image's pixel dimensions.
 * @throws {Error} when the buffer is too short, lacks the PNG signature, or
 *   does not begin with an IHDR chunk.
 */
function pngDimensions(buffer, path) {
    const PNG_SIGNATURE = '89504e470d0a1a0a';
    // 8 (signature) + 4 (length) + 4 (type) + 4 (width) + 4 (height).
    const MIN_HEADER_BYTES = 24;
    if (buffer.length < MIN_HEADER_BYTES ||
        buffer.subarray(0, 8).toString('hex') !== PNG_SIGNATURE) {
        throw new Error(`screenshot: the file written at ${path} is not a valid PNG (no PNG signature).`);
    }
    // Offsets 16/20 are only width/height when the first chunk is IHDR.
    if (buffer.toString('latin1', 12, 16) !== 'IHDR') {
        throw new Error(`screenshot: the file written at ${path} is not a valid PNG (first chunk is not IHDR).`);
    }
    return {
        width: buffer.readUInt32BE(16),
        height: buffer.readUInt32BE(20),
    };
}
936
/**
 * Resolve the PNG output path for a screenshot.
 *
 * - No `out` (undefined or empty string): a freshly minted file name joined
 *   onto `screenshotsDir`.
 * - `out` given: it is resolved (relative values against the managed
 *   directory, absolute values as-is) and must land strictly inside the
 *   managed directory. The managed directory itself, any location above it,
 *   and any location on a different root are refused.
 *
 * Containment is decided from the relative path between the managed root and
 * the candidate: the candidate escapes only when the FIRST SEGMENT of that
 * relative path is exactly `..`. A name that merely begins with two dots
 * (`..shot.png`, `..cache/shot.png`) is inside the root and is accepted.
 *
 * @param screenshotsDir - the managed screenshots directory.
 * @param out - optional caller-chosen output path.
 * @returns {Promise<string>} the path the PNG should be written to.
 * @throws {ScreenshotPathError} when `out` does not resolve to a location
 *   inside the managed directory.
 */
async function resolveScreenshotPath(screenshotsDir, out) {
    if (out === undefined || out === '') {
        return join(screenshotsDir, mintScreenshotName());
    }
    const managedRoot = resolvePath(screenshotsDir);
    const candidate = isAbsolute(out)
        ? resolvePath(out)
        : resolvePath(managedRoot, out);
    const rel = relative(managedRoot, candidate);
    // Match `..` only as a whole leading segment. Both separators are accepted
    // so the check needs no platform-specific constant; on POSIX this still
    // refuses a name starting with `..\`, which errs on the side of refusal.
    const climbsOut = /^\.\.(?:[\\/]|$)/.test(rel);
    // `rel === ''` is the managed directory itself; an absolute `rel` means the
    // candidate lives on a different root.
    if (rel === '' || climbsOut || isAbsolute(rel)) {
        throw new ScreenshotPathError(out, managedRoot);
    }
    return candidate;
}
959
/**
 * Mint a PNG file name of the form `webhands-<timestamp>-<suffix>.png`.
 *
 * - `<timestamp>` is the current ISO-8601 instant with `:` and `.` replaced by
 *   `-`, so the name is filesystem-friendly and sorts by capture time.
 * - `<suffix>` is always exactly 8 base-36 characters. The base-36 rendering
 *   of a random draw can be shorter than 8 digits (or have no fractional
 *   digits at all), so it is right-padded with `0` to keep the name's shape
 *   fixed and never produce an empty suffix.
 *
 * The suffix comes from `Math.random()`: it makes collisions between rapid
 * shots unlikely but is not a uniqueness guarantee and is not suitable as a
 * secret.
 *
 * @returns {string} the minted file name (no directory component).
 */
function mintScreenshotName() {
    const SUFFIX_LENGTH = 8;
    const stamp = new Date().toISOString().replace(/[:.]/g, '-');
    const rand = Math.random()
        .toString(36)
        .slice(2, 2 + SUFFIX_LENGTH)
        .padEnd(SUFFIX_LENGTH, '0');
    return `webhands-${stamp}-${rand}.png`;
}
325
968
  /** Map a Playwright cookie to the transport-neutral seam {@link Cookie}. */
326
969
  function toSeamCookie(c) {
327
970
  return {