@webhands/core 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +20 -4
  2. package/dist/errors.d.ts +92 -1
  3. package/dist/errors.d.ts.map +1 -1
  4. package/dist/errors.js +100 -0
  5. package/dist/errors.js.map +1 -1
  6. package/dist/hand-host.d.ts +198 -5
  7. package/dist/hand-host.d.ts.map +1 -1
  8. package/dist/hand-host.js +664 -21
  9. package/dist/hand-host.js.map +1 -1
  10. package/dist/index.d.ts +4 -4
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +3 -3
  13. package/dist/index.js.map +1 -1
  14. package/dist/playwright-attach-transport.d.ts +8 -1
  15. package/dist/playwright-attach-transport.d.ts.map +1 -1
  16. package/dist/playwright-attach-transport.js +19 -4
  17. package/dist/playwright-attach-transport.js.map +1 -1
  18. package/dist/playwright-launch-transport.d.ts.map +1 -1
  19. package/dist/playwright-launch-transport.js +13 -4
  20. package/dist/playwright-launch-transport.js.map +1 -1
  21. package/dist/profile-location.d.ts +19 -0
  22. package/dist/profile-location.d.ts.map +1 -1
  23. package/dist/profile-location.js +21 -0
  24. package/dist/profile-location.js.map +1 -1
  25. package/dist/seam.d.ts +501 -7
  26. package/dist/seam.d.ts.map +1 -1
  27. package/dist/seam.js +31 -0
  28. package/dist/seam.js.map +1 -1
  29. package/dist/session-rpc.d.ts +63 -1
  30. package/dist/session-rpc.d.ts.map +1 -1
  31. package/dist/session-rpc.js +174 -11
  32. package/dist/session-rpc.js.map +1 -1
  33. package/dist/stub-transport.d.ts.map +1 -1
  34. package/dist/stub-transport.js +74 -6
  35. package/dist/stub-transport.js.map +1 -1
  36. package/dist/test-fixtures/fixture-pages.d.ts.map +1 -1
  37. package/dist/test-fixtures/fixture-pages.js +994 -0
  38. package/dist/test-fixtures/fixture-pages.js.map +1 -1
  39. package/dist/test-fixtures/fixture-server.d.ts.map +1 -1
  40. package/dist/test-fixtures/fixture-server.js +33 -3
  41. package/dist/test-fixtures/fixture-server.js.map +1 -1
  42. package/package.json +1 -1
  43. package/src/errors.ts +134 -1
  44. package/src/hand-host.ts +797 -21
  45. package/src/index.ts +20 -1
  46. package/src/playwright-attach-transport.ts +25 -3
  47. package/src/playwright-launch-transport.ts +13 -2
  48. package/src/profile-location.ts +25 -0
  49. package/src/seam.ts +535 -7
  50. package/src/session-rpc.ts +276 -14
  51. package/src/stub-transport.ts +83 -6
  52. package/src/test-fixtures/fixture-pages.ts +1010 -0
  53. package/src/test-fixtures/fixture-server.ts +32 -3
package/src/seam.ts CHANGED
@@ -74,12 +74,189 @@ export interface Cookie {
74
74
  readonly sameSite?: 'Strict' | 'Lax' | 'None';
75
75
  }
76
76
 
77
+ /**
78
+ * Options for the {@link WebHandsPage.eval} verb.
79
+ *
80
+ * This is an OPTIONS OBJECT, not a positional argument, on purpose (R1, the
81
+ * reversibility invariant): the optional `frame` qualifier is an ADDITION to
82
+ * this object, so a call passing no options keeps `eval`'s existing behaviour
83
+ * unchanged. `eval` is the ONE verb that carries a `frame?` qualifier, because
84
+ * it runs page-world JS and CANNOT carry a `frameLocator(...)` expression the
85
+ * way the locator-taking verbs do (the spike confirmed `ReferenceError`); the
86
+ * other verbs reach a same-origin frame through a `frameLocator(...)` hop in
87
+ * their locator string instead (R1).
88
+ */
89
+ export interface EvalOptions {
90
+ /**
91
+ * A transport-neutral SELECTOR for a SAME-ORIGIN child frame to evaluate the
92
+ * expression in — a CSS selector for the `<iframe>` element (e.g.
93
+ * `#main-iframe`), the form the single frame resolver understands. NEVER a
94
+ * Playwright `Frame` handle (no Playwright type crosses the seam, ADR-0003).
95
+ *
96
+ * Omitted == today's top-document `eval` (backward compatible). When given,
97
+ * the expression runs in the named frame and its result crosses the seam by
98
+ * the SAME structured-clone contract `eval` already has.
99
+ *
100
+ * SAME-ORIGIN ONLY: a cross-origin frame is a browser security boundary
101
+ * page-world JS cannot cross, so a selector that resolves to a CROSS-ORIGIN
102
+ * frame fails LOUD with a typed error (never a silent empty); cross-origin
103
+ * reach is the separate Tier-4 frameLocator/coordinate surface.
104
+ */
105
+ readonly frame?: string;
106
+ }
107
+
77
108
  /** What to wait for in the {@link WebHandsPage.wait} verb. */
78
109
  export type WaitCondition =
79
110
  | {readonly kind: 'timeout'; readonly ms: number}
80
111
  | {readonly kind: 'locator'; readonly target: LocatorString}
81
112
  | {readonly kind: 'navigation'};
82
113
 
114
+ /**
115
+ * Which native `<select>` option the {@link WebHandsPage.select} verb chooses
116
+ * (prd `broaden-agent-verb-surface`, Tier-2). EXACTLY ONE of `value` / `label`,
117
+ * a two-member union. NOTE: the members share no discriminant, so TypeScript
118
+ * still accepts a literal carrying BOTH keys; validate at runtime (the CLI mirrors the exclusion with `wait`-style loud validation, R5):
119
+ *
120
+ * - `value` — match the option's `value` attribute (`<option value="v">`).
121
+ * - `label` — match the option's VISIBLE label (its text), what a human reads.
122
+ *
123
+ * Plain strings only, so nothing Playwright-shaped crosses the seam (ADR-0003).
124
+ */
125
+ export type SelectChoice = {readonly value: string} | {readonly label: string};
126
+
127
+ /**
128
+ * Where the {@link WebHandsPage.scroll} verb scrolls (prd
129
+ * `broaden-agent-verb-surface`, Tier-2). EXACTLY ONE of `to` / `by`, a
130
+ * discriminated union mirroring `wait`'s mutually-exclusive forms:
131
+ *
132
+ * - `to` — bring the element a locator EXPRESSION addresses into view
133
+ * (`scrollIntoViewIfNeeded`); reach an off-viewport control.
134
+ * - `by` — scroll the page by a pixel delta (`mouse.wheel`), `dx`/`dy` in
135
+ * CSS pixels (positive `dy` scrolls DOWN, the wheel convention).
136
+ *
137
+ * `to` carries a {@link LocatorString}; `by` carries plain numbers — no
138
+ * Playwright type crosses the seam (ADR-0003).
139
+ */
140
+ export type ScrollTarget =
141
+ | {readonly to: LocatorString}
142
+ | {readonly by: {readonly dx: number; readonly dy: number}};
143
+
144
+ /**
145
+ * Which mouse button the {@link WebHandsPage.mouse} verb uses (prd
146
+ * `broaden-agent-verb-surface`, Tier-4, R3; story 17). Plain string enum, the
147
+ * Playwright `page.mouse` button vocabulary, so nothing Playwright-shaped
148
+ * crosses the seam (ADR-0003 as amended by the Tier-4 ADR).
149
+ */
150
+ export type MouseButton = 'left' | 'right' | 'middle';
151
+
152
+ /**
153
+ * What the {@link WebHandsPage.mouse} verb does at the given coordinate (prd
154
+ * `broaden-agent-verb-surface`, Tier-4, R3):
155
+ *
156
+ * - `'click'` — a full press-and-release at `(x, y)` (`mouse.click`).
157
+ * - `'move'` — move the pointer to `(x, y)` without pressing (`mouse.move`),
158
+ * e.g. to trigger a hover affordance at a raw coordinate.
159
+ * - `'down'` / `'up'` — press / release the button at the current position
160
+ * (`mouse.down` / `mouse.up`), the two halves of a manual drag.
161
+ */
162
+ export type MouseAction = 'click' | 'move' | 'down' | 'up';
163
+
164
+ /**
165
+ * A coordinate mouse input (prd `broaden-agent-verb-surface`, Tier-4, R3, story
166
+ * 17). The coordinate-input counterpart to the locator-addressing `click`, for
167
+ * the VISION/TILE captcha family and any task that must act at a raw pixel an
168
+ * agent SAW in a screenshot rather than at a DOM element.
169
+ *
170
+ * COORDINATE FRAME (load-bearing). `x`/`y` are VIEWPORT CSS-pixels (the
171
+ * Playwright `page.mouse` frame), NOT OS-level screen coordinates (webhands
172
+ * never injects OS input). A pixel `(x, y)` in a VIEWPORT {@link Screenshot}
173
+ * maps DIRECTLY to a `mouse` click `(x, y)` — that is the look-then-click
174
+ * contract the agent relies on. A FULL-PAGE screenshot is NOT coordinate-matched
175
+ * (it includes off-viewport content), so its pixels do not map to `mouse`
176
+ * coordinates (see {@link ScreenshotScope}).
177
+ *
178
+ * Plain numbers + a string enum only, so nothing Playwright-shaped crosses the
179
+ * seam (ADR-0003 as amended by the Tier-4 ADR).
180
+ */
181
+ export interface MouseInput {
182
+ /** What to do at the coordinate (click / move / down / up). */
183
+ readonly action: MouseAction;
184
+ /** Viewport CSS-pixel X (left-relative), the `page.mouse` frame. */
185
+ readonly x: number;
186
+ /** Viewport CSS-pixel Y (top-relative), the `page.mouse` frame. */
187
+ readonly y: number;
188
+ /** Which button for `click`/`down`/`up`. Defaults to `'left'`. */
189
+ readonly button?: MouseButton;
190
+ }
191
+
192
+ /**
193
+ * Which region a {@link WebHandsPage.screenshot} captures (prd
194
+ * `broaden-agent-verb-surface`, Tier-4, R3; stories 17-19):
195
+ *
196
+ * - `'viewport'` — the DEFAULT: exactly the visible viewport. Its pixels are
197
+ * COORDINATE-MATCHED to the `mouse` verb (a pixel at `(x, y)` is the `mouse`
198
+ * click `(x, y)`), so it is the shot the look-then-click loop uses.
199
+ * - `'full'` — the whole scrollable page (`fullPage`), for READING scrolled-out
200
+ * content. It is NOT coordinate-matched (it includes off-viewport content), so
201
+ * its pixels must NOT be fed back as `mouse` coordinates.
202
+ * - `'element'` — clipped to the element a locator addresses (just the captcha
203
+ * widget, ideal for focusing a vision model). REQUIRES a
204
+ * {@link ScreenshotOptions.locator}; absent, the verb rejects LOUD (like
205
+ * `wait`'s mutually-exclusive validation).
206
+ */
207
+ export type ScreenshotScope = 'viewport' | 'full' | 'element';
208
+
209
+ /**
210
+ * Options for the {@link WebHandsPage.screenshot} verb (prd
211
+ * `broaden-agent-verb-surface`, Tier-4, R3; R5). An OPTIONS OBJECT so future
212
+ * fields stay additive (R1).
213
+ *
214
+ * The seam stays ADR-0003-clean (as amended by the Tier-4 ADR): the verb takes
215
+ * STRINGS + an enum and returns a file PATH — NEVER image bytes.
216
+ */
217
+ export interface ScreenshotOptions {
218
+ /**
219
+ * Which region to capture. Defaults to `'viewport'` (the coordinate-matched
220
+ * shot the `mouse` loop uses). See {@link ScreenshotScope}.
221
+ */
222
+ readonly scope?: ScreenshotScope;
223
+ /**
224
+ * The element to clip to for `scope: 'element'`, a raw Playwright locator
225
+ * EXPRESSION resolved through the SAME resolver the other verbs use (so a
226
+ * `frameLocator(...)` hop reaches a frame widget). REQUIRED for `'element'`
227
+ * and rejected (loud, like `wait`) for the other scopes.
228
+ */
229
+ readonly locator?: LocatorString;
230
+ /**
231
+ * Caller override for the output PNG path. When omitted, webhands MINTS a
232
+ * unique path under its managed screenshots dir. When given, it is VALIDATED
233
+ * to stay UNDER that managed dir (a path that escapes it is rejected with a
234
+ * typed error), so the verb never writes to an arbitrary filesystem location.
235
+ * A plain string — no bytes cross the seam.
236
+ */
237
+ readonly out?: string;
238
+ }
239
+
240
+ /**
241
+ * The result of a {@link WebHandsPage.screenshot}: the file PATH webhands wrote
242
+ * the PNG to, plus its pixel dimensions (prd `broaden-agent-verb-surface`,
243
+ * Tier-4, R3, story 19).
244
+ *
245
+ * `path` is a plain STRING — the load-bearing ADR-0003 (as amended) choice: a
246
+ * path, not image bytes, crosses the seam, so the seam stays string/number-typed
247
+ * and the agent reads/attaches the file itself. `width`/`height` are the PNG's
248
+ * pixel dimensions, so an agent knows the coordinate space of a VIEWPORT shot
249
+ * before it maps a pixel to a `mouse` click.
250
+ */
251
+ export interface Screenshot {
252
+ /** The filesystem PATH of the written PNG (a string; never bytes). */
253
+ readonly path: string;
254
+ /** The PNG's pixel width. */
255
+ readonly width: number;
256
+ /** The PNG's pixel height. */
257
+ readonly height: number;
258
+ }
259
+
83
260
  /**
84
261
  * Which page view a {@link Snapshot} carries.
85
262
  *
@@ -93,7 +270,16 @@ export type WaitCondition =
93
270
  */
94
271
  export type SnapshotView = 'accessibility' | 'full';
95
272
 
96
- /** Options for the {@link WebHandsPage.snapshot} verb. */
273
+ /**
274
+ * Options for the {@link WebHandsPage.snapshot} verb.
275
+ *
276
+ * `full` is the ONLY recognised key. Unknown keys are REJECTED (not silently
277
+ * ignored) by {@link validateSnapshotOptions}, which every entry point calls:
278
+ * passing `{view: 'full'}` (a natural mistake, because the RESULT carries a
279
+ * {@link SnapshotView} `view` field) throws a clear error instead of silently
280
+ * returning the wrong view. There is no `view` option; `view` is a RESULT
281
+ * field, set by the verb from `full`.
282
+ */
97
283
  export interface SnapshotOptions {
98
284
  /**
99
285
  * When `true`, return the raw DOM (`view: 'full'`) instead of the default
@@ -102,6 +288,46 @@ export interface SnapshotOptions {
102
288
  readonly full?: boolean;
103
289
  }
104
290
 
291
/**
 * Validate a {@link SnapshotOptions} value at a verb entry point, the SINGLE
 * source of truth shared by the in-process host and the RPC server dispatch so
 * neither path can silently drop a misspelled option.
 *
 * Accepts `undefined`, `{}`, and `{full: boolean}`. REJECTS, by throwing a
 * clear `Error`:
 *
 * - a value that is not a plain options object: a primitive, `null`, or an
 *   array. `null` and arrays are named as such in the message, because
 *   `typeof` reports `"object"` for both, which would make the error read
 *   "must be an object ..., got object";
 * - any key other than `full`; the message names the offending key(s) and
 *   hints the right one (e.g. `{view: 'full'}` throws
 *   `snapshot: unknown option "view" (did you mean { full: true }?)`);
 * - a `full` that is present but not a boolean.
 *
 * This turns a silent wrong-result into a loud error: it does not change
 * behaviour for any valid input.
 *
 * @param options - The caller-supplied options, possibly untyped at runtime.
 * @returns The SAME `options` value, unchanged, so the call can wrap a call
 *   site inline.
 * @throws Error when `options` is misshapen, as described above.
 */
export function validateSnapshotOptions(
  options?: SnapshotOptions,
): SnapshotOptions | undefined {
  if (options === undefined) {
    return options;
  }
  // The static type is only a promise: inspect the runtime value as `unknown`
  // so the shape checks below are not narrowed away by the checker.
  const raw: unknown = options;
  const isArray = Array.isArray(raw);
  if (typeof raw !== 'object' || raw === null || isArray) {
    // `typeof null` and `typeof []` are both "object"; spell out what was
    // actually received so the message is not self-contradictory.
    const got = raw === null ? 'null' : isArray ? 'array' : typeof raw;
    throw new Error(
      `snapshot: options must be an object like { full: true }, got ${got}`,
    );
  }
  const unknownKeys = Object.keys(options).filter((key) => key !== 'full');
  if (unknownKeys.length > 0) {
    const named = unknownKeys.map((key) => `"${key}"`).join(', ');
    throw new Error(
      `snapshot: unknown option ${named} (did you mean { full: true }?)`,
    );
  }
  if (options.full !== undefined && typeof options.full !== 'boolean') {
    throw new Error(
      `snapshot: option "full" must be a boolean, got ${typeof options.full}`,
    );
  }
  return options;
}
330
+
105
331
  /**
106
332
  * A structured, token-cheap view of the current page with stable element refs.
107
333
  *
@@ -128,6 +354,157 @@ export interface Snapshot {
128
354
  readonly content: string;
129
355
  }
130
356
 
357
+ /**
358
+ * The Playwright-locator-derived extras a {@link QueryRow} can carry under
359
+ * `pw`. This is the ONLY fixed (closed) set in {@link QueryOptions}: these two
360
+ * facts are NOT expressible as a DOM attribute or a live JS property, so they
361
+ * cannot ride in `attrs`/`props` (which are caller-named and open). Everything
362
+ * else the agent wants is named freely as an attribute or a property (R2, no
363
+ * curated DOM field set).
364
+ *
365
+ * - `'visible'` — actionability-grade visibility (`locator.isVisible()`),
366
+ * strictly better than the `offsetParent` hack: a present-but-hidden element
367
+ * reads `false`.
368
+ * - `'bbox'` — the element's bounding box (`locator.boundingBox()`) in VIEWPORT
369
+ * CSS-pixels, the coordinate frame the Tier-4 `mouse` verb uses.
370
+ */
371
+ export type PwExtra = 'visible' | 'bbox';
372
+
373
+ /**
374
+ * An element's bounding box in VIEWPORT CSS-pixels, the value of a
375
+ * {@link QueryRow}'s `pw.bbox`. Plain numbers only, so nothing Playwright-typed
376
+ * crosses the seam (ADR-0003). `null` when the element has no box (e.g. it is
377
+ * not rendered), mirroring `locator.boundingBox()`.
378
+ */
379
+ export interface BoundingBox {
380
+ readonly x: number;
381
+ readonly y: number;
382
+ readonly width: number;
383
+ readonly height: number;
384
+ }
385
+
386
+ /**
387
+ * Options for the {@link WebHandsPage.query} verb (R2).
388
+ *
389
+ * This is an OPTIONS OBJECT, not positional fields, on purpose (R1, the
390
+ * reversibility invariant a reviewer checks): a future optional `frame?`
391
+ * qualifier AND the T1b `ref` field are then PURE ADDITIONS to this object,
392
+ * breaking no existing call. Do NOT turn these into positional arguments.
393
+ *
394
+ * There is NO curated DOM field set: a row carries EXACTLY what the caller
395
+ * names here and nothing else. `attrs` and `props` are caller-named and OPEN
396
+ * (the agent already knows DOM/Playwright vocabulary); `pw` is the one closed
397
+ * set ({@link PwExtra}).
398
+ *
399
+ * `refs` is the OPT-IN durable-handle switch (R4): default `query` is a PURE
400
+ * READ that mints nothing and returns no `ref`; `refs: true` adds a `ref` to
401
+ * each row (see {@link QueryRow.ref}). It is a dedicated boolean, NOT a member
402
+ * of `pw`, because a `ref` is not a Playwright-locator-derived FACT about the
403
+ * element (the closed `pw` set) — it is an ADDRESS the agent acts on later. The
404
+ * CLI exposes it as `--with-refs`.
405
+ *
406
+ * The `attrs` vs `props` split is deliberate and LOUD — webhands NEVER
407
+ * auto-detects which of the two a name like `value`/`checked` means, because a
408
+ * silent attribute-vs-property guess is the footgun this repo's "loud over
409
+ * silent" style rejects.
410
+ */
411
+ export interface QueryOptions {
412
+ /**
413
+ * DOM ATTRIBUTES to read by name, via `getAttribute(name)` — what is written
414
+ * in the markup (`href`, `data-sitekey`, `type`). A missing attribute reads
415
+ * `null`.
416
+ */
417
+ readonly attrs?: readonly string[];
418
+ /**
419
+ * Live JS PROPERTIES to read by name, via `el[name]` — runtime state
420
+ * (`innerText`, `value`, `checked`, `selectedIndex`). `text` is just
421
+ * `props: ['innerText']`; there is no special `text` field.
422
+ */
423
+ readonly props?: readonly string[];
424
+ /**
425
+ * Playwright-locator-derived extras to include (the ONLY closed set; see
426
+ * {@link PwExtra}).
427
+ */
428
+ readonly pw?: readonly PwExtra[];
429
+ /** Bound the number of rows returned (token economy on a multi-match). */
430
+ readonly limit?: number;
431
+ /**
432
+ * Opt-in to a durable element {@link QueryRow.ref} per row (R4; finding
433
+ * `query-ref-mint-mechanism-attribute-beats-weakmap`). Default (omitted /
434
+ * `false`) keeps `query` a PURE READ: no `ref` field, and the page is NOT
435
+ * mutated. `true` computes a `ref` per matched element by the PREFERENCE
436
+ * LADDER — REUSE the element's own stable UNIQUE attribute when present
437
+ * (`id`/`data-testid`/…, ZERO DOM mutation), MINT a namespaced
438
+ * `data-webhands-ref` attribute ONLY as the fallback for an anonymous element.
439
+ *
440
+ * Mints are single-`query`-scoped: each `refs: true` query SWEEPS the prior
441
+ * query's mints first, so a ref can never match a stale element from two
442
+ * queries ago. An action verb resolves a `ref` with loud staleness detection
443
+ * (resolve-to-zero / resolve-to-many => {@link StaleRefError}); see
444
+ * {@link ActionOptions.byRef}.
445
+ */
446
+ readonly refs?: boolean;
447
+ }
448
+
449
+ /**
450
+ * One matched element's data, carrying EXACTLY the fields the caller named in
451
+ * {@link QueryOptions} and nothing else (R2). A sub-object is present ONLY when
452
+ * the caller asked for that family, and within it a key is present for every
453
+ * name requested:
454
+ *
455
+ * - `attrs[name]` is the `getAttribute(name)` value (`null` if absent).
456
+ * - `props[name]` is the live `el[name]` value, structurally cloned by VALUE
457
+ * (the same contract as `eval`; ADR-0003: no Playwright/CDP type leaks).
458
+ * - `pw.visible` / `pw.bbox` are the requested {@link PwExtra} values.
459
+ *
460
+ * When `query` is called with NO fields, each row is an empty object `{}`: the
461
+ * caller asked for nothing, so the row carries nothing (R2, "a row carries
462
+ * EXACTLY what the caller asked for").
463
+ */
464
+ export interface QueryRow {
465
+ readonly attrs?: Readonly<Record<string, string | null>>;
466
+ readonly props?: Readonly<Record<string, unknown>>;
467
+ readonly pw?: {
468
+ readonly visible?: boolean;
469
+ readonly bbox?: BoundingBox | null;
470
+ };
471
+ /**
472
+ * The element's durable HANDLE, present ONLY when the caller asked
473
+ * ({@link QueryOptions.refs}). It is a LOCATOR STRING the agent feeds back to
474
+ * an action verb (`click`/`type`) with `{byRef: true}` to act on THIS element
475
+ * later even after the list mutates — fixing the index-drift footgun where a
476
+ * positional `.nth(i)` silently clicks the wrong row.
477
+ *
478
+ * It is computed by the LADDER (R4): when the element has a stable UNIQUE
479
+ * attribute it IS that real locator (`#buy-charlie`, `[data-testid="x"]`),
480
+ * durable across framework reconciliation and ZERO DOM mutation; otherwise it
481
+ * is a minted `[data-webhands-ref="<id>"]` selector. Either way it is a plain
482
+ * STRING resolved through the ONE existing resolver — no new addressing engine,
483
+ * no Playwright type on the seam (ADR-0003/0004). It is a SHORT-LIVED handle:
484
+ * acting on it after a NODE-REPLACEMENT re-render or a navigation fails LOUD
485
+ * with {@link StaleRefError}, never a silent wrong-element action.
486
+ */
487
+ readonly ref?: string;
488
+ }
489
+
490
+ /**
491
+ * Options for an ACTION verb that may act on a durable {@link QueryRow.ref}
492
+ * instead of a raw locator (R4). An OPTIONS OBJECT so it is an ADDITIVE,
493
+ * non-breaking extension of `click`/`type` (R1): an existing call passing no options
494
+ * is unchanged.
495
+ *
496
+ * `byRef: true` tells the verb its `target` is a `ref` from a prior
497
+ * `query({refs: true})`, so it must enforce the loud-stale contract: resolve the
498
+ * ref through the SAME single resolver, then assert it matches EXACTLY ONE
499
+ * element — resolve-to-zero (removed/replaced) OR resolve-to-many (a cloned
500
+ * subtree) BOTH reject with a typed {@link StaleRefError}, never a silent
501
+ * wrong-element action. Omitted / `false` keeps the verb's plain locator
502
+ * behaviour (auto-waiting, first-match), unchanged.
503
+ */
504
+ export interface ActionOptions {
505
+ readonly byRef?: boolean;
506
+ }
507
+
131
508
  /**
132
509
  * The page-level verb surface. One method per verb in the domain glossary.
133
510
  * All element addressing flows through {@link LocatorString}.
@@ -138,13 +515,35 @@ export interface WebHandsPage {
138
515
  /**
139
516
  * Return a structured, token-cheap view of the page. Defaults to the
140
517
  * accessibility-tree + visible-text view with stable refs; pass
141
- * `{full: true}` to get the raw DOM instead (PRD story 7).
518
+ * `{full: true}` to get the raw DOM instead (PRD story 7). An unknown or
519
+ * misshapen option REJECTS (e.g. `{view: 'full'}`); it is never silently
520
+ * ignored (see {@link validateSnapshotOptions}).
142
521
  */
143
522
  snapshot(options?: SnapshotOptions): Promise<Snapshot>;
144
- /** Click the element addressed by a raw Playwright locator string. */
145
- click(target: LocatorString): Promise<void>;
146
- /** Type text into the element addressed by a raw Playwright locator string. */
147
- type(target: LocatorString, text: string): Promise<void>;
523
+ /**
524
+ * Click the element addressed by a raw Playwright locator string.
525
+ *
526
+ * With `{byRef: true}` the `target` is treated as a durable
527
+ * {@link QueryRow.ref} from a prior `query({refs: true})`: it is resolved
528
+ * through the SAME resolver but MUST match EXACTLY ONE element, else a typed
529
+ * {@link StaleRefError} (resolve-to-zero / resolve-to-many) — the loud-stale
530
+ * guarantee that makes a ref strictly safer than a positional `.nth(i)`. The
531
+ * options object is additive (R1); omitted keeps today's plain-locator click.
532
+ */
533
+ click(target: LocatorString, options?: ActionOptions): Promise<void>;
534
+ /**
535
+ * Type text into the element addressed by a raw Playwright locator string.
536
+ *
537
+ * With `{byRef: true}` the `target` is a durable {@link QueryRow.ref}, resolved
538
+ * with the same EXACTLY-ONE loud-stale contract as {@link WebHandsPage.click}
539
+ * (a typed {@link StaleRefError} on zero/many). The options object is additive
540
+ * (R1); omitted keeps today's plain-locator type.
541
+ */
542
+ type(
543
+ target: LocatorString,
544
+ text: string,
545
+ options?: ActionOptions,
546
+ ): Promise<void>;
148
547
  /**
149
548
  * Run a JavaScript EXPRESSION in the active page's context and return its
150
549
  * result, the `eval` escape hatch for cases no other verb covers (PRD story
@@ -185,14 +584,143 @@ export interface WebHandsPage {
185
584
  * narrow it. This is deliberately a thin passthrough to the transport's
186
585
  * serialize-and-return: `eval` does not re-encode or wrap the result, so an
187
586
  * agent gets exactly what the page produced.
587
+ *
588
+ * FRAME SCOPE ({@link EvalOptions.frame}). With no `frame` this is exactly
589
+ * the top-document `eval` above. With a `frame` selector the expression runs
590
+ * in that NAMED SAME-ORIGIN child frame instead (e.g. to fire a captcha
591
+ * `data-callback` or read a runtime-only value the top document cannot see),
592
+ * returning by the same structured clone. The frame resolves through the
593
+ * SAME single resolver `click`/`type` use (a `frameLocator(...)` over the
594
+ * selector; R1), so there is no parallel frame-addressing path. A selector
595
+ * that resolves to a CROSS-ORIGIN frame REJECTS with a typed
596
+ * cross-origin-frame error (page-world JS cannot cross a security boundary),
597
+ * never a silent empty result.
188
598
  */
189
- eval(expression: string): Promise<unknown>;
599
+ eval(expression: string, options?: EvalOptions): Promise<unknown>;
190
600
  /** Pace actions by waiting for a condition. */
191
601
  wait(condition: WaitCondition): Promise<void>;
192
602
  /** Read the session's cookies. */
193
603
  cookies(): Promise<readonly Cookie[]>;
194
604
  /** Seed the session's cookies. */
195
605
  setCookies(cookies: readonly Cookie[]): Promise<void>;
606
+ /**
607
+ * Read STRUCTURED data out of the element(s) addressed by a raw Playwright
608
+ * locator string (ADR-0004; already frame-capable for same-origin frames via
609
+ * a `frameLocator(...)` expression). Returns ONE ROW PER MATCH, each carrying
610
+ * EXACTLY the fields named in {@link QueryOptions} — caller-named `attrs`
611
+ * (DOM attributes) and `props` (live JS properties), plus the closed `pw`
612
+ * extras (R2). This kills the `eval`-returns-a-JSON-string pattern.
613
+ *
614
+ * The options are an OPTIONS OBJECT so a future `frame?` field is a
615
+ * non-breaking addition (R1); the locator resolves through the SAME single
616
+ * resolver `click`/`type`/`wait` use — no parallel addressing scheme.
617
+ *
618
+ * With `{refs: true}` (OPT-IN) each row also carries a durable
619
+ * {@link QueryRow.ref} the agent feeds back to `click`/`type` (`{byRef: true}`)
620
+ * to act on THAT element after the page mutates, fixing the index-drift
621
+ * footgun. The default (no `refs`) is a PURE READ that mints nothing.
622
+ *
623
+ * Values cross by structured clone, the SAME contract as `eval` (ADR-0003: no
624
+ * Playwright/CDP types on the seam). With no fields requested, each row is an
625
+ * empty object.
626
+ */
627
+ query(target: LocatorString, options?: QueryOptions): Promise<QueryRow[]>;
628
+ /**
629
+ * The number of elements the locator matches (a property of the MATCH SET,
630
+ * not a row field). A thin shorthand over the same machinery as
631
+ * {@link WebHandsPage.query}.
632
+ */
633
+ count(target: LocatorString): Promise<number>;
634
+ /** Whether the locator matches at least one element (`count(target) > 0`). */
635
+ exists(target: LocatorString): Promise<boolean>;
636
+ /**
637
+ * The first match's actionability-grade visibility (its `pw:['visible']`): a
638
+ * present-but-hidden element reads `false`, and an ABSENT element reads
639
+ * `false` too (no match cannot be visible).
640
+ */
641
+ isVisible(target: LocatorString): Promise<boolean>;
642
+ /**
643
+ * The first match's `name` DOM attribute (its `attrs:[name]`), via
644
+ * `getAttribute`. `null` when the attribute is absent OR the locator matches
645
+ * no element — both "there is no such attribute value to read".
646
+ */
647
+ getAttribute(target: LocatorString, name: string): Promise<string | null>;
648
+ /**
649
+ * Press a keyboard key or chord (prd `broaden-agent-verb-surface`, Tier-2,
650
+ * story 8) — arrows, `Enter`, `Space`, a letter (`w`), or a chord like
651
+ * `Control+A`. The chord grammar is Playwright's `keyboard.press` grammar:
652
+ * `Modifier+Modifier+Key`, modifiers `Control`/`Alt`/`Shift`/`Meta`, key names
653
+ * like `ArrowLeft`/`Enter`/`a` (see the task's ## Decisions note). The key is a
654
+ * plain STRING, so nothing Playwright-shaped crosses the seam (ADR-0003).
655
+ *
656
+ * With `target`, the key is sent to the element that locator addresses (it is
657
+ * focused first, the `locator.press` semantics); WITHOUT it, the key is sent to
658
+ * the page's currently focused element (`keyboard.press`). `target` is an
659
+ * optional trailing arg so a future `frame?` stays additive (R1).
660
+ */
661
+ press(key: string, target?: LocatorString): Promise<void>;
662
+ /**
663
+ * Hover the pointer over the element a locator addresses (prd
664
+ * `broaden-agent-verb-surface`, Tier-2, story 9), to reveal a hover menu /
665
+ * on-hover control `click` cannot surface (`locator.hover`).
666
+ */
667
+ hover(target: LocatorString): Promise<void>;
668
+ /**
669
+ * Choose an option in the native `<select>` a locator addresses (prd
670
+ * `broaden-agent-verb-surface`, Tier-2, story 10), by `value` OR by `label`
671
+ * (EXACTLY ONE; see {@link SelectChoice}). Maps to Playwright
672
+ * `locator.selectOption`; the chosen option is reflected in the element's live
673
+ * state (its `value` / `selectedIndex`).
674
+ */
675
+ select(target: LocatorString, choice: SelectChoice): Promise<void>;
676
+ /**
677
+ * Scroll the page, either TO an element a locator addresses or BY a pixel
678
+ * delta (prd `broaden-agent-verb-surface`, Tier-2, story 11; EXACTLY ONE form,
679
+ * see {@link ScrollTarget}). `to` reaches lazy-loaded / off-viewport content
680
+ * (`scrollIntoViewIfNeeded`); `by` nudges the page a fixed amount
681
+ * (`mouse.wheel`).
682
+ */
683
+ scroll(target: ScrollTarget): Promise<void>;
684
+ /**
685
+ * Drag the element `source` addresses onto the element `target` addresses (prd
686
+ * `broaden-agent-verb-surface`, Tier-2, story 12), for drag-reorder UIs and
687
+ * drag-slider challenges (`locator.dragTo`). Both are raw locator EXPRESSIONS
688
+ * resolved through the SAME resolver as `click`/`type` (ADR-0004).
689
+ */
690
+ drag(source: LocatorString, target: LocatorString): Promise<void>;
691
+ /**
692
+ * Coordinate mouse input at VIEWPORT CSS-pixels (prd
693
+ * `broaden-agent-verb-surface`, Tier-4, R3, story 17): click / move / press /
694
+ * release at a raw `(x, y)` the agent SAW in a VIEWPORT {@link
695
+ * WebHandsPage.screenshot}, the input half of the look-then-click loop. This
696
+ * is the coordinate counterpart to the locator-addressing {@link
697
+ * WebHandsPage.click}, for the vision/tile captcha family and any pixel-level
698
+ * task. It uses Playwright `page.mouse` semantics (viewport-relative CSS
699
+ * pixels), NOT OS-level screen input — see {@link MouseInput}.
700
+ *
701
+ * Plain numbers + a string enum cross the seam (ADR-0003 as amended by the
702
+ * Tier-4 ADR); no Playwright/CDP type leaks.
703
+ */
704
+ mouse(input: MouseInput): Promise<void>;
705
+ /**
706
+ * Capture the page to a PNG FILE and return its PATH (prd
707
+ * `broaden-agent-verb-surface`, Tier-4, R3; stories 17-19). webhands MINTS the
708
+ * PNG under its managed screenshots dir and returns `{path, width, height}`;
709
+ * NO image bytes cross the seam (the load-bearing ADR-0003-as-amended choice),
710
+ * so an agent reads / attaches the file by path.
711
+ *
712
+ * Three scopes ({@link ScreenshotScope}): `viewport` (default,
713
+ * COORDINATE-MATCHED to {@link WebHandsPage.mouse} — a pixel `(x, y)` here is
714
+ * the `mouse` click `(x, y)`), `full` (the whole scrollable page, for reading
715
+ * scrolled-out content, NOT coordinate-matched), and `element` (clipped to the
716
+ * element a locator addresses; the locator is REQUIRED and validated loud like
717
+ * `wait`).
718
+ *
719
+ * A caller MAY override the path ({@link ScreenshotOptions.out}); it is
720
+ * validated to stay UNDER the managed dir (an escaping path rejects with a
721
+ * typed error), so the verb never writes to an arbitrary location.
722
+ */
723
+ screenshot(options?: ScreenshotOptions): Promise<Screenshot>;
196
724
  }
197
725
 
198
726
  /**