webhands 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -65,6 +65,12 @@ const stealthOptions = z.object({
65
65
  .optional()
66
66
  .describe("Drive a browser already installed on the system (e.g. 'chrome', " +
67
67
  "'msedge') instead of the bundled Chromium."),
68
+ proxy: z
69
+ .string()
70
+ .optional()
71
+ .describe('Route ALL traffic and DNS through a SOCKS proxy. Give a SOCKS URL: ' +
72
+ 'socks5h://host:1080 tunnels DNS through the proxy too (no leak), ' +
73
+ 'socks5://host:1080 allows local DNS. A user:pass@ prefix is allowed.'),
68
74
  // Modelled as a `viewport` boolean so incur's `--no-<flag>` negation gives the
69
75
  // task-mandated `--no-viewport`: passing `--no-viewport` sets `viewport=false`
70
76
  // (i.e. noViewport=true). Absent => undefined => core decides the default
@@ -90,6 +96,11 @@ function launchPolicyFrom(options) {
90
96
  // `--no-viewport` (viewport=false) => noViewport:true; `--viewport` => false.
91
97
  // Only forward when the flag was given.
92
98
  ...(options.viewport !== undefined ? { noViewport: !options.viewport } : {}),
99
+ // Only forward --proxy when given, so the policy object stays minimal (the
100
+ // `serve` wiring tests assert it by exact shape).
101
+ ...(options.proxy !== undefined && options.proxy !== ''
102
+ ? { proxy: options.proxy }
103
+ : {}),
93
104
  };
94
105
  }
95
106
  /** The structured output schema shared by verbs that act on a page but return no data. */
@@ -122,6 +133,10 @@ function nextAct() {
122
133
  command: 'type',
123
134
  description: 'Type into an element addressed by a Playwright locator string.',
124
135
  },
136
+ {
137
+ command: 'query',
138
+ description: 'Read structured data (attrs/props/visibility) out of matched elements.',
139
+ },
125
140
  {
126
141
  command: 'eval',
127
142
  description: 'Run JavaScript in the page as an escape hatch.',
@@ -238,6 +253,7 @@ export function createCli(deps = {}) {
238
253
  stealth: z.boolean(),
239
254
  systemBrowser: z.string().optional(),
240
255
  noViewport: z.boolean().optional(),
256
+ proxy: z.string().optional(),
241
257
  }),
242
258
  async run(c) {
243
259
  try {
@@ -257,6 +273,7 @@ export function createCli(deps = {}) {
257
273
  ...(policy.noViewport !== undefined
258
274
  ? { noViewport: policy.noViewport }
259
275
  : {}),
276
+ ...(policy.proxy !== undefined ? { proxy: policy.proxy } : {}),
260
277
  }, {
261
278
  cta: {
262
279
  commands: [
@@ -471,14 +488,22 @@ export function createCli(deps = {}) {
471
488
  args: z.object({
472
489
  locator: z
473
490
  .string()
474
- .describe("A raw Playwright locator expression, e.g. getByRole('button', { name: 'Search' })."),
491
+ .describe("A raw Playwright locator expression, e.g. getByRole('button', { name: 'Search' }). " +
492
+ 'With --by-ref, a durable `ref` from `query --with-refs` instead.'),
493
+ }),
494
+ options: connectionOptions.extend({
495
+ 'by-ref': z
496
+ .boolean()
497
+ .default(false)
498
+ .describe('Treat the argument as a durable `ref` from `query --with-refs`: ' +
499
+ 'resolve it but fail LOUD (stale-ref) if it now matches zero or more ' +
500
+ 'than one element, instead of silently clicking the wrong one.'),
475
501
  }),
476
- options: connectionOptions,
477
502
  output: actionOutput.extend({ verb: z.literal('click') }),
478
503
  async run(c) {
479
504
  try {
480
505
  return await withSession(provider, targetFrom(c.options), async (s) => {
481
- await s.page.click(locator(c.args.locator));
506
+ await s.page.click(locator(c.args.locator), c.options['by-ref'] ? { byRef: true } : undefined);
482
507
  return c.ok({ ok: true, verb: 'click' }, { cta: { commands: [nextSnapshot()] } });
483
508
  });
484
509
  }
@@ -492,15 +517,23 @@ export function createCli(deps = {}) {
492
517
  args: z.object({
493
518
  locator: z
494
519
  .string()
495
- .describe('A raw Playwright locator expression for the target input.'),
520
+ .describe('A raw Playwright locator expression for the target input. ' +
521
+ 'With --by-ref, a durable `ref` from `query --with-refs` instead.'),
496
522
  text: z.string().describe('The text to type into the element.'),
497
523
  }),
498
- options: connectionOptions,
524
+ options: connectionOptions.extend({
525
+ 'by-ref': z
526
+ .boolean()
527
+ .default(false)
528
+ .describe('Treat the locator argument as a durable `ref` from `query --with-refs`: ' +
529
+ 'resolve it but fail LOUD (stale-ref) if it now matches zero or more ' +
530
+ 'than one element, instead of silently typing into the wrong one.'),
531
+ }),
499
532
  output: actionOutput.extend({ verb: z.literal('type') }),
500
533
  async run(c) {
501
534
  try {
502
535
  return await withSession(provider, targetFrom(c.options), async (s) => {
503
- await s.page.type(locator(c.args.locator), c.args.text);
536
+ await s.page.type(locator(c.args.locator), c.args.text, c.options['by-ref'] ? { byRef: true } : undefined);
504
537
  return c.ok({ ok: true, verb: 'type' }, { cta: { commands: [nextSnapshot()] } });
505
538
  });
506
539
  }
@@ -516,7 +549,17 @@ export function createCli(deps = {}) {
516
549
  .string()
517
550
  .describe('A JS expression evaluated in the page context (e.g. document.title).'),
518
551
  }),
519
- options: connectionOptions,
552
+ // The ONE `--frame` flag on the surface (R1): `eval` runs page-world JS and
553
+ // cannot carry a `frameLocator(...)` the way locator-taking verbs do, so it
554
+ // gets an explicit SAME-ORIGIN frame selector. Omitted == top-document eval.
555
+ options: connectionOptions.extend({
556
+ frame: z
557
+ .string()
558
+ .optional()
559
+ .describe('Evaluate inside the named SAME-ORIGIN child frame instead of the top ' +
560
+ "document: a CSS selector for the iframe element (e.g. '#main-iframe'). " +
561
+ 'A cross-origin frame is unreachable and fails loud.'),
562
+ }),
520
563
  output: z.object({
521
564
  ok: z.literal(true),
522
565
  verb: z.literal('eval'),
@@ -527,7 +570,9 @@ export function createCli(deps = {}) {
527
570
  async run(c) {
528
571
  try {
529
572
  return await withSession(provider, targetFrom(c.options), async (s) => {
530
- const result = await s.page.eval(c.args.expression);
573
+ const result = await s.page.eval(c.args.expression, c.options.frame !== undefined
574
+ ? { frame: c.options.frame }
575
+ : undefined);
531
576
  return c.ok({ ok: true, verb: 'eval', result }, { cta: { commands: [nextSnapshot()] } });
532
577
  });
533
578
  }
@@ -536,6 +581,482 @@ export function createCli(deps = {}) {
536
581
  }
537
582
  },
538
583
  });
584
+ // --- Tier-1 read verbs: query + state shorthands (prd broaden-agent-verb-
585
+ // surface, R2/R5). Each is its own incur command, so one definition yields
586
+ // both the CLI command and the MCP tool. List flags (--attr/--prop/--pw) are
587
+ // REPEATABLE, not comma-joined (R5): incur arrays collect each occurrence.
588
+ // There is NO --frame flag (frame scope rides IN the locator string, R1).
589
+ cli.command('query', {
590
+ description: 'Read structured data out of the element(s) a Playwright locator matches: ' +
591
+ 'one row per match carrying exactly the requested DOM attributes (--attr), ' +
592
+ 'live JS properties (--prop), and Playwright extras (--pw visible|bbox).',
593
+ args: z.object({
594
+ locator: z
595
+ .string()
596
+ .describe('A raw Playwright locator expression addressing the element(s) to read. ' +
597
+ "Frame scope rides in the string, e.g. frameLocator('#f').locator('#x')."),
598
+ }),
599
+ options: connectionOptions.extend({
600
+ attr: z
601
+ .array(z.string())
602
+ .default([])
603
+ .describe('A DOM ATTRIBUTE to read (getAttribute), e.g. href. Repeatable.'),
604
+ prop: z
605
+ .array(z.string())
606
+ .default([])
607
+ .describe('A live JS PROPERTY to read (el[name]), e.g. innerText. Repeatable.'),
608
+ pw: z
609
+ .array(z.enum(['visible', 'bbox']))
610
+ .default([])
611
+ .describe('A Playwright extra to include: visible (actionability-grade) or ' +
612
+ 'bbox (viewport-pixel box). Repeatable.'),
613
+ limit: z.coerce
614
+ .number()
615
+ .optional()
616
+ .describe('Bound the number of rows returned.'),
617
+ 'with-refs': z
618
+ .boolean()
619
+ .default(false)
620
+ .describe('Also return a durable `ref` per row — a locator handle you feed back ' +
621
+ 'to `click`/`type` --by-ref to act on THAT element even after the ' +
622
+ 'list mutates (fixes the .nth() index-drift footgun). Reuses a ' +
623
+ 'stable unique attribute (id/data-testid/…) when present, mints ' +
624
+ 'a namespaced data-webhands-ref only as a fallback. Off by default: ' +
625
+ 'the default query is a pure read and mutates nothing.'),
626
+ }),
627
+ output: z.object({
628
+ ok: z.literal(true),
629
+ verb: z.literal('query'),
630
+ rows: z
631
+ .array(z.object({
632
+ attrs: z.record(z.string(), z.string().nullable()).optional(),
633
+ props: z.record(z.string(), z.unknown()).optional(),
634
+ pw: z
635
+ .object({
636
+ visible: z.boolean().optional(),
637
+ bbox: z
638
+ .object({
639
+ x: z.number(),
640
+ y: z.number(),
641
+ width: z.number(),
642
+ height: z.number(),
643
+ })
644
+ .nullable()
645
+ .optional(),
646
+ })
647
+ .optional(),
648
+ ref: z
649
+ .string()
650
+ .optional()
651
+ .describe('A durable locator handle for this element (only with --with-refs); ' +
652
+ 'pass it to click/type --by-ref to act on it later.'),
653
+ }))
654
+ .describe('One row per matched element, each carrying the asked fields.'),
655
+ }),
656
+ async run(c) {
657
+ try {
658
+ return await withSession(provider, targetFrom(c.options), async (s) => {
659
+ const rows = await s.page.query(locator(c.args.locator), {
660
+ attrs: c.options.attr,
661
+ props: c.options.prop,
662
+ pw: c.options.pw,
663
+ ...(c.options.limit !== undefined ? { limit: c.options.limit } : {}),
664
+ ...(c.options['with-refs'] ? { refs: true } : {}),
665
+ });
666
+ return c.ok({ ok: true, verb: 'query', rows }, { cta: { commands: [nextSnapshot()] } });
667
+ });
668
+ }
669
+ catch (cause) {
670
+ return fail(c, cause, binary);
671
+ }
672
+ },
673
+ });
674
+ cli.command('count', {
675
+ description: 'Count how many elements a Playwright locator matches (a match-set size).',
676
+ args: z.object({
677
+ locator: z.string().describe('A raw Playwright locator expression.'),
678
+ }),
679
+ options: connectionOptions,
680
+ output: z.object({
681
+ ok: z.literal(true),
682
+ verb: z.literal('count'),
683
+ count: z.number().describe('How many elements matched.'),
684
+ }),
685
+ async run(c) {
686
+ try {
687
+ return await withSession(provider, targetFrom(c.options), async (s) => {
688
+ const count = await s.page.count(locator(c.args.locator));
689
+ return c.ok({ ok: true, verb: 'count', count }, { cta: { commands: [nextSnapshot()] } });
690
+ });
691
+ }
692
+ catch (cause) {
693
+ return fail(c, cause, binary);
694
+ }
695
+ },
696
+ });
697
+ cli.command('exists', {
698
+ description: 'Whether a Playwright locator matches at least one element (count > 0).',
699
+ args: z.object({
700
+ locator: z.string().describe('A raw Playwright locator expression.'),
701
+ }),
702
+ options: connectionOptions,
703
+ output: z.object({
704
+ ok: z.literal(true),
705
+ verb: z.literal('exists'),
706
+ exists: z.boolean().describe('Whether any element matched.'),
707
+ }),
708
+ async run(c) {
709
+ try {
710
+ return await withSession(provider, targetFrom(c.options), async (s) => {
711
+ const exists = await s.page.exists(locator(c.args.locator));
712
+ return c.ok({ ok: true, verb: 'exists', exists }, { cta: { commands: [nextSnapshot()] } });
713
+ });
714
+ }
715
+ catch (cause) {
716
+ return fail(c, cause, binary);
717
+ }
718
+ },
719
+ });
720
+ cli.command('is-visible', {
721
+ description: 'Whether the first match is actionability-grade visible (a present-but-hidden ' +
722
+ 'element reads false).',
723
+ args: z.object({
724
+ locator: z.string().describe('A raw Playwright locator expression.'),
725
+ }),
726
+ options: connectionOptions,
727
+ output: z.object({
728
+ ok: z.literal(true),
729
+ verb: z.literal('isVisible'),
730
+ visible: z.boolean().describe("The first match's visibility."),
731
+ }),
732
+ async run(c) {
733
+ try {
734
+ return await withSession(provider, targetFrom(c.options), async (s) => {
735
+ const visible = await s.page.isVisible(locator(c.args.locator));
736
+ return c.ok({ ok: true, verb: 'isVisible', visible }, { cta: { commands: [nextSnapshot()] } });
737
+ });
738
+ }
739
+ catch (cause) {
740
+ return fail(c, cause, binary);
741
+ }
742
+ },
743
+ });
744
+ cli.command('get-attribute', {
745
+ description: 'Read a single DOM attribute off the first match (null if absent or no match).',
746
+ args: z.object({
747
+ locator: z.string().describe('A raw Playwright locator expression.'),
748
+ }),
749
+ options: connectionOptions.extend({
750
+ name: z
751
+ .string()
752
+ .describe('The DOM attribute name to read (e.g. href, data-sitekey).'),
753
+ }),
754
+ output: z.object({
755
+ ok: z.literal(true),
756
+ verb: z.literal('getAttribute'),
757
+ name: z.string().describe('The attribute that was read.'),
758
+ value: z
759
+ .string()
760
+ .nullable()
761
+ .describe('The attribute value, or null if absent / no match.'),
762
+ }),
763
+ async run(c) {
764
+ try {
765
+ return await withSession(provider, targetFrom(c.options), async (s) => {
766
+ const value = await s.page.getAttribute(locator(c.args.locator), c.options.name);
767
+ return c.ok({
768
+ ok: true,
769
+ verb: 'getAttribute',
770
+ name: c.options.name,
771
+ value,
772
+ }, { cta: { commands: [nextSnapshot()] } });
773
+ });
774
+ }
775
+ catch (cause) {
776
+ return fail(c, cause, binary);
777
+ }
778
+ },
779
+ });
780
+ // --- Tier-2 rich input verbs: press / hover / select / scroll / drag (prd
781
+ // broaden-agent-verb-surface, stories 8-12, R5). Each is its own incur
782
+ // command, so one definition yields both the CLI command and the MCP tool.
783
+ // Positional-arg + small-flag, mirroring `click` (R5); `select`/`scroll` use
784
+ // loud "exactly one of" validation, mirroring `wait`. No --frame flag: frame
785
+ // scope rides IN the locator string (R1).
786
+ cli.command('press', {
787
+ description: 'Press a keyboard key or chord (e.g. Enter, ArrowLeft, w, Control+A) at a ' +
788
+ 'locator or, with no locator, the focused element.',
789
+ args: z.object({
790
+ key: z
791
+ .string()
792
+ .describe('A key or chord in Playwright grammar: a key name (Enter, ArrowLeft, ' +
793
+ 'a) or Modifier+Key (Control+A, Shift+Tab).'),
794
+ }),
795
+ options: connectionOptions.extend({
796
+ locator: z
797
+ .string()
798
+ .optional()
799
+ .describe('A raw Playwright locator expression to press the key at (focuses it ' +
800
+ 'first). Omit to press at the focused element.'),
801
+ }),
802
+ output: actionOutput.extend({ verb: z.literal('press') }),
803
+ async run(c) {
804
+ try {
805
+ return await withSession(provider, targetFrom(c.options), async (s) => {
806
+ const target = c.options.locator !== undefined && c.options.locator !== ''
807
+ ? locator(c.options.locator)
808
+ : undefined;
809
+ await s.page.press(c.args.key, target);
810
+ return c.ok({ ok: true, verb: 'press' }, { cta: { commands: [nextSnapshot()] } });
811
+ });
812
+ }
813
+ catch (cause) {
814
+ return fail(c, cause, binary);
815
+ }
816
+ },
817
+ });
818
+ cli.command('hover', {
819
+ description: 'Hover the pointer over the element a Playwright locator addresses ' +
820
+ '(reveal hover menus / on-hover controls).',
821
+ args: z.object({
822
+ locator: z.string().describe('A raw Playwright locator expression.'),
823
+ }),
824
+ options: connectionOptions,
825
+ output: actionOutput.extend({ verb: z.literal('hover') }),
826
+ async run(c) {
827
+ try {
828
+ return await withSession(provider, targetFrom(c.options), async (s) => {
829
+ await s.page.hover(locator(c.args.locator));
830
+ return c.ok({ ok: true, verb: 'hover' }, { cta: { commands: [nextSnapshot()] } });
831
+ });
832
+ }
833
+ catch (cause) {
834
+ return fail(c, cause, binary);
835
+ }
836
+ },
837
+ });
838
+ cli.command('select', {
839
+ description: 'Choose an option in the native <select> a Playwright locator addresses, ' +
840
+ 'by --value OR --label (exactly one).',
841
+ args: z.object({
842
+ locator: z
843
+ .string()
844
+ .describe('A raw Playwright locator expression for the <select>.'),
845
+ }),
846
+ options: connectionOptions.extend({
847
+ value: z
848
+ .string()
849
+ .optional()
850
+ .describe("Match the option's value attribute (value form)."),
851
+ label: z
852
+ .string()
853
+ .optional()
854
+ .describe("Match the option's visible label text (label form)."),
855
+ }),
856
+ output: actionOutput.extend({
857
+ verb: z.literal('select'),
858
+ by: z.enum(['value', 'label']),
859
+ }),
860
+ async run(c) {
861
+ const choice = selectChoiceFrom(c.options);
862
+ if (choice === undefined) {
863
+ return c.error({
864
+ code: 'invalid-select',
865
+ message: 'select needs exactly one of --value <v> or --label <l>.',
866
+ });
867
+ }
868
+ try {
869
+ return await withSession(provider, targetFrom(c.options), async (s) => {
870
+ await s.page.select(locator(c.args.locator), choice);
871
+ return c.ok({
872
+ ok: true,
873
+ verb: 'select',
874
+ by: 'value' in choice ? 'value' : 'label',
875
+ }, { cta: { commands: [nextSnapshot()] } });
876
+ });
877
+ }
878
+ catch (cause) {
879
+ return fail(c, cause, binary);
880
+ }
881
+ },
882
+ });
883
+ cli.command('scroll', {
884
+ description: 'Scroll the page, either --to a Playwright locator (bring it into view) ' +
885
+ 'or --by a dx,dy pixel delta (exactly one).',
886
+ options: connectionOptions.extend({
887
+ to: z
888
+ .string()
889
+ .optional()
890
+ .describe('A raw Playwright locator expression to scroll into view (to form).'),
891
+ by: z
892
+ .string()
893
+ .optional()
894
+ .describe('A dx,dy pixel delta to scroll by, e.g. 0,400 (down) or -100,0 (by form).'),
895
+ }),
896
+ output: actionOutput.extend({
897
+ verb: z.literal('scroll'),
898
+ form: z.enum(['to', 'by']),
899
+ }),
900
+ async run(c) {
901
+ const target = scrollTargetFrom(c.options);
902
+ if (target === undefined) {
903
+ return c.error({
904
+ code: 'invalid-scroll',
905
+ message: 'scroll needs exactly one of --to <locator> or --by <dx,dy> ' +
906
+ '(dx,dy two numbers, e.g. 0,400).',
907
+ });
908
+ }
909
+ try {
910
+ return await withSession(provider, targetFrom(c.options), async (s) => {
911
+ await s.page.scroll(target);
912
+ return c.ok({
913
+ ok: true,
914
+ verb: 'scroll',
915
+ form: 'to' in target ? 'to' : 'by',
916
+ }, { cta: { commands: [nextSnapshot()] } });
917
+ });
918
+ }
919
+ catch (cause) {
920
+ return fail(c, cause, binary);
921
+ }
922
+ },
923
+ });
924
+ cli.command('drag', {
925
+ description: 'Drag the element a source locator addresses onto the element a target ' +
926
+ 'locator addresses (drag-reorder UIs, drag-slider challenges).',
927
+ args: z.object({
928
+ source: z
929
+ .string()
930
+ .describe('A raw Playwright locator expression for the drag source.'),
931
+ target: z
932
+ .string()
933
+ .describe('A raw Playwright locator expression for the drop target.'),
934
+ }),
935
+ options: connectionOptions,
936
+ output: actionOutput.extend({ verb: z.literal('drag') }),
937
+ async run(c) {
938
+ try {
939
+ return await withSession(provider, targetFrom(c.options), async (s) => {
940
+ await s.page.drag(locator(c.args.source), locator(c.args.target));
941
+ return c.ok({ ok: true, verb: 'drag' }, { cta: { commands: [nextSnapshot()] } });
942
+ });
943
+ }
944
+ catch (cause) {
945
+ return fail(c, cause, binary);
946
+ }
947
+ },
948
+ });
949
+ // --- Tier-4 coordinate + screenshot verbs: mouse / screenshot (prd
950
+ // broaden-agent-verb-surface, R3/R5, stories 17-19). Each is its own incur
951
+ // command, so one definition yields both the CLI command and the MCP tool.
952
+ // The seam stays string/number-typed (ADR-0003 as amended by the Tier-4 ADR):
953
+ // `mouse` passes plain numbers + an enum, `screenshot` returns a file PATH
954
+ // (never image bytes). The MCP `screenshot` result surfaces that path as the
955
+ // attachment-capable `path` field an agent reads/attaches.
956
+ cli.command('mouse', {
957
+ description: 'Coordinate mouse input at VIEWPORT CSS-pixels (Playwright page.mouse, NOT ' +
958
+ 'OS screen coordinates): click / move / down / up at --x,--y. A pixel in a ' +
959
+ 'VIEWPORT screenshot maps directly to these coordinates (the look-then-click ' +
960
+ 'loop); a FULL-PAGE screenshot does NOT.',
961
+ options: connectionOptions.extend({
962
+ action: z
963
+ .enum(['click', 'move', 'down', 'up'])
964
+ .default('click')
965
+ .describe('What to do at the coordinate (default: click).'),
966
+ x: z.coerce.number().describe('Viewport CSS-pixel X (left-relative).'),
967
+ y: z.coerce.number().describe('Viewport CSS-pixel Y (top-relative).'),
968
+ button: z
969
+ .enum(['left', 'right', 'middle'])
970
+ .default('left')
971
+ .describe('Which button for click/down/up (default: left).'),
972
+ }),
973
+ output: actionOutput.extend({
974
+ verb: z.literal('mouse'),
975
+ action: z.enum(['click', 'move', 'down', 'up']),
976
+ x: z.number(),
977
+ y: z.number(),
978
+ }),
979
+ async run(c) {
980
+ try {
981
+ return await withSession(provider, targetFrom(c.options), async (s) => {
982
+ const input = {
983
+ action: c.options.action,
984
+ x: c.options.x,
985
+ y: c.options.y,
986
+ button: c.options.button,
987
+ };
988
+ await s.page.mouse(input);
989
+ return c.ok({
990
+ ok: true,
991
+ verb: 'mouse',
992
+ action: c.options.action,
993
+ x: c.options.x,
994
+ y: c.options.y,
995
+ }, { cta: { commands: [nextSnapshot()] } });
996
+ });
997
+ }
998
+ catch (cause) {
999
+ return fail(c, cause, binary);
1000
+ }
1001
+ },
1002
+ });
1003
+ cli.command('screenshot', {
1004
+ description: 'Capture the page to a PNG FILE and return its PATH (never image bytes): ' +
1005
+ '--scope viewport (default, coordinate-matched to mouse) | full (whole page, ' +
1006
+ 'NOT coordinate-matched) | element (clipped to --locator, REQUIRED for element). ' +
1007
+ '--out overrides the path (validated to stay under the managed dir).',
1008
+ options: connectionOptions.extend({
1009
+ scope: z
1010
+ .enum(['viewport', 'full', 'element'])
1011
+ .default('viewport')
1012
+ .describe('Region to capture: viewport (default) | full | element (needs --locator).'),
1013
+ locator: z
1014
+ .string()
1015
+ .optional()
1016
+ .describe('A raw Playwright locator expression to clip to (REQUIRED for --scope ' +
1017
+ 'element, rejected otherwise). Frame scope rides in the string.'),
1018
+ out: z
1019
+ .string()
1020
+ .optional()
1021
+ .describe('Override the output PNG path (validated to stay under the managed dir).'),
1022
+ }),
1023
+ output: z.object({
1024
+ ok: z.literal(true),
1025
+ verb: z.literal('screenshot'),
1026
+ // `path` is the attachment-capable field (R5): a plain file PATH an agent
1027
+ // reads / attaches; no image bytes ever cross the seam.
1028
+ path: z
1029
+ .string()
1030
+ .describe('The PNG file path (read/attach this; never bytes).'),
1031
+ width: z.number().describe('The PNG pixel width.'),
1032
+ height: z.number().describe('The PNG pixel height.'),
1033
+ }),
1034
+ async run(c) {
1035
+ const options = screenshotOptionsFrom(c.options);
1036
+ if (options === undefined) {
1037
+ return c.error({
1038
+ code: 'invalid-screenshot',
1039
+ message: 'screenshot --scope element requires --locator <expr>; --locator is ' +
1040
+ 'only valid with --scope element.',
1041
+ });
1042
+ }
1043
+ try {
1044
+ return await withSession(provider, targetFrom(c.options), async (s) => {
1045
+ const shot = await s.page.screenshot(options);
1046
+ return c.ok({
1047
+ ok: true,
1048
+ verb: 'screenshot',
1049
+ path: shot.path,
1050
+ width: shot.width,
1051
+ height: shot.height,
1052
+ }, { cta: { commands: [nextSnapshot()] } });
1053
+ });
1054
+ }
1055
+ catch (cause) {
1056
+ return fail(c, cause, binary);
1057
+ }
1058
+ },
1059
+ });
539
1060
  cli.command('wait', {
540
1061
  description: 'Pace actions by waiting for a timeout, a locator to appear, or the next navigation.',
541
1062
  options: connectionOptions.extend({
@@ -662,8 +1183,11 @@ async function defaultServeSession(target, home, launchPolicy = {}) {
662
1183
  ...(launchPolicy.noViewport !== undefined
663
1184
  ? { noViewport: launchPolicy.noViewport }
664
1185
  : {}),
1186
+ ...(launchPolicy.proxy !== undefined ? { proxy: launchPolicy.proxy } : {}),
665
1187
  });
666
- const attach = new PlaywrightAttachTransport();
1188
+ // attach reuses the user's browser, but the managed screenshots dir still
1189
+ // honours the home-root override so a test isolates screenshot output.
1190
+ const attach = new PlaywrightAttachTransport([], home);
667
1191
  const transport = {
668
1192
  open(t) {
669
1193
  return t.mode === 'attach' ? attach.open(t) : launch.open(t);
@@ -687,6 +1211,80 @@ function waitConditionFrom(options) {
687
1211
  forms.push({ kind: 'navigation' });
688
1212
  return forms.length === 1 ? forms[0] : undefined;
689
1213
  }
1214
/**
 * Turn the `select` option forms into the seam's {@link SelectChoice}, or
 * `undefined` if zero or both of `--value` / `--label` were given (the command
 * reports that as a clear error). Mirrors `wait`'s loud "exactly one of"
 * validation (R5): an empty string counts as absent, so `--value ''` is treated
 * as not given.
 *
 * @param {{ value?: string, label?: string }} options The parsed `select` flags.
 * @returns {{ value: string } | { label: string } | undefined} The single
 *   chosen form, or `undefined` when the flags do not name exactly one form.
 */
function selectChoiceFrom(options) {
    // A flag counts as given only when it is neither missing nor the empty
    // string -- the same rule `scrollTargetFrom` applies to `--to` / `--by`.
    // Without the empty-string check, `--value ''` would be accepted as a
    // choice and `--value '' --label X` would be rejected as "both given".
    const isGiven = (flag) => flag !== undefined && flag !== '';
    const forms = [];
    if (isGiven(options.value))
        forms.push({ value: options.value });
    if (isGiven(options.label))
        forms.push({ label: options.label });
    // Exactly one form is valid; zero or two is the caller's error to report.
    return forms.length === 1 ? forms[0] : undefined;
}
1229
/**
 * Build the seam's {@link ScrollTarget} from the `scroll` flags.
 *
 * Exactly one of `--to` / `--by` must be supplied (an empty string counts as
 * not supplied). `--by` is parsed with `parseDelta`, so a malformed `dx,dy`
 * pair is rejected here instead of reaching the page as `NaN`. Any invalid
 * combination yields `undefined`, which the command turns into a clear error
 * (the same loud "exactly one of" validation `wait` uses, R5).
 *
 * @param {{ to?: string, by?: string }} options The parsed `scroll` flags.
 * @returns {{ to: unknown } | { by: { dx: number, dy: number } } | undefined}
 *   The single scroll form, or `undefined` when the flags are invalid.
 */
function scrollTargetFrom(options) {
    const wantsTo = !(options.to === undefined || options.to === '');
    const wantsBy = !(options.by === undefined || options.by === '');
    // The `--to` locator is resolved first, before `--by` is examined.
    const toForm = wantsTo ? { to: locator(options.to) } : undefined;
    if (!wantsBy) {
        // Only `--to` (or nothing at all) was supplied.
        return toForm;
    }
    const delta = parseDelta(options.by);
    if (delta === undefined || wantsTo) {
        // Either the delta is malformed, or both forms were given at once.
        return undefined;
    }
    return { by: delta };
}
1250
/**
 * Build the seam's {@link ScreenshotOptions} from the `screenshot` flags.
 *
 * The scope/locator pairing must agree: `--scope element` REQUIRES
 * `--locator`, and `--locator` is ONLY accepted with `--scope element`. When
 * they disagree the result is `undefined` and the command reports a clear
 * error (loud validation in the style of `wait`, R5). Empty `--locator` /
 * `--out` strings are treated as absent. The seam re-validates as the
 * load-bearing check (an untyped RPC client too), so this is the friendly
 * fail-fast at the CLI edge.
 *
 * @param {{ scope: string, locator?: string, out?: string }} options The
 *   parsed `screenshot` flags.
 * @returns {object | undefined} The screenshot options, or `undefined` when
 *   the scope/locator pairing is invalid.
 */
function screenshotOptionsFrom(options) {
    const locatorGiven = !(options.locator === undefined || options.locator === '');
    const elementScope = options.scope === 'element';
    // Valid only when "element scope" and "locator given" are both true or
    // both false.
    if (elementScope !== locatorGiven) {
        return undefined;
    }
    const shot = { scope: options.scope };
    if (locatorGiven) {
        shot.locator = locator(options.locator);
    }
    if (options.out !== undefined && options.out !== '') {
        shot.out = options.out;
    }
    return shot;
}
1273
/**
 * Parse a `dx,dy` pixel-delta string into a `{dx, dy}` pair, or `undefined` if
 * it is not exactly two comma-separated finite numbers. Used by `scroll --by`
 * so a malformed delta fails loud instead of scrolling by `NaN`.
 *
 * @param {string} raw The raw `--by` value, e.g. `0,400` or `-100,0`.
 * @returns {{ dx: number, dy: number } | undefined} The parsed delta, or
 *   `undefined` when `raw` is malformed.
 */
function parseDelta(raw) {
    const parts = raw.split(',');
    if (parts.length !== 2)
        return undefined;
    const [dx, dy] = parts.map((part) => {
        const text = part.trim();
        // `Number('')` is 0, so a missing component (",", "5,", " ,7") would
        // otherwise be accepted as a zero delta; treat it as malformed.
        return text === '' ? Number.NaN : Number(text);
    });
    if (!Number.isFinite(dx) || !Number.isFinite(dy))
        return undefined;
    return { dx, dy };
}
690
1288
  /**
691
1289
  * The shared failure path. Map a typed `core` error to its user-facing message
692
1290
  * + exact fix command (PRD story 17); fall back to a generic error otherwise.