@midscene/computer 1.8.0 → 1.8.1

This diff shows the changes between two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/lib/index.js CHANGED
@@ -47,9 +47,10 @@ __webpack_require__.d(__webpack_exports__, {
47
47
  RDPDevice: ()=>RDPDevice,
48
48
  agentFromComputer: ()=>agentFromComputer,
49
49
  checkAccessibilityPermission: ()=>checkAccessibilityPermission,
50
+ checkScreenRecordingPermission: ()=>checkScreenRecordingPermission,
50
51
  checkXvfbInstalled: ()=>checkXvfbInstalled,
51
- overrideAIConfig: ()=>env_namespaceObject.overrideAIConfig,
52
52
  getConnectedDisplays: ()=>getConnectedDisplays,
53
+ overrideAIConfig: ()=>env_namespaceObject.overrideAIConfig,
53
54
  agentForRDPComputer: ()=>agentForRDPComputer,
54
55
  ComputerDevice: ()=>ComputerDevice,
55
56
  ComputerAgent: ()=>ComputerAgent
@@ -61,7 +62,6 @@ const external_node_fs_namespaceObject = require("node:fs");
61
62
  const external_node_module_namespaceObject = require("node:module");
62
63
  const external_node_path_namespaceObject = require("node:path");
63
64
  const external_node_url_namespaceObject = require("node:url");
64
- const core_namespaceObject = require("@midscene/core");
65
65
  const device_namespaceObject = require("@midscene/core/device");
66
66
  const utils_namespaceObject = require("@midscene/core/utils");
67
67
  const img_namespaceObject = require("@midscene/shared/img");
@@ -145,15 +145,6 @@ function _define_property(obj, key, value) {
145
145
  else obj[key] = value;
146
146
  return obj;
147
147
  }
148
- const computerInputParamSchema = core_namespaceObject.z.object({
149
- value: core_namespaceObject.z.string().describe('The text to input'),
150
- mode: core_namespaceObject.z["enum"]([
151
- 'replace',
152
- 'clear',
153
- 'append'
154
- ]).default('replace').optional().describe('Input mode: replace, clear, or append'),
155
- locate: (0, core_namespaceObject.getMidsceneLocationSchema)().describe('The input field to be filled').optional()
156
- });
157
148
  const SMOOTH_MOVE_STEPS_TAP = 8;
158
149
  const SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
159
150
  const SMOOTH_MOVE_DELAY_TAP = 8;
@@ -464,7 +455,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
464
455
  }
465
456
  async healthCheck() {
466
457
  console.log('[HealthCheck] Starting health check...');
467
- console.log("[HealthCheck] @midscene/computer v1.8.0");
458
+ console.log("[HealthCheck] @midscene/computer v1.8.1");
468
459
  console.log('[HealthCheck] Taking screenshot...');
469
460
  const screenshotTimeout = 15000;
470
461
  let timeoutId;
@@ -530,21 +521,38 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
530
521
  debugDevice('Taking screenshot', {
531
522
  displayId: this.displayId
532
523
  });
533
- try {
534
- const options = {
535
- format: 'png'
536
- };
537
- if (void 0 !== this.displayId) if ('darwin' === process.platform) {
538
- const screenIndex = Number(this.displayId);
539
- if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
540
- } else options.screen = this.displayId;
541
- debugDevice('Screenshot options', options);
524
+ const options = {
525
+ format: 'png'
526
+ };
527
+ if (void 0 !== this.displayId) if ('darwin' === process.platform) {
528
+ const screenIndex = Number(this.displayId);
529
+ if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
530
+ } else options.screen = this.displayId;
531
+ debugDevice('Screenshot options', options);
532
+ const MAX_ATTEMPTS = 3;
533
+ const RETRY_DELAY_MS = 300;
534
+ let lastRawMessage = '';
535
+ for(let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)try {
542
536
  const buffer = await external_screenshot_desktop_default()(options);
537
+ if (attempt > 1) debugDevice(`Screenshot succeeded on attempt ${attempt}`);
543
538
  return (0, img_namespaceObject.createImgBase64ByFormat)('png', buffer.toString('base64'));
544
539
  } catch (error) {
545
- debugDevice(`Screenshot failed: ${error}`);
546
- throw new Error(`Failed to take screenshot: ${error}`);
540
+ lastRawMessage = error instanceof Error ? error.message : String(error);
541
+ const isMacTransient = 'darwin' === process.platform && /could not create image from display/i.test(lastRawMessage);
542
+ const willRetry = isMacTransient && attempt < MAX_ATTEMPTS;
543
+ debugDevice(`Screenshot attempt ${attempt} failed: ${lastRawMessage}${willRetry ? ' — retrying' : ''}`);
544
+ if (!willRetry) break;
545
+ await (0, utils_namespaceObject.sleep)(RETRY_DELAY_MS);
547
546
  }
547
+ if ('darwin' === process.platform && /could not create image from display/i.test(lastRawMessage)) throw new Error(`Failed to take screenshot on macOS: the host process is missing Screen Recording permission, or the target display is locked/sleeping.
548
+
549
+ Please follow these steps:
550
+ 1. Open System Settings > Privacy & Security > Screen Recording
551
+ 2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)
552
+ 3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.
553
+
554
+ Original error: ${lastRawMessage}`);
555
+ throw new Error(`Failed to take screenshot: ${lastRawMessage}`);
548
556
  }
549
557
  async size() {
550
558
  external_node_assert_default()(device_libnut, 'libnut not initialized');
@@ -590,228 +598,111 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
590
598
  external_node_assert_default()(device_libnut, 'libnut not initialized');
591
599
  await this.typeViaClipboard(text);
592
600
  }
601
+ async selectAllAndDelete() {
602
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
603
+ if (this.useAppleScript) {
604
+ sendKeyViaAppleScript('a', [
605
+ 'command'
606
+ ]);
607
+ await (0, utils_namespaceObject.sleep)(50);
608
+ sendKeyViaAppleScript('backspace', []);
609
+ return;
610
+ }
611
+ const modifier = 'darwin' === process.platform ? 'command' : 'control';
612
+ device_libnut.keyTap('a', [
613
+ modifier
614
+ ]);
615
+ await (0, utils_namespaceObject.sleep)(50);
616
+ device_libnut.keyTap('backspace');
617
+ }
618
+ async pressKeyboardShortcut(keyName) {
619
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
620
+ const keys = keyName.split('+');
621
+ const modifiers = keys.slice(0, -1).map(normalizeKeyName);
622
+ const key = normalizePrimaryKey(keys[keys.length - 1]);
623
+ debugDevice('KeyboardPress', {
624
+ original: keyName,
625
+ key,
626
+ modifiers,
627
+ driver: this.useAppleScript ? "applescript" : 'libnut'
628
+ });
629
+ if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
630
+ else if (modifiers.length > 0) device_libnut.keyTap(key, modifiers);
631
+ else device_libnut.keyTap(key);
632
+ }
633
+ async performScroll(param) {
634
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
635
+ if (param.locate) {
636
+ const element = param.locate;
637
+ const [x, y] = element.center;
638
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
639
+ }
640
+ const scrollType = param?.scrollType;
641
+ const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
642
+ if (edgeSpec) {
643
+ if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
644
+ if (this.useAppleScript) {
645
+ sendKeyViaAppleScript(edgeSpec.key);
646
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
647
+ return;
648
+ }
649
+ const [dx, dy] = edgeSpec.libnut;
650
+ for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
651
+ device_libnut.scrollMouse(dx, dy);
652
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
653
+ }
654
+ return;
655
+ }
656
+ if ('singleAction' === scrollType || !scrollType) {
657
+ const distance = param?.distance || 500;
658
+ const direction = param?.direction || 'down';
659
+ const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
660
+ if (isKnownDirection) {
661
+ const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
662
+ if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
663
+ }
664
+ if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
665
+ const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
666
+ const key = 'up' === direction ? 'pageup' : 'pagedown';
667
+ for(let i = 0; i < pages; i++){
668
+ sendKeyViaAppleScript(key);
669
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
670
+ }
671
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
672
+ return;
673
+ }
674
+ const ticks = Math.ceil(distance / 100);
675
+ const directionMap = {
676
+ up: [
677
+ 0,
678
+ ticks
679
+ ],
680
+ down: [
681
+ 0,
682
+ -ticks
683
+ ],
684
+ left: [
685
+ -ticks,
686
+ 0
687
+ ],
688
+ right: [
689
+ ticks,
690
+ 0
691
+ ]
692
+ };
693
+ const [dx, dy] = directionMap[direction] || [
694
+ 0,
695
+ -ticks
696
+ ];
697
+ device_libnut.scrollMouse(dx, dy);
698
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
699
+ return;
700
+ }
701
+ throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
702
+ }
593
703
  actionSpace() {
594
704
  const defaultActions = [
595
- (0, device_namespaceObject.defineActionTap)(async (param)=>{
596
- external_node_assert_default()(device_libnut, 'libnut not initialized');
597
- const element = param.locate;
598
- external_node_assert_default()(element, 'Element not found, cannot tap');
599
- const [x, y] = element.center;
600
- const targetX = Math.round(x);
601
- const targetY = Math.round(y);
602
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
603
- device_libnut.mouseToggle('down', 'left');
604
- await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
605
- device_libnut.mouseToggle('up', 'left');
606
- }),
607
- (0, device_namespaceObject.defineActionDoubleClick)(async (param)=>{
608
- external_node_assert_default()(device_libnut, 'libnut not initialized');
609
- const element = param.locate;
610
- external_node_assert_default()(element, 'Element not found, cannot double click');
611
- const [x, y] = element.center;
612
- device_libnut.moveMouse(Math.round(x), Math.round(y));
613
- device_libnut.mouseClick('left', true);
614
- }),
615
- (0, device_namespaceObject.defineActionRightClick)(async (param)=>{
616
- external_node_assert_default()(device_libnut, 'libnut not initialized');
617
- const element = param.locate;
618
- external_node_assert_default()(element, 'Element not found, cannot right click');
619
- const [x, y] = element.center;
620
- device_libnut.moveMouse(Math.round(x), Math.round(y));
621
- device_libnut.mouseClick('right');
622
- }),
623
- (0, device_namespaceObject.defineAction)({
624
- name: 'MouseMove',
625
- description: 'Move the mouse to the element',
626
- interfaceAlias: 'aiHover',
627
- paramSchema: device_namespaceObject.actionHoverParamSchema,
628
- sample: {
629
- locate: {
630
- prompt: 'the navigation menu item "Products"'
631
- }
632
- },
633
- call: async (param)=>{
634
- external_node_assert_default()(device_libnut, 'libnut not initialized');
635
- const element = param.locate;
636
- external_node_assert_default()(element, 'Element not found, cannot move mouse');
637
- const [x, y] = element.center;
638
- const targetX = Math.round(x);
639
- const targetY = Math.round(y);
640
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
641
- await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
642
- }
643
- }),
644
- (0, device_namespaceObject.defineAction)({
645
- name: 'Input',
646
- description: 'Input text into the input field',
647
- interfaceAlias: 'aiInput',
648
- paramSchema: computerInputParamSchema,
649
- sample: {
650
- value: 'test@example.com',
651
- locate: {
652
- prompt: 'the email input field'
653
- }
654
- },
655
- call: async (param)=>{
656
- external_node_assert_default()(device_libnut, 'libnut not initialized');
657
- const element = param.locate;
658
- if (element) {
659
- const [x, y] = element.center;
660
- device_libnut.moveMouse(Math.round(x), Math.round(y));
661
- device_libnut.mouseClick('left');
662
- await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
663
- if ('append' !== param.mode) {
664
- if (this.useAppleScript) {
665
- sendKeyViaAppleScript('a', [
666
- 'command'
667
- ]);
668
- await (0, utils_namespaceObject.sleep)(50);
669
- sendKeyViaAppleScript('backspace', []);
670
- } else {
671
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
672
- device_libnut.keyTap('a', [
673
- modifier
674
- ]);
675
- await (0, utils_namespaceObject.sleep)(50);
676
- device_libnut.keyTap('backspace');
677
- }
678
- await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
679
- }
680
- }
681
- if ('clear' === param.mode) return;
682
- if (!param.value) return;
683
- await this.smartTypeString(param.value);
684
- }
685
- }),
686
- (0, device_namespaceObject.defineActionScroll)(async (param)=>{
687
- external_node_assert_default()(device_libnut, 'libnut not initialized');
688
- if (param.locate) {
689
- const element = param.locate;
690
- const [x, y] = element.center;
691
- device_libnut.moveMouse(Math.round(x), Math.round(y));
692
- }
693
- const scrollType = param?.scrollType;
694
- const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
695
- if (edgeSpec) {
696
- if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
697
- if (this.useAppleScript) {
698
- sendKeyViaAppleScript(edgeSpec.key);
699
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
700
- return;
701
- }
702
- const [dx, dy] = edgeSpec.libnut;
703
- for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
704
- device_libnut.scrollMouse(dx, dy);
705
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
706
- }
707
- return;
708
- }
709
- if ('singleAction' === scrollType || !scrollType) {
710
- const distance = param?.distance || 500;
711
- const direction = param?.direction || 'down';
712
- const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
713
- if (isKnownDirection) {
714
- const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
715
- if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
716
- }
717
- if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
718
- const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
719
- const key = 'up' === direction ? 'pageup' : 'pagedown';
720
- for(let i = 0; i < pages; i++){
721
- sendKeyViaAppleScript(key);
722
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
723
- }
724
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
725
- return;
726
- }
727
- const ticks = Math.ceil(distance / 100);
728
- const directionMap = {
729
- up: [
730
- 0,
731
- ticks
732
- ],
733
- down: [
734
- 0,
735
- -ticks
736
- ],
737
- left: [
738
- -ticks,
739
- 0
740
- ],
741
- right: [
742
- ticks,
743
- 0
744
- ]
745
- };
746
- const [dx, dy] = directionMap[direction] || [
747
- 0,
748
- -ticks
749
- ];
750
- device_libnut.scrollMouse(dx, dy);
751
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
752
- return;
753
- }
754
- throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
755
- }),
756
- (0, device_namespaceObject.defineActionKeyboardPress)(async (param)=>{
757
- external_node_assert_default()(device_libnut, 'libnut not initialized');
758
- if (param.locate) {
759
- const [x, y] = param.locate.center;
760
- device_libnut.moveMouse(Math.round(x), Math.round(y));
761
- device_libnut.mouseClick('left');
762
- await (0, utils_namespaceObject.sleep)(50);
763
- }
764
- const keys = param.keyName.split('+');
765
- const modifiers = keys.slice(0, -1).map(normalizeKeyName);
766
- const key = normalizePrimaryKey(keys[keys.length - 1]);
767
- debugDevice('KeyboardPress', {
768
- original: param.keyName,
769
- key,
770
- modifiers,
771
- driver: this.useAppleScript ? "applescript" : 'libnut'
772
- });
773
- if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
774
- else if (modifiers.length > 0) device_libnut.keyTap(key, modifiers);
775
- else device_libnut.keyTap(key);
776
- }),
777
- (0, device_namespaceObject.defineActionDragAndDrop)(async (param)=>{
778
- external_node_assert_default()(device_libnut, 'libnut not initialized');
779
- const from = param.from;
780
- const to = param.to;
781
- external_node_assert_default()(from, 'missing "from" param for drag and drop');
782
- external_node_assert_default()(to, 'missing "to" param for drag and drop');
783
- const [fromX, fromY] = from.center;
784
- const [toX, toY] = to.center;
785
- device_libnut.moveMouse(Math.round(fromX), Math.round(fromY));
786
- device_libnut.mouseToggle('down', 'left');
787
- await (0, utils_namespaceObject.sleep)(100);
788
- device_libnut.moveMouse(Math.round(toX), Math.round(toY));
789
- await (0, utils_namespaceObject.sleep)(100);
790
- device_libnut.mouseToggle('up', 'left');
791
- }),
792
- (0, device_namespaceObject.defineActionClearInput)(async (param)=>{
793
- external_node_assert_default()(device_libnut, 'libnut not initialized');
794
- const element = param.locate;
795
- external_node_assert_default()(element, 'Element not found, cannot clear input');
796
- const [x, y] = element.center;
797
- device_libnut.moveMouse(Math.round(x), Math.round(y));
798
- device_libnut.mouseClick('left');
799
- await (0, utils_namespaceObject.sleep)(100);
800
- if (this.useAppleScript) {
801
- sendKeyViaAppleScript('a', [
802
- 'command'
803
- ]);
804
- await (0, utils_namespaceObject.sleep)(50);
805
- sendKeyViaAppleScript('backspace', []);
806
- } else {
807
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
808
- device_libnut.keyTap('a', [
809
- modifier
810
- ]);
811
- device_libnut.keyTap('backspace');
812
- }
813
- await (0, utils_namespaceObject.sleep)(50);
814
- })
705
+ ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives)
815
706
  ];
816
707
  const platformActions = Object.values(createPlatformActions());
817
708
  const customActions = this.options?.customActions || [];
@@ -849,6 +740,88 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
849
740
  _define_property(this, "xvfbCleanup", void 0);
850
741
  _define_property(this, "useAppleScript", void 0);
851
742
  _define_property(this, "uri", void 0);
743
+ _define_property(this, "inputPrimitives", {
744
+ pointer: {
745
+ tap: async ({ x, y })=>{
746
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
747
+ const targetX = Math.round(x);
748
+ const targetY = Math.round(y);
749
+ await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
750
+ device_libnut.mouseToggle('down', 'left');
751
+ await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
752
+ device_libnut.mouseToggle('up', 'left');
753
+ },
754
+ doubleClick: async ({ x, y })=>{
755
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
756
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
757
+ device_libnut.mouseClick('left', true);
758
+ },
759
+ rightClick: async ({ x, y })=>{
760
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
761
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
762
+ device_libnut.mouseClick('right');
763
+ },
764
+ hover: async ({ x, y })=>{
765
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
766
+ await smoothMoveMouse(Math.round(x), Math.round(y), SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
767
+ await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
768
+ },
769
+ dragAndDrop: async (from, to)=>{
770
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
771
+ device_libnut.moveMouse(Math.round(from.x), Math.round(from.y));
772
+ device_libnut.mouseToggle('down', 'left');
773
+ await (0, utils_namespaceObject.sleep)(100);
774
+ device_libnut.moveMouse(Math.round(to.x), Math.round(to.y));
775
+ await (0, utils_namespaceObject.sleep)(100);
776
+ device_libnut.mouseToggle('up', 'left');
777
+ }
778
+ },
779
+ keyboard: {
780
+ typeText: async (value, opts)=>{
781
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
782
+ const element = opts?.target;
783
+ if (element) {
784
+ const [x, y] = element.center;
785
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
786
+ device_libnut.mouseClick('left');
787
+ await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
788
+ if (opts?.replace !== false) {
789
+ await this.selectAllAndDelete();
790
+ await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
791
+ }
792
+ }
793
+ await this.smartTypeString(value);
794
+ },
795
+ keyboardPress: async (keyName, opts)=>{
796
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
797
+ const target = opts?.target;
798
+ if (target) {
799
+ const [x, y] = target.center;
800
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
801
+ device_libnut.mouseClick('left');
802
+ await (0, utils_namespaceObject.sleep)(50);
803
+ }
804
+ await this.pressKeyboardShortcut(keyName);
805
+ },
806
+ clearInput: async (target)=>{
807
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
808
+ if (target) {
809
+ const element = target;
810
+ const [x, y] = element.center;
811
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
812
+ device_libnut.mouseClick('left');
813
+ await (0, utils_namespaceObject.sleep)(100);
814
+ }
815
+ await this.selectAllAndDelete();
816
+ await (0, utils_namespaceObject.sleep)(50);
817
+ }
818
+ },
819
+ scroll: {
820
+ scroll: async (param)=>{
821
+ await this.performScroll(param);
822
+ }
823
+ }
824
+ });
852
825
  this.options = options;
853
826
  this.displayId = options?.displayId;
854
827
  this.useAppleScript = 'darwin' === process.platform && options?.keyboardDriver !== 'libnut';
@@ -1269,132 +1242,7 @@ class RDPDevice {
1269
1242
  }
1270
1243
  actionSpace() {
1271
1244
  const defaultActions = [
1272
- (0, device_namespaceObject.defineActionTap)(async ({ locate })=>{
1273
- const element = this.requireLocate(locate, 'tap');
1274
- await this.moveToElement(element, {
1275
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1276
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1277
- });
1278
- await this.backend.mouseButton('left', 'down');
1279
- await (0, utils_namespaceObject.sleep)(device_CLICK_HOLD_DURATION);
1280
- await this.backend.mouseButton('left', 'up');
1281
- }),
1282
- (0, device_namespaceObject.defineActionDoubleClick)(async ({ locate })=>{
1283
- const element = this.requireLocate(locate, 'double click');
1284
- await this.moveToElement(element, {
1285
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1286
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1287
- });
1288
- await this.backend.mouseButton('left', 'doubleClick');
1289
- }),
1290
- (0, device_namespaceObject.defineActionRightClick)(async ({ locate })=>{
1291
- const element = this.requireLocate(locate, 'right click');
1292
- await this.moveToElement(element, {
1293
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1294
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1295
- });
1296
- await this.backend.mouseButton('right', 'click');
1297
- }),
1298
- (0, device_namespaceObject.defineActionHover)(async ({ locate })=>{
1299
- const element = this.requireLocate(locate, 'hover');
1300
- await this.moveToElement(element, {
1301
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1302
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1303
- settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1304
- });
1305
- }),
1306
- (0, device_namespaceObject.defineActionInput)(async (param)=>{
1307
- this.assertConnected();
1308
- if (param.locate) {
1309
- await this.moveToElement(param.locate, {
1310
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1311
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1312
- });
1313
- await this.backend.mouseButton('left', 'click');
1314
- await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1315
- }
1316
- if ('typeOnly' !== param.mode) {
1317
- await this.clearInput();
1318
- await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1319
- }
1320
- if ('clear' === param.mode) return;
1321
- if (param.value) await this.backend.typeText(param.value);
1322
- }),
1323
- (0, device_namespaceObject.defineActionClearInput)(async ({ locate })=>{
1324
- this.assertConnected();
1325
- if (locate) {
1326
- await this.moveToElement(locate, {
1327
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1328
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1329
- });
1330
- await this.backend.mouseButton('left', 'click');
1331
- await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1332
- }
1333
- await this.clearInput();
1334
- await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1335
- }),
1336
- (0, device_namespaceObject.defineActionKeyboardPress)(async ({ locate, keyName })=>{
1337
- this.assertConnected();
1338
- if (locate) {
1339
- await this.moveToElement(locate, {
1340
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1341
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1342
- });
1343
- await this.backend.mouseButton('left', 'click');
1344
- }
1345
- await this.backend.keyPress(keyName);
1346
- }),
1347
- (0, device_namespaceObject.defineActionScroll)(async (param)=>{
1348
- this.assertConnected();
1349
- const target = param.locate;
1350
- if (target) await this.moveToElement(target, {
1351
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1352
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1353
- });
1354
- if (param.scrollType && 'singleAction' !== param.scrollType) {
1355
- const direction = this.edgeScrollDirection(param.scrollType);
1356
- for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1357
- await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1358
- return;
1359
- }
1360
- await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1361
- await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1362
- }),
1363
- (0, device_namespaceObject.defineActionDragAndDrop)(async ({ from, to })=>{
1364
- this.assertConnected();
1365
- const source = this.requireLocate(from, 'drag source');
1366
- const target = this.requireLocate(to, 'drag target');
1367
- await this.moveToElement(source, {
1368
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1369
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1370
- });
1371
- await this.backend.mouseButton('left', 'down');
1372
- await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1373
- await this.moveToElement(target, {
1374
- steps: SMOOTH_MOVE_STEPS_DRAG,
1375
- stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1376
- });
1377
- await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1378
- await this.backend.mouseButton('left', 'up');
1379
- }),
1380
- (0, device_namespaceObject.defineAction)({
1381
- name: 'MiddleClick',
1382
- description: 'Middle click the element',
1383
- sample: {
1384
- locate: {
1385
- prompt: 'the browser tab close target'
1386
- }
1387
- },
1388
- paramSchema: device_namespaceObject.actionTapParamSchema,
1389
- call: async ({ locate })=>{
1390
- const element = this.requireLocate(locate, 'middle click');
1391
- await this.moveToElement(element, {
1392
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1393
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1394
- });
1395
- await this.backend.mouseButton('middle', 'click');
1396
- }
1397
- }),
1245
+ ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives),
1398
1246
  (0, device_namespaceObject.defineAction)({
1399
1247
  name: 'ListDisplays',
1400
1248
  description: 'List all available displays/monitors',
@@ -1423,10 +1271,6 @@ class RDPDevice {
1423
1271
  throwIfDestroyed() {
1424
1272
  if (this.destroyed) throw new Error('RDPDevice has been destroyed');
1425
1273
  }
1426
- requireLocate(locate, actionName) {
1427
- if (!locate) throw new Error(`Missing target element for ${actionName}`);
1428
- return locate;
1429
- }
1430
1274
  async moveToElement(element, options) {
1431
1275
  this.assertConnected();
1432
1276
  const targetX = Math.round(element.center[0]);
@@ -1492,6 +1336,113 @@ class RDPDevice {
1492
1336
  device_define_property(this, "destroyed", false);
1493
1337
  device_define_property(this, "cursorPosition", void 0);
1494
1338
  device_define_property(this, "uri", void 0);
1339
+ device_define_property(this, "inputPrimitives", {
1340
+ pointer: {
1341
+ tap: async ({ x, y })=>{
1342
+ await this.movePointer(Math.round(x), Math.round(y), {
1343
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1344
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1345
+ });
1346
+ await this.backend.mouseButton('left', 'down');
1347
+ await (0, utils_namespaceObject.sleep)(device_CLICK_HOLD_DURATION);
1348
+ await this.backend.mouseButton('left', 'up');
1349
+ },
1350
+ doubleClick: async ({ x, y })=>{
1351
+ await this.movePointer(Math.round(x), Math.round(y), {
1352
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1353
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1354
+ });
1355
+ await this.backend.mouseButton('left', 'doubleClick');
1356
+ },
1357
+ rightClick: async ({ x, y })=>{
1358
+ await this.movePointer(Math.round(x), Math.round(y), {
1359
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1360
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1361
+ });
1362
+ await this.backend.mouseButton('right', 'click');
1363
+ },
1364
+ hover: async ({ x, y })=>{
1365
+ await this.movePointer(Math.round(x), Math.round(y), {
1366
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1367
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1368
+ settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1369
+ });
1370
+ },
1371
+ dragAndDrop: async (from, to)=>{
1372
+ await this.movePointer(Math.round(from.x), Math.round(from.y), {
1373
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1374
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1375
+ });
1376
+ await this.backend.mouseButton('left', 'down');
1377
+ await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1378
+ await this.movePointer(Math.round(to.x), Math.round(to.y), {
1379
+ steps: SMOOTH_MOVE_STEPS_DRAG,
1380
+ stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1381
+ });
1382
+ await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1383
+ await this.backend.mouseButton('left', 'up');
1384
+ }
1385
+ },
1386
+ keyboard: {
1387
+ typeText: async (value, opts)=>{
1388
+ this.assertConnected();
1389
+ const target = opts?.target;
1390
+ if (target) {
1391
+ await this.inputPrimitives.pointer.tap({
1392
+ x: target.center[0],
1393
+ y: target.center[1]
1394
+ });
1395
+ await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1396
+ }
1397
+ if (opts?.replace !== false) {
1398
+ await this.clearInput();
1399
+ await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1400
+ }
1401
+ if (opts?.focusOnly || !value) return;
1402
+ await this.backend.typeText(value);
1403
+ },
1404
+ clearInput: async (target)=>{
1405
+ this.assertConnected();
1406
+ const element = target;
1407
+ if (element) {
1408
+ await this.inputPrimitives.pointer.tap({
1409
+ x: element.center[0],
1410
+ y: element.center[1]
1411
+ });
1412
+ await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1413
+ }
1414
+ await this.clearInput();
1415
+ await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1416
+ },
1417
+ keyboardPress: async (keyName, opts)=>{
1418
+ this.assertConnected();
1419
+ const target = opts?.target;
1420
+ if (target) await this.inputPrimitives.pointer.tap({
1421
+ x: target.center[0],
1422
+ y: target.center[1]
1423
+ });
1424
+ await this.backend.keyPress(keyName);
1425
+ }
1426
+ },
1427
+ scroll: {
1428
+ scroll: async (param)=>{
1429
+ this.assertConnected();
1430
+ const target = param.locate;
1431
+ if (target) await this.moveToElement(target, {
1432
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1433
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1434
+ });
1435
+ if (param.scrollType && 'singleAction' !== param.scrollType) {
1436
+ const direction = this.edgeScrollDirection(param.scrollType);
1437
+ for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1438
+ await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1439
+ return;
1440
+ }
1441
+ await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1442
+ await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1443
+ }
1444
+ }
1445
+ });
1495
1446
  this.options = {
1496
1447
  port: 3389,
1497
1448
  securityProtocol: 'auto',
@@ -1539,6 +1490,7 @@ async function agentForRDPComputer(opts) {
1539
1490
  await device.connect();
1540
1491
  return new ComputerAgent(device, opts);
1541
1492
  }
1493
+ const core_namespaceObject = require("@midscene/core");
1542
1494
  const base_tools_namespaceObject = require("@midscene/shared/mcp/base-tools");
1543
1495
  function mcp_tools_define_property(obj, key, value) {
1544
1496
  if (key in obj) Object.defineProperty(obj, key, {
@@ -1551,10 +1503,61 @@ function mcp_tools_define_property(obj, key, value) {
1551
1503
  return obj;
1552
1504
  }
1553
1505
  const mcp_tools_debug = (0, logger_namespaceObject.getDebug)('mcp:computer-tools');
1506
+ const RDP_SECURITY_PROTOCOLS = [
1507
+ 'auto',
1508
+ 'tls',
1509
+ 'nla',
1510
+ 'rdp'
1511
+ ];
1554
1512
  const computerInitArgShape = {
1555
- displayId: core_namespaceObject.z.string().optional().describe('Display ID (from computer_list_displays)'),
1556
- headless: core_namespaceObject.z.boolean().optional().describe('Start virtual display via Xvfb (Linux only)')
1513
+ displayId: core_namespaceObject.z.string().optional().describe('Display ID for local mode (from computer_list_displays). Ignored when host is set.'),
1514
+ headless: core_namespaceObject.z.boolean().optional().describe('Start virtual display via Xvfb (Linux local mode only). Ignored when host is set.'),
1515
+ host: core_namespaceObject.z.string().optional().describe('RDP host (FQDN or IP). Set this to switch into RDP mode.'),
1516
+ port: core_namespaceObject.z.number().optional().describe('RDP port (default 3389). Requires host.'),
1517
+ username: core_namespaceObject.z.string().optional().describe('RDP username. Requires host.'),
1518
+ password: core_namespaceObject.z.string().optional().describe('RDP password. Requires host. Prefer setting via environment or a secrets manager.'),
1519
+ domain: core_namespaceObject.z.string().optional().describe('RDP domain. Requires host.'),
1520
+ adminSession: core_namespaceObject.z.boolean().optional().describe('Attach to the RDP admin/console session. Requires host.'),
1521
+ ignoreCertificate: core_namespaceObject.z.boolean().optional().describe('Skip TLS certificate validation. Requires host.'),
1522
+ securityProtocol: core_namespaceObject.z["enum"](RDP_SECURITY_PROTOCOLS).optional().describe('RDP security protocol negotiation (default auto). Requires host.'),
1523
+ desktopWidth: core_namespaceObject.z.number().optional().describe('Remote desktop width in pixels. Requires host.'),
1524
+ desktopHeight: core_namespaceObject.z.number().optional().describe('Remote desktop height in pixels. Requires host.')
1557
1525
  };
1526
/**
 * Convert the flat init arguments extracted from a tool call into a
 * mode-tagged options object.
 *
 * @param {object} [extracted] - Raw init arguments (may be missing or empty).
 * @returns {object|undefined} `undefined` when nothing was supplied;
 *   `{ mode: 'rdp', ... }` (without `displayId`/`headless`) when `host` is
 *   truthy; otherwise `{ mode: 'local', displayId, headless }`.
 */
function adaptComputerInitArgs(extracted) {
    const nothingSupplied = !extracted || Object.keys(extracted).length === 0;
    if (nothingSupplied) return undefined;

    // Without a host we stay in local mode and keep only the local-mode fields.
    if (!extracted.host) {
        return {
            mode: 'local',
            displayId: extracted.displayId,
            headless: extracted.headless
        };
    }

    // A host switches to RDP mode; the local-only fields are dropped.
    const {
        displayId: _unusedDisplayId,
        headless: _unusedHeadless,
        ...remoteFields
    } = extracted;
    return {
        mode: 'rdp',
        ...remoteFields,
        host: extracted.host
    };
}
1542
/**
 * Decide whether the given init options ask for a specific target.
 *
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {boolean} `false` when no options are given; `true` for RDP mode
 *   or when either `displayId` or `headless` is explicitly set.
 */
function shouldRetargetAgent(opts) {
    if (!opts) return false;
    if (opts.mode === 'rdp') return true;
    const noLocalTarget = opts.displayId === undefined && opts.headless === undefined;
    return !noLocalTarget;
}
1547
/**
 * Build the human-readable suffix describing what was connected to.
 *
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {string} A suffix starting with a space: the RDP endpoint (with
 *   optional port and username), the local display id, or the primary-display
 *   fallback.
 */
function describeConnectTarget(opts) {
    switch (opts?.mode) {
        case 'rdp': {
            let endpoint = `${opts.host}`;
            if (opts.port) endpoint += `:${opts.port}`;
            if (opts.username) endpoint += ` as ${opts.username}`;
            return ` via RDP (${endpoint})`;
        }
        case 'local':
            if (opts.displayId) return ` (Display: ${opts.displayId})`;
            break;
        default:
            break;
    }
    return ' (Primary display)';
}
1556
/**
 * Derive the CLI report session target label from the init options.
 *
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {string} `rdp:<host>` for RDP mode, the display id for local mode
 *   with a truthy `displayId`, otherwise `'primary'`.
 */
function getCliReportSessionTarget(opts) {
    const mode = opts?.mode;
    if (mode === 'rdp') {
        return `rdp:${opts.host}`;
    }
    const hasLocalDisplay = mode === 'local' && opts.displayId;
    return hasLocalDisplay ? opts.displayId : 'primary';
}
1558
1561
  class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools {
1559
1562
  getCliReportSessionName() {
1560
1563
  return 'midscene-computer';
@@ -1563,9 +1566,7 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
1563
1566
  return new ComputerDevice({});
1564
1567
  }
1565
1568
  async ensureAgent(opts) {
1566
- const displayId = opts?.displayId;
1567
- const headless = opts?.headless;
1568
- if (this.agent && (void 0 !== displayId || void 0 !== headless)) {
1569
+ if (this.agent && shouldRetargetAgent(opts)) {
1569
1570
  try {
1570
1571
  await this.agent.destroy?.();
1571
1572
  } catch (error) {
@@ -1574,8 +1575,20 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
1574
1575
  this.agent = void 0;
1575
1576
  }
1576
1577
  if (this.agent) return this.agent;
1577
- mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
1578
1578
  const reportOptions = this.readCliReportAgentOptions();
1579
+ if (opts?.mode === 'rdp') {
1580
+ mcp_tools_debug('Creating RDP Computer agent for host:', opts.host);
1581
+ const { mode: _mode, ...rdpFields } = opts;
1582
+ const agent = await agentForRDPComputer({
1583
+ ...rdpFields,
1584
+ ...reportOptions ?? {}
1585
+ });
1586
+ this.agent = agent;
1587
+ return agent;
1588
+ }
1589
+ const displayId = opts?.mode === 'local' ? opts.displayId : void 0;
1590
+ const headless = opts?.mode === 'local' ? opts.headless : void 0;
1591
+ mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
1579
1592
  const agentOpts = {
1580
1593
  ...displayId ? {
1581
1594
  displayId
@@ -1593,12 +1606,12 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
1593
1606
  return [
1594
1607
  {
1595
1608
  name: 'computer_connect',
1596
- description: 'Connect to computer desktop. Provide displayId to connect to a specific display (use computer_list_displays to get available IDs). If not provided, uses the primary display.',
1609
+ description: "Connect to a computer desktop. Default (local) mode controls the local machine; pass displayId to target a specific local display (see computer_list_displays). Pass host to switch to RDP mode and connect to a remote Windows desktop via the RDP helper binary. RDP-related options (port/username/password/domain/securityProtocol/ignoreCertificate/adminSession/desktopWidth/desktopHeight) only take effect when host is set.",
1597
1610
  schema: this.getAgentInitArgSchema(),
1598
1611
  cli: this.getAgentInitArgCliMetadata(),
1599
1612
  handler: async (args)=>{
1600
1613
  const initArgs = this.extractAgentInitParam(args);
1601
- const reportSession = this.createNewCliReportSession(initArgs?.displayId ?? 'primary');
1614
+ const reportSession = this.createNewCliReportSession(getCliReportSessionTarget(initArgs));
1602
1615
  this.commitCliReportSession(reportSession);
1603
1616
  if (this.agent) {
1604
1617
  try {
@@ -1614,7 +1627,7 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
1614
1627
  content: [
1615
1628
  {
1616
1629
  type: 'text',
1617
- text: `Connected to computer${initArgs?.displayId ? ` (Display: ${initArgs.displayId})` : ' (Primary display)'}`
1630
+ text: `Connected to computer${describeConnectTarget(initArgs)}`
1618
1631
  },
1619
1632
  ...this.buildScreenshotContent(screenshot)
1620
1633
  ]
@@ -1652,32 +1665,44 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
1652
1665
  cli: {
1653
1666
  preferBareKeys: true
1654
1667
  },
1655
- adapt: (extracted)=>extracted
1668
+ adapt: (extracted)=>adaptComputerInitArgs(extracted)
1656
1669
  });
1657
1670
  }
1658
1671
  }
1659
1672
  const env_namespaceObject = require("@midscene/shared/env");
1660
1673
  function version() {
1661
- const currentVersion = "1.8.0";
1674
+ const currentVersion = "1.8.1";
1662
1675
  console.log(`@midscene/computer v${currentVersion}`);
1663
1676
  return currentVersion;
1664
1677
  }
1678
/**
 * Try to load the `node-mac-permissions` module.
 *
 * @returns {{permissions: object|null, loadError?: string}} On non-darwin
 *   platforms `permissions` is `null`. On darwin it is the required module,
 *   or `null` plus a `loadError` message when requiring it throws.
 */
function loadMacPermissions() {
    const onMac = process.platform === 'darwin';
    if (!onMac) {
        return {
            permissions: null
        };
    }
    try {
        // Resolve the module through a require created for this bundle's URL.
        const requireFromBundle = (0, external_node_module_namespaceObject.createRequire)(__rslib_import_meta_url__);
        const permissions = requireFromBundle('node-mac-permissions');
        return {
            permissions
        };
    } catch (error) {
        const loadError = error instanceof Error ? error.message : String(error);
        return {
            permissions: null,
            loadError
        };
    }
}
1665
1694
  function checkAccessibilityPermission(promptIfNeeded = false) {
1666
1695
  if ('darwin' !== process.platform) return {
1667
1696
  hasPermission: true,
1668
1697
  platform: process.platform
1669
1698
  };
1670
1699
  try {
1671
- let permissions;
1672
- try {
1673
- const dynamicRequire = (0, external_node_module_namespaceObject.createRequire)(__rslib_import_meta_url__);
1674
- permissions = dynamicRequire('node-mac-permissions');
1675
- } catch {
1676
- return {
1677
- hasPermission: true,
1678
- platform: process.platform
1679
- };
1680
- }
1700
+ const { permissions, loadError } = loadMacPermissions();
1701
+ if (!permissions) return {
1702
+ hasPermission: false,
1703
+ platform: process.platform,
1704
+ error: `Cannot verify macOS Accessibility permission: node-mac-permissions is unavailable${loadError ? ` (${loadError})` : ''}. The native module may need to be rebuilt for the current Node/Electron ABI.`
1705
+ };
1681
1706
  const status = permissions.getAuthStatus('accessibility');
1682
1707
  if ('authorized' === status) return {
1683
1708
  hasPermission: true,
@@ -1697,6 +1722,37 @@ function checkAccessibilityPermission(promptIfNeeded = false) {
1697
1722
  };
1698
1723
  }
1699
1724
  }
1725
/**
 * Check whether the macOS Screen Recording permission is granted.
 *
 * @param {boolean} [promptIfNeeded=false] - When true and the status is not
 *   'authorized', calls `askForScreenCaptureAccess(true)` on the permissions module.
 * @returns {{hasPermission: boolean, platform: string, error?: string}}
 *   Always reports `hasPermission: true` on non-darwin platforms. On darwin,
 *   reports `false` with an explanatory `error` when the permissions module is
 *   unavailable, the status is not 'authorized', or the check itself throws.
 */
function checkScreenRecordingPermission(promptIfNeeded = false) {
    const platform = process.platform;
    const granted = ()=>({
            hasPermission: true,
            platform
        });
    const denied = (error)=>({
            hasPermission: false,
            platform,
            error
        });

    if (platform !== 'darwin') return granted();

    try {
        const { permissions, loadError } = loadMacPermissions();
        if (!permissions) {
            const detail = loadError ? ` (${loadError})` : '';
            return denied(`Cannot verify macOS Screen Recording permission: node-mac-permissions is unavailable${detail}. The native module may need to be rebuilt for the current Node/Electron ABI.`);
        }
        const status = permissions.getAuthStatus('screen');
        if (status === 'authorized') return granted();
        if (promptIfNeeded) permissions.askForScreenCaptureAccess(true);
        return denied(`macOS Screen Recording permission is required (current status: ${status}).\n\nPlease follow these steps:\n1. Open System Settings > Privacy & Security > Screen Recording\n2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)\n3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.`);
    } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return denied(`Failed to check screen recording permission: ${reason}`);
    }
}
1700
1756
  async function checkComputerEnvironment() {
1701
1757
  try {
1702
1758
  const libnutModule = await import("@computer-use/libnut/dist/import_libnut.js");
@@ -1738,6 +1794,7 @@ exports.agentForRDPComputer = __webpack_exports__.agentForRDPComputer;
1738
1794
  exports.agentFromComputer = __webpack_exports__.agentFromComputer;
1739
1795
  exports.checkAccessibilityPermission = __webpack_exports__.checkAccessibilityPermission;
1740
1796
  exports.checkComputerEnvironment = __webpack_exports__.checkComputerEnvironment;
1797
+ exports.checkScreenRecordingPermission = __webpack_exports__.checkScreenRecordingPermission;
1741
1798
  exports.checkXvfbInstalled = __webpack_exports__.checkXvfbInstalled;
1742
1799
  exports.createDefaultRDPBackendClient = __webpack_exports__.createDefaultRDPBackendClient;
1743
1800
  exports.getConnectedDisplays = __webpack_exports__.getConnectedDisplays;
@@ -1756,6 +1813,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
1756
1813
  "agentFromComputer",
1757
1814
  "checkAccessibilityPermission",
1758
1815
  "checkComputerEnvironment",
1816
+ "checkScreenRecordingPermission",
1759
1817
  "checkXvfbInstalled",
1760
1818
  "createDefaultRDPBackendClient",
1761
1819
  "getConnectedDisplays",