@midscene/computer 1.8.1-beta-20260513084557.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lib/index.js CHANGED
@@ -47,9 +47,10 @@ __webpack_require__.d(__webpack_exports__, {
47
47
  RDPDevice: ()=>RDPDevice,
48
48
  agentFromComputer: ()=>agentFromComputer,
49
49
  checkAccessibilityPermission: ()=>checkAccessibilityPermission,
50
+ checkScreenRecordingPermission: ()=>checkScreenRecordingPermission,
50
51
  checkXvfbInstalled: ()=>checkXvfbInstalled,
51
- overrideAIConfig: ()=>env_namespaceObject.overrideAIConfig,
52
52
  getConnectedDisplays: ()=>getConnectedDisplays,
53
+ overrideAIConfig: ()=>env_namespaceObject.overrideAIConfig,
53
54
  agentForRDPComputer: ()=>agentForRDPComputer,
54
55
  ComputerDevice: ()=>ComputerDevice,
55
56
  ComputerAgent: ()=>ComputerAgent
@@ -61,7 +62,6 @@ const external_node_fs_namespaceObject = require("node:fs");
61
62
  const external_node_module_namespaceObject = require("node:module");
62
63
  const external_node_path_namespaceObject = require("node:path");
63
64
  const external_node_url_namespaceObject = require("node:url");
64
- const core_namespaceObject = require("@midscene/core");
65
65
  const device_namespaceObject = require("@midscene/core/device");
66
66
  const utils_namespaceObject = require("@midscene/core/utils");
67
67
  const img_namespaceObject = require("@midscene/shared/img");
@@ -145,15 +145,6 @@ function _define_property(obj, key, value) {
145
145
  else obj[key] = value;
146
146
  return obj;
147
147
  }
148
- const computerInputParamSchema = core_namespaceObject.z.object({
149
- value: core_namespaceObject.z.string().describe('The text to input'),
150
- mode: core_namespaceObject.z["enum"]([
151
- 'replace',
152
- 'clear',
153
- 'append'
154
- ]).default('replace').optional().describe('Input mode: replace, clear, or append'),
155
- locate: (0, core_namespaceObject.getMidsceneLocationSchema)().describe('The input field to be filled').optional()
156
- });
157
148
  const SMOOTH_MOVE_STEPS_TAP = 8;
158
149
  const SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
159
150
  const SMOOTH_MOVE_DELAY_TAP = 8;
@@ -464,7 +455,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
464
455
  }
465
456
  async healthCheck() {
466
457
  console.log('[HealthCheck] Starting health check...');
467
- console.log("[HealthCheck] @midscene/computer v1.8.1-beta-20260513084557.0");
458
+ console.log("[HealthCheck] @midscene/computer v1.8.1");
468
459
  console.log('[HealthCheck] Taking screenshot...');
469
460
  const screenshotTimeout = 15000;
470
461
  let timeoutId;
@@ -530,21 +521,38 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
530
521
  debugDevice('Taking screenshot', {
531
522
  displayId: this.displayId
532
523
  });
533
- try {
534
- const options = {
535
- format: 'png'
536
- };
537
- if (void 0 !== this.displayId) if ('darwin' === process.platform) {
538
- const screenIndex = Number(this.displayId);
539
- if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
540
- } else options.screen = this.displayId;
541
- debugDevice('Screenshot options', options);
524
+ const options = {
525
+ format: 'png'
526
+ };
527
+ if (void 0 !== this.displayId) if ('darwin' === process.platform) {
528
+ const screenIndex = Number(this.displayId);
529
+ if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
530
+ } else options.screen = this.displayId;
531
+ debugDevice('Screenshot options', options);
532
+ const MAX_ATTEMPTS = 3;
533
+ const RETRY_DELAY_MS = 300;
534
+ let lastRawMessage = '';
535
+ for(let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)try {
542
536
  const buffer = await external_screenshot_desktop_default()(options);
537
+ if (attempt > 1) debugDevice(`Screenshot succeeded on attempt ${attempt}`);
543
538
  return (0, img_namespaceObject.createImgBase64ByFormat)('png', buffer.toString('base64'));
544
539
  } catch (error) {
545
- debugDevice(`Screenshot failed: ${error}`);
546
- throw new Error(`Failed to take screenshot: ${error}`);
540
+ lastRawMessage = error instanceof Error ? error.message : String(error);
541
+ const isMacTransient = 'darwin' === process.platform && /could not create image from display/i.test(lastRawMessage);
542
+ const willRetry = isMacTransient && attempt < MAX_ATTEMPTS;
543
+ debugDevice(`Screenshot attempt ${attempt} failed: ${lastRawMessage}${willRetry ? ' — retrying' : ''}`);
544
+ if (!willRetry) break;
545
+ await (0, utils_namespaceObject.sleep)(RETRY_DELAY_MS);
547
546
  }
547
+ if ('darwin' === process.platform && /could not create image from display/i.test(lastRawMessage)) throw new Error(`Failed to take screenshot on macOS: the host process is missing Screen Recording permission, or the target display is locked/sleeping.
548
+
549
+ Please follow these steps:
550
+ 1. Open System Settings > Privacy & Security > Screen Recording
551
+ 2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)
552
+ 3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.
553
+
554
+ Original error: ${lastRawMessage}`);
555
+ throw new Error(`Failed to take screenshot: ${lastRawMessage}`);
548
556
  }
549
557
  async size() {
550
558
  external_node_assert_default()(device_libnut, 'libnut not initialized');
@@ -590,228 +598,111 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
590
598
  external_node_assert_default()(device_libnut, 'libnut not initialized');
591
599
  await this.typeViaClipboard(text);
592
600
  }
601
+ async selectAllAndDelete() {
602
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
603
+ if (this.useAppleScript) {
604
+ sendKeyViaAppleScript('a', [
605
+ 'command'
606
+ ]);
607
+ await (0, utils_namespaceObject.sleep)(50);
608
+ sendKeyViaAppleScript('backspace', []);
609
+ return;
610
+ }
611
+ const modifier = 'darwin' === process.platform ? 'command' : 'control';
612
+ device_libnut.keyTap('a', [
613
+ modifier
614
+ ]);
615
+ await (0, utils_namespaceObject.sleep)(50);
616
+ device_libnut.keyTap('backspace');
617
+ }
618
+ async pressKeyboardShortcut(keyName) {
619
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
620
+ const keys = keyName.split('+');
621
+ const modifiers = keys.slice(0, -1).map(normalizeKeyName);
622
+ const key = normalizePrimaryKey(keys[keys.length - 1]);
623
+ debugDevice('KeyboardPress', {
624
+ original: keyName,
625
+ key,
626
+ modifiers,
627
+ driver: this.useAppleScript ? "applescript" : 'libnut'
628
+ });
629
+ if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
630
+ else if (modifiers.length > 0) device_libnut.keyTap(key, modifiers);
631
+ else device_libnut.keyTap(key);
632
+ }
633
+ async performScroll(param) {
634
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
635
+ if (param.locate) {
636
+ const element = param.locate;
637
+ const [x, y] = element.center;
638
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
639
+ }
640
+ const scrollType = param?.scrollType;
641
+ const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
642
+ if (edgeSpec) {
643
+ if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
644
+ if (this.useAppleScript) {
645
+ sendKeyViaAppleScript(edgeSpec.key);
646
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
647
+ return;
648
+ }
649
+ const [dx, dy] = edgeSpec.libnut;
650
+ for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
651
+ device_libnut.scrollMouse(dx, dy);
652
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
653
+ }
654
+ return;
655
+ }
656
+ if ('singleAction' === scrollType || !scrollType) {
657
+ const distance = param?.distance || 500;
658
+ const direction = param?.direction || 'down';
659
+ const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
660
+ if (isKnownDirection) {
661
+ const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
662
+ if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
663
+ }
664
+ if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
665
+ const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
666
+ const key = 'up' === direction ? 'pageup' : 'pagedown';
667
+ for(let i = 0; i < pages; i++){
668
+ sendKeyViaAppleScript(key);
669
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
670
+ }
671
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
672
+ return;
673
+ }
674
+ const ticks = Math.ceil(distance / 100);
675
+ const directionMap = {
676
+ up: [
677
+ 0,
678
+ ticks
679
+ ],
680
+ down: [
681
+ 0,
682
+ -ticks
683
+ ],
684
+ left: [
685
+ -ticks,
686
+ 0
687
+ ],
688
+ right: [
689
+ ticks,
690
+ 0
691
+ ]
692
+ };
693
+ const [dx, dy] = directionMap[direction] || [
694
+ 0,
695
+ -ticks
696
+ ];
697
+ device_libnut.scrollMouse(dx, dy);
698
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
699
+ return;
700
+ }
701
+ throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
702
+ }
593
703
  actionSpace() {
594
704
  const defaultActions = [
595
- (0, device_namespaceObject.defineActionTap)(async (param)=>{
596
- external_node_assert_default()(device_libnut, 'libnut not initialized');
597
- const element = param.locate;
598
- external_node_assert_default()(element, 'Element not found, cannot tap');
599
- const [x, y] = element.center;
600
- const targetX = Math.round(x);
601
- const targetY = Math.round(y);
602
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
603
- device_libnut.mouseToggle('down', 'left');
604
- await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
605
- device_libnut.mouseToggle('up', 'left');
606
- }),
607
- (0, device_namespaceObject.defineActionDoubleClick)(async (param)=>{
608
- external_node_assert_default()(device_libnut, 'libnut not initialized');
609
- const element = param.locate;
610
- external_node_assert_default()(element, 'Element not found, cannot double click');
611
- const [x, y] = element.center;
612
- device_libnut.moveMouse(Math.round(x), Math.round(y));
613
- device_libnut.mouseClick('left', true);
614
- }),
615
- (0, device_namespaceObject.defineActionRightClick)(async (param)=>{
616
- external_node_assert_default()(device_libnut, 'libnut not initialized');
617
- const element = param.locate;
618
- external_node_assert_default()(element, 'Element not found, cannot right click');
619
- const [x, y] = element.center;
620
- device_libnut.moveMouse(Math.round(x), Math.round(y));
621
- device_libnut.mouseClick('right');
622
- }),
623
- (0, device_namespaceObject.defineAction)({
624
- name: 'MouseMove',
625
- description: 'Move the mouse to the element',
626
- interfaceAlias: 'aiHover',
627
- paramSchema: device_namespaceObject.actionHoverParamSchema,
628
- sample: {
629
- locate: {
630
- prompt: 'the navigation menu item "Products"'
631
- }
632
- },
633
- call: async (param)=>{
634
- external_node_assert_default()(device_libnut, 'libnut not initialized');
635
- const element = param.locate;
636
- external_node_assert_default()(element, 'Element not found, cannot move mouse');
637
- const [x, y] = element.center;
638
- const targetX = Math.round(x);
639
- const targetY = Math.round(y);
640
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
641
- await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
642
- }
643
- }),
644
- (0, device_namespaceObject.defineAction)({
645
- name: 'Input',
646
- description: 'Input text into the input field',
647
- interfaceAlias: 'aiInput',
648
- paramSchema: computerInputParamSchema,
649
- sample: {
650
- value: 'test@example.com',
651
- locate: {
652
- prompt: 'the email input field'
653
- }
654
- },
655
- call: async (param)=>{
656
- external_node_assert_default()(device_libnut, 'libnut not initialized');
657
- const element = param.locate;
658
- if (element) {
659
- const [x, y] = element.center;
660
- device_libnut.moveMouse(Math.round(x), Math.round(y));
661
- device_libnut.mouseClick('left');
662
- await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
663
- if ('append' !== param.mode) {
664
- if (this.useAppleScript) {
665
- sendKeyViaAppleScript('a', [
666
- 'command'
667
- ]);
668
- await (0, utils_namespaceObject.sleep)(50);
669
- sendKeyViaAppleScript('backspace', []);
670
- } else {
671
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
672
- device_libnut.keyTap('a', [
673
- modifier
674
- ]);
675
- await (0, utils_namespaceObject.sleep)(50);
676
- device_libnut.keyTap('backspace');
677
- }
678
- await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
679
- }
680
- }
681
- if ('clear' === param.mode) return;
682
- if (!param.value) return;
683
- await this.smartTypeString(param.value);
684
- }
685
- }),
686
- (0, device_namespaceObject.defineActionScroll)(async (param)=>{
687
- external_node_assert_default()(device_libnut, 'libnut not initialized');
688
- if (param.locate) {
689
- const element = param.locate;
690
- const [x, y] = element.center;
691
- device_libnut.moveMouse(Math.round(x), Math.round(y));
692
- }
693
- const scrollType = param?.scrollType;
694
- const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
695
- if (edgeSpec) {
696
- if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
697
- if (this.useAppleScript) {
698
- sendKeyViaAppleScript(edgeSpec.key);
699
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
700
- return;
701
- }
702
- const [dx, dy] = edgeSpec.libnut;
703
- for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
704
- device_libnut.scrollMouse(dx, dy);
705
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
706
- }
707
- return;
708
- }
709
- if ('singleAction' === scrollType || !scrollType) {
710
- const distance = param?.distance || 500;
711
- const direction = param?.direction || 'down';
712
- const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
713
- if (isKnownDirection) {
714
- const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
715
- if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
716
- }
717
- if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
718
- const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
719
- const key = 'up' === direction ? 'pageup' : 'pagedown';
720
- for(let i = 0; i < pages; i++){
721
- sendKeyViaAppleScript(key);
722
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
723
- }
724
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
725
- return;
726
- }
727
- const ticks = Math.ceil(distance / 100);
728
- const directionMap = {
729
- up: [
730
- 0,
731
- ticks
732
- ],
733
- down: [
734
- 0,
735
- -ticks
736
- ],
737
- left: [
738
- -ticks,
739
- 0
740
- ],
741
- right: [
742
- ticks,
743
- 0
744
- ]
745
- };
746
- const [dx, dy] = directionMap[direction] || [
747
- 0,
748
- -ticks
749
- ];
750
- device_libnut.scrollMouse(dx, dy);
751
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
752
- return;
753
- }
754
- throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
755
- }),
756
- (0, device_namespaceObject.defineActionKeyboardPress)(async (param)=>{
757
- external_node_assert_default()(device_libnut, 'libnut not initialized');
758
- if (param.locate) {
759
- const [x, y] = param.locate.center;
760
- device_libnut.moveMouse(Math.round(x), Math.round(y));
761
- device_libnut.mouseClick('left');
762
- await (0, utils_namespaceObject.sleep)(50);
763
- }
764
- const keys = param.keyName.split('+');
765
- const modifiers = keys.slice(0, -1).map(normalizeKeyName);
766
- const key = normalizePrimaryKey(keys[keys.length - 1]);
767
- debugDevice('KeyboardPress', {
768
- original: param.keyName,
769
- key,
770
- modifiers,
771
- driver: this.useAppleScript ? "applescript" : 'libnut'
772
- });
773
- if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
774
- else if (modifiers.length > 0) device_libnut.keyTap(key, modifiers);
775
- else device_libnut.keyTap(key);
776
- }),
777
- (0, device_namespaceObject.defineActionDragAndDrop)(async (param)=>{
778
- external_node_assert_default()(device_libnut, 'libnut not initialized');
779
- const from = param.from;
780
- const to = param.to;
781
- external_node_assert_default()(from, 'missing "from" param for drag and drop');
782
- external_node_assert_default()(to, 'missing "to" param for drag and drop');
783
- const [fromX, fromY] = from.center;
784
- const [toX, toY] = to.center;
785
- device_libnut.moveMouse(Math.round(fromX), Math.round(fromY));
786
- device_libnut.mouseToggle('down', 'left');
787
- await (0, utils_namespaceObject.sleep)(100);
788
- device_libnut.moveMouse(Math.round(toX), Math.round(toY));
789
- await (0, utils_namespaceObject.sleep)(100);
790
- device_libnut.mouseToggle('up', 'left');
791
- }),
792
- (0, device_namespaceObject.defineActionClearInput)(async (param)=>{
793
- external_node_assert_default()(device_libnut, 'libnut not initialized');
794
- const element = param.locate;
795
- external_node_assert_default()(element, 'Element not found, cannot clear input');
796
- const [x, y] = element.center;
797
- device_libnut.moveMouse(Math.round(x), Math.round(y));
798
- device_libnut.mouseClick('left');
799
- await (0, utils_namespaceObject.sleep)(100);
800
- if (this.useAppleScript) {
801
- sendKeyViaAppleScript('a', [
802
- 'command'
803
- ]);
804
- await (0, utils_namespaceObject.sleep)(50);
805
- sendKeyViaAppleScript('backspace', []);
806
- } else {
807
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
808
- device_libnut.keyTap('a', [
809
- modifier
810
- ]);
811
- device_libnut.keyTap('backspace');
812
- }
813
- await (0, utils_namespaceObject.sleep)(50);
814
- })
705
+ ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives)
815
706
  ];
816
707
  const platformActions = Object.values(createPlatformActions());
817
708
  const customActions = this.options?.customActions || [];
@@ -849,6 +740,88 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
849
740
  _define_property(this, "xvfbCleanup", void 0);
850
741
  _define_property(this, "useAppleScript", void 0);
851
742
  _define_property(this, "uri", void 0);
743
+ _define_property(this, "inputPrimitives", {
744
+ pointer: {
745
+ tap: async ({ x, y })=>{
746
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
747
+ const targetX = Math.round(x);
748
+ const targetY = Math.round(y);
749
+ await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
750
+ device_libnut.mouseToggle('down', 'left');
751
+ await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
752
+ device_libnut.mouseToggle('up', 'left');
753
+ },
754
+ doubleClick: async ({ x, y })=>{
755
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
756
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
757
+ device_libnut.mouseClick('left', true);
758
+ },
759
+ rightClick: async ({ x, y })=>{
760
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
761
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
762
+ device_libnut.mouseClick('right');
763
+ },
764
+ hover: async ({ x, y })=>{
765
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
766
+ await smoothMoveMouse(Math.round(x), Math.round(y), SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
767
+ await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
768
+ },
769
+ dragAndDrop: async (from, to)=>{
770
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
771
+ device_libnut.moveMouse(Math.round(from.x), Math.round(from.y));
772
+ device_libnut.mouseToggle('down', 'left');
773
+ await (0, utils_namespaceObject.sleep)(100);
774
+ device_libnut.moveMouse(Math.round(to.x), Math.round(to.y));
775
+ await (0, utils_namespaceObject.sleep)(100);
776
+ device_libnut.mouseToggle('up', 'left');
777
+ }
778
+ },
779
+ keyboard: {
780
+ typeText: async (value, opts)=>{
781
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
782
+ const element = opts?.target;
783
+ if (element) {
784
+ const [x, y] = element.center;
785
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
786
+ device_libnut.mouseClick('left');
787
+ await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
788
+ if (opts?.replace !== false) {
789
+ await this.selectAllAndDelete();
790
+ await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
791
+ }
792
+ }
793
+ await this.smartTypeString(value);
794
+ },
795
+ keyboardPress: async (keyName, opts)=>{
796
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
797
+ const target = opts?.target;
798
+ if (target) {
799
+ const [x, y] = target.center;
800
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
801
+ device_libnut.mouseClick('left');
802
+ await (0, utils_namespaceObject.sleep)(50);
803
+ }
804
+ await this.pressKeyboardShortcut(keyName);
805
+ },
806
+ clearInput: async (target)=>{
807
+ external_node_assert_default()(device_libnut, 'libnut not initialized');
808
+ if (target) {
809
+ const element = target;
810
+ const [x, y] = element.center;
811
+ device_libnut.moveMouse(Math.round(x), Math.round(y));
812
+ device_libnut.mouseClick('left');
813
+ await (0, utils_namespaceObject.sleep)(100);
814
+ }
815
+ await this.selectAllAndDelete();
816
+ await (0, utils_namespaceObject.sleep)(50);
817
+ }
818
+ },
819
+ scroll: {
820
+ scroll: async (param)=>{
821
+ await this.performScroll(param);
822
+ }
823
+ }
824
+ });
852
825
  this.options = options;
853
826
  this.displayId = options?.displayId;
854
827
  this.useAppleScript = 'darwin' === process.platform && options?.keyboardDriver !== 'libnut';
@@ -1269,132 +1242,7 @@ class RDPDevice {
1269
1242
  }
1270
1243
  actionSpace() {
1271
1244
  const defaultActions = [
1272
- (0, device_namespaceObject.defineActionTap)(async ({ locate })=>{
1273
- const element = this.requireLocate(locate, 'tap');
1274
- await this.moveToElement(element, {
1275
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1276
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1277
- });
1278
- await this.backend.mouseButton('left', 'down');
1279
- await (0, utils_namespaceObject.sleep)(device_CLICK_HOLD_DURATION);
1280
- await this.backend.mouseButton('left', 'up');
1281
- }),
1282
- (0, device_namespaceObject.defineActionDoubleClick)(async ({ locate })=>{
1283
- const element = this.requireLocate(locate, 'double click');
1284
- await this.moveToElement(element, {
1285
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1286
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1287
- });
1288
- await this.backend.mouseButton('left', 'doubleClick');
1289
- }),
1290
- (0, device_namespaceObject.defineActionRightClick)(async ({ locate })=>{
1291
- const element = this.requireLocate(locate, 'right click');
1292
- await this.moveToElement(element, {
1293
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1294
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1295
- });
1296
- await this.backend.mouseButton('right', 'click');
1297
- }),
1298
- (0, device_namespaceObject.defineActionHover)(async ({ locate })=>{
1299
- const element = this.requireLocate(locate, 'hover');
1300
- await this.moveToElement(element, {
1301
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1302
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1303
- settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1304
- });
1305
- }),
1306
- (0, device_namespaceObject.defineActionInput)(async (param)=>{
1307
- this.assertConnected();
1308
- if (param.locate) {
1309
- await this.moveToElement(param.locate, {
1310
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1311
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1312
- });
1313
- await this.backend.mouseButton('left', 'click');
1314
- await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1315
- }
1316
- if ('typeOnly' !== param.mode) {
1317
- await this.clearInput();
1318
- await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1319
- }
1320
- if ('clear' === param.mode) return;
1321
- if (param.value) await this.backend.typeText(param.value);
1322
- }),
1323
- (0, device_namespaceObject.defineActionClearInput)(async ({ locate })=>{
1324
- this.assertConnected();
1325
- if (locate) {
1326
- await this.moveToElement(locate, {
1327
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1328
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1329
- });
1330
- await this.backend.mouseButton('left', 'click');
1331
- await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1332
- }
1333
- await this.clearInput();
1334
- await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1335
- }),
1336
- (0, device_namespaceObject.defineActionKeyboardPress)(async ({ locate, keyName })=>{
1337
- this.assertConnected();
1338
- if (locate) {
1339
- await this.moveToElement(locate, {
1340
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1341
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1342
- });
1343
- await this.backend.mouseButton('left', 'click');
1344
- }
1345
- await this.backend.keyPress(keyName);
1346
- }),
1347
- (0, device_namespaceObject.defineActionScroll)(async (param)=>{
1348
- this.assertConnected();
1349
- const target = param.locate;
1350
- if (target) await this.moveToElement(target, {
1351
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1352
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1353
- });
1354
- if (param.scrollType && 'singleAction' !== param.scrollType) {
1355
- const direction = this.edgeScrollDirection(param.scrollType);
1356
- for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1357
- await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1358
- return;
1359
- }
1360
- await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1361
- await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1362
- }),
1363
- (0, device_namespaceObject.defineActionDragAndDrop)(async ({ from, to })=>{
1364
- this.assertConnected();
1365
- const source = this.requireLocate(from, 'drag source');
1366
- const target = this.requireLocate(to, 'drag target');
1367
- await this.moveToElement(source, {
1368
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1369
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1370
- });
1371
- await this.backend.mouseButton('left', 'down');
1372
- await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1373
- await this.moveToElement(target, {
1374
- steps: SMOOTH_MOVE_STEPS_DRAG,
1375
- stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1376
- });
1377
- await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1378
- await this.backend.mouseButton('left', 'up');
1379
- }),
1380
- (0, device_namespaceObject.defineAction)({
1381
- name: 'MiddleClick',
1382
- description: 'Middle click the element',
1383
- sample: {
1384
- locate: {
1385
- prompt: 'the browser tab close target'
1386
- }
1387
- },
1388
- paramSchema: device_namespaceObject.actionTapParamSchema,
1389
- call: async ({ locate })=>{
1390
- const element = this.requireLocate(locate, 'middle click');
1391
- await this.moveToElement(element, {
1392
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1393
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1394
- });
1395
- await this.backend.mouseButton('middle', 'click');
1396
- }
1397
- }),
1245
+ ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives),
1398
1246
  (0, device_namespaceObject.defineAction)({
1399
1247
  name: 'ListDisplays',
1400
1248
  description: 'List all available displays/monitors',
@@ -1423,10 +1271,6 @@ class RDPDevice {
1423
1271
  throwIfDestroyed() {
1424
1272
  if (this.destroyed) throw new Error('RDPDevice has been destroyed');
1425
1273
  }
1426
- requireLocate(locate, actionName) {
1427
- if (!locate) throw new Error(`Missing target element for ${actionName}`);
1428
- return locate;
1429
- }
1430
1274
  async moveToElement(element, options) {
1431
1275
  this.assertConnected();
1432
1276
  const targetX = Math.round(element.center[0]);
@@ -1492,6 +1336,113 @@ class RDPDevice {
1492
1336
  device_define_property(this, "destroyed", false);
1493
1337
  device_define_property(this, "cursorPosition", void 0);
1494
1338
  device_define_property(this, "uri", void 0);
1339
+ device_define_property(this, "inputPrimitives", {
1340
+ pointer: {
1341
+ tap: async ({ x, y })=>{
1342
+ await this.movePointer(Math.round(x), Math.round(y), {
1343
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1344
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1345
+ });
1346
+ await this.backend.mouseButton('left', 'down');
1347
+ await (0, utils_namespaceObject.sleep)(device_CLICK_HOLD_DURATION);
1348
+ await this.backend.mouseButton('left', 'up');
1349
+ },
1350
+ doubleClick: async ({ x, y })=>{
1351
+ await this.movePointer(Math.round(x), Math.round(y), {
1352
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1353
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1354
+ });
1355
+ await this.backend.mouseButton('left', 'doubleClick');
1356
+ },
1357
+ rightClick: async ({ x, y })=>{
1358
+ await this.movePointer(Math.round(x), Math.round(y), {
1359
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1360
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1361
+ });
1362
+ await this.backend.mouseButton('right', 'click');
1363
+ },
1364
+ hover: async ({ x, y })=>{
1365
+ await this.movePointer(Math.round(x), Math.round(y), {
1366
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1367
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1368
+ settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1369
+ });
1370
+ },
1371
+ dragAndDrop: async (from, to)=>{
1372
+ await this.movePointer(Math.round(from.x), Math.round(from.y), {
1373
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1374
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1375
+ });
1376
+ await this.backend.mouseButton('left', 'down');
1377
+ await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1378
+ await this.movePointer(Math.round(to.x), Math.round(to.y), {
1379
+ steps: SMOOTH_MOVE_STEPS_DRAG,
1380
+ stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1381
+ });
1382
+ await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1383
+ await this.backend.mouseButton('left', 'up');
1384
+ }
1385
+ },
1386
+ keyboard: {
1387
+ typeText: async (value, opts)=>{
1388
+ this.assertConnected();
1389
+ const target = opts?.target;
1390
+ if (target) {
1391
+ await this.inputPrimitives.pointer.tap({
1392
+ x: target.center[0],
1393
+ y: target.center[1]
1394
+ });
1395
+ await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1396
+ }
1397
+ if (opts?.replace !== false) {
1398
+ await this.clearInput();
1399
+ await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1400
+ }
1401
+ if (opts?.focusOnly || !value) return;
1402
+ await this.backend.typeText(value);
1403
+ },
1404
+ clearInput: async (target)=>{
1405
+ this.assertConnected();
1406
+ const element = target;
1407
+ if (element) {
1408
+ await this.inputPrimitives.pointer.tap({
1409
+ x: element.center[0],
1410
+ y: element.center[1]
1411
+ });
1412
+ await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1413
+ }
1414
+ await this.clearInput();
1415
+ await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1416
+ },
1417
+ keyboardPress: async (keyName, opts)=>{
1418
+ this.assertConnected();
1419
+ const target = opts?.target;
1420
+ if (target) await this.inputPrimitives.pointer.tap({
1421
+ x: target.center[0],
1422
+ y: target.center[1]
1423
+ });
1424
+ await this.backend.keyPress(keyName);
1425
+ }
1426
+ },
1427
+ scroll: {
1428
+ scroll: async (param)=>{
1429
+ this.assertConnected();
1430
+ const target = param.locate;
1431
+ if (target) await this.moveToElement(target, {
1432
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1433
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1434
+ });
1435
+ if (param.scrollType && 'singleAction' !== param.scrollType) {
1436
+ const direction = this.edgeScrollDirection(param.scrollType);
1437
+ for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1438
+ await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1439
+ return;
1440
+ }
1441
+ await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1442
+ await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1443
+ }
1444
+ }
1445
+ });
1495
1446
  this.options = {
1496
1447
  port: 3389,
1497
1448
  securityProtocol: 'auto',
@@ -1539,6 +1490,7 @@ async function agentForRDPComputer(opts) {
1539
1490
  await device.connect();
1540
1491
  return new ComputerAgent(device, opts);
1541
1492
  }
1493
+ const core_namespaceObject = require("@midscene/core");
1542
1494
  const base_tools_namespaceObject = require("@midscene/shared/mcp/base-tools");
1543
1495
  function mcp_tools_define_property(obj, key, value) {
1544
1496
  if (key in obj) Object.defineProperty(obj, key, {
@@ -1719,26 +1671,38 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
1719
1671
  }
1720
1672
  const env_namespaceObject = require("@midscene/shared/env");
1721
1673
  function version() {
1722
- const currentVersion = "1.8.1-beta-20260513084557.0";
1674
+ const currentVersion = "1.8.1";
1723
1675
  console.log(`@midscene/computer v${currentVersion}`);
1724
1676
  return currentVersion;
1725
1677
  }
1678
/**
 * Attempts to load the optional `node-mac-permissions` module.
 *
 * @returns {{permissions: object|null, loadError?: string}}
 *   - off macOS: `{ permissions: null }` (no load is attempted);
 *   - on macOS, load succeeded: `{ permissions: <module> }`;
 *   - on macOS, load failed: `{ permissions: null, loadError: <message> }`.
 */
function loadMacPermissions() {
    const isMac = 'darwin' === process.platform;
    if (!isMac) {
        return {
            permissions: null
        };
    }
    let loadedModule;
    try {
        // Resolve relative to this bundle so the require works from the built output.
        const requireFromHere = (0, external_node_module_namespaceObject.createRequire)(__rslib_import_meta_url__);
        loadedModule = requireFromHere('node-mac-permissions');
    } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return {
            permissions: null,
            loadError: reason
        };
    }
    return {
        permissions: loadedModule
    };
}
1726
1694
  function checkAccessibilityPermission(promptIfNeeded = false) {
1727
1695
  if ('darwin' !== process.platform) return {
1728
1696
  hasPermission: true,
1729
1697
  platform: process.platform
1730
1698
  };
1731
1699
  try {
1732
- let permissions;
1733
- try {
1734
- const dynamicRequire = (0, external_node_module_namespaceObject.createRequire)(__rslib_import_meta_url__);
1735
- permissions = dynamicRequire('node-mac-permissions');
1736
- } catch {
1737
- return {
1738
- hasPermission: true,
1739
- platform: process.platform
1740
- };
1741
- }
1700
+ const { permissions, loadError } = loadMacPermissions();
1701
+ if (!permissions) return {
1702
+ hasPermission: false,
1703
+ platform: process.platform,
1704
+ error: `Cannot verify macOS Accessibility permission: node-mac-permissions is unavailable${loadError ? ` (${loadError})` : ''}. The native module may need to be rebuilt for the current Node/Electron ABI.`
1705
+ };
1742
1706
  const status = permissions.getAuthStatus('accessibility');
1743
1707
  if ('authorized' === status) return {
1744
1708
  hasPermission: true,
@@ -1758,6 +1722,37 @@ function checkAccessibilityPermission(promptIfNeeded = false) {
1758
1722
  };
1759
1723
  }
1760
1724
  }
1725
/**
 * Reports whether the current process has macOS Screen Recording permission.
 *
 * Off macOS the check is skipped and permission is reported as granted.
 * On macOS, every failure (module unavailable, status not 'authorized',
 * or an exception) is reported as `hasPermission: false` with an `error`
 * message; nothing is thrown to the caller.
 *
 * @param {boolean} [promptIfNeeded=false] - When true and the status is not
 *   'authorized', calls `askForScreenCaptureAccess(true)` on the permissions module.
 * @returns {{hasPermission: boolean, platform: string, error?: string}}
 */
function checkScreenRecordingPermission(promptIfNeeded = false) {
    const { platform } = process;
    if ('darwin' !== platform) {
        return {
            hasPermission: true,
            platform
        };
    }
    // All negative outcomes share the same result shape.
    const denied = (error)=>({
            hasPermission: false,
            platform,
            error
        });
    try {
        const { permissions: macPermissions, loadError } = loadMacPermissions();
        if (!macPermissions) {
            const detail = loadError ? ` (${loadError})` : '';
            return denied(`Cannot verify macOS Screen Recording permission: node-mac-permissions is unavailable${detail}. The native module may need to be rebuilt for the current Node/Electron ABI.`);
        }
        const status = macPermissions.getAuthStatus('screen');
        if ('authorized' === status) {
            return {
                hasPermission: true,
                platform
            };
        }
        if (promptIfNeeded) {
            macPermissions.askForScreenCaptureAccess(true);
        }
        return denied(`macOS Screen Recording permission is required (current status: ${status}).\n\nPlease follow these steps:\n1. Open System Settings > Privacy & Security > Screen Recording\n2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)\n3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.`);
    } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return denied(`Failed to check screen recording permission: ${reason}`);
    }
}
1761
1756
  async function checkComputerEnvironment() {
1762
1757
  try {
1763
1758
  const libnutModule = await import("@computer-use/libnut/dist/import_libnut.js");
@@ -1799,6 +1794,7 @@ exports.agentForRDPComputer = __webpack_exports__.agentForRDPComputer;
1799
1794
  exports.agentFromComputer = __webpack_exports__.agentFromComputer;
1800
1795
  exports.checkAccessibilityPermission = __webpack_exports__.checkAccessibilityPermission;
1801
1796
  exports.checkComputerEnvironment = __webpack_exports__.checkComputerEnvironment;
1797
+ exports.checkScreenRecordingPermission = __webpack_exports__.checkScreenRecordingPermission;
1802
1798
  exports.checkXvfbInstalled = __webpack_exports__.checkXvfbInstalled;
1803
1799
  exports.createDefaultRDPBackendClient = __webpack_exports__.createDefaultRDPBackendClient;
1804
1800
  exports.getConnectedDisplays = __webpack_exports__.getConnectedDisplays;
@@ -1817,6 +1813,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
1817
1813
  "agentFromComputer",
1818
1814
  "checkAccessibilityPermission",
1819
1815
  "checkComputerEnvironment",
1816
+ "checkScreenRecordingPermission",
1820
1817
  "checkXvfbInstalled",
1821
1818
  "createDefaultRDPBackendClient",
1822
1819
  "getConnectedDisplays",