@midscene/computer 1.8.1-beta-20260513084557.0 → 1.8.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -49,7 +49,6 @@ const external_node_fs_namespaceObject = require("node:fs");
49
49
  const external_node_module_namespaceObject = require("node:module");
50
50
  const external_node_path_namespaceObject = require("node:path");
51
51
  const external_node_url_namespaceObject = require("node:url");
52
- const core_namespaceObject = require("@midscene/core");
53
52
  const device_namespaceObject = require("@midscene/core/device");
54
53
  const utils_namespaceObject = require("@midscene/core/utils");
55
54
  const img_namespaceObject = require("@midscene/shared/img");
@@ -133,15 +132,6 @@ function _define_property(obj, key, value) {
133
132
  else obj[key] = value;
134
133
  return obj;
135
134
  }
136
- const computerInputParamSchema = core_namespaceObject.z.object({
137
- value: core_namespaceObject.z.string().describe('The text to input'),
138
- mode: core_namespaceObject.z["enum"]([
139
- 'replace',
140
- 'clear',
141
- 'append'
142
- ]).default('replace').optional().describe('Input mode: replace, clear, or append'),
143
- locate: (0, core_namespaceObject.getMidsceneLocationSchema)().describe('The input field to be filled').optional()
144
- });
145
135
  const SMOOTH_MOVE_STEPS_TAP = 8;
146
136
  const SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
147
137
  const SMOOTH_MOVE_DELAY_TAP = 8;
@@ -452,7 +442,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
452
442
  }
453
443
  async healthCheck() {
454
444
  console.log('[HealthCheck] Starting health check...');
455
- console.log("[HealthCheck] @midscene/computer v1.8.1-beta-20260513084557.0");
445
+ console.log("[HealthCheck] @midscene/computer v1.8.1");
456
446
  console.log('[HealthCheck] Taking screenshot...');
457
447
  const screenshotTimeout = 15000;
458
448
  let timeoutId;
@@ -518,21 +508,38 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
518
508
  debugDevice('Taking screenshot', {
519
509
  displayId: this.displayId
520
510
  });
521
- try {
522
- const options = {
523
- format: 'png'
524
- };
525
- if (void 0 !== this.displayId) if ('darwin' === process.platform) {
526
- const screenIndex = Number(this.displayId);
527
- if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
528
- } else options.screen = this.displayId;
529
- debugDevice('Screenshot options', options);
511
+ const options = {
512
+ format: 'png'
513
+ };
514
+ if (void 0 !== this.displayId) if ('darwin' === process.platform) {
515
+ const screenIndex = Number(this.displayId);
516
+ if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
517
+ } else options.screen = this.displayId;
518
+ debugDevice('Screenshot options', options);
519
+ const MAX_ATTEMPTS = 3;
520
+ const RETRY_DELAY_MS = 300;
521
+ let lastRawMessage = '';
522
+ for(let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)try {
530
523
  const buffer = await external_screenshot_desktop_default()(options);
524
+ if (attempt > 1) debugDevice(`Screenshot succeeded on attempt ${attempt}`);
531
525
  return (0, img_namespaceObject.createImgBase64ByFormat)('png', buffer.toString('base64'));
532
526
  } catch (error) {
533
- debugDevice(`Screenshot failed: ${error}`);
534
- throw new Error(`Failed to take screenshot: ${error}`);
527
+ lastRawMessage = error instanceof Error ? error.message : String(error);
528
+ const isMacTransient = 'darwin' === process.platform && /could not create image from display/i.test(lastRawMessage);
529
+ const willRetry = isMacTransient && attempt < MAX_ATTEMPTS;
530
+ debugDevice(`Screenshot attempt ${attempt} failed: ${lastRawMessage}${willRetry ? ' — retrying' : ''}`);
531
+ if (!willRetry) break;
532
+ await (0, utils_namespaceObject.sleep)(RETRY_DELAY_MS);
535
533
  }
534
+ if ('darwin' === process.platform && /could not create image from display/i.test(lastRawMessage)) throw new Error(`Failed to take screenshot on macOS: the host process is missing Screen Recording permission, or the target display is locked/sleeping.
535
+
536
+ Please follow these steps:
537
+ 1. Open System Settings > Privacy & Security > Screen Recording
538
+ 2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)
539
+ 3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.
540
+
541
+ Original error: ${lastRawMessage}`);
542
+ throw new Error(`Failed to take screenshot: ${lastRawMessage}`);
536
543
  }
537
544
  async size() {
538
545
  external_node_assert_default()(libnut, 'libnut not initialized');
@@ -578,228 +585,111 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
578
585
  external_node_assert_default()(libnut, 'libnut not initialized');
579
586
  await this.typeViaClipboard(text);
580
587
  }
588
+ async selectAllAndDelete() {
589
+ external_node_assert_default()(libnut, 'libnut not initialized');
590
+ if (this.useAppleScript) {
591
+ sendKeyViaAppleScript('a', [
592
+ 'command'
593
+ ]);
594
+ await (0, utils_namespaceObject.sleep)(50);
595
+ sendKeyViaAppleScript('backspace', []);
596
+ return;
597
+ }
598
+ const modifier = 'darwin' === process.platform ? 'command' : 'control';
599
+ libnut.keyTap('a', [
600
+ modifier
601
+ ]);
602
+ await (0, utils_namespaceObject.sleep)(50);
603
+ libnut.keyTap('backspace');
604
+ }
605
+ async pressKeyboardShortcut(keyName) {
606
+ external_node_assert_default()(libnut, 'libnut not initialized');
607
+ const keys = keyName.split('+');
608
+ const modifiers = keys.slice(0, -1).map(normalizeKeyName);
609
+ const key = normalizePrimaryKey(keys[keys.length - 1]);
610
+ debugDevice('KeyboardPress', {
611
+ original: keyName,
612
+ key,
613
+ modifiers,
614
+ driver: this.useAppleScript ? "applescript" : 'libnut'
615
+ });
616
+ if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
617
+ else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
618
+ else libnut.keyTap(key);
619
+ }
620
+ async performScroll(param) {
621
+ external_node_assert_default()(libnut, 'libnut not initialized');
622
+ if (param.locate) {
623
+ const element = param.locate;
624
+ const [x, y] = element.center;
625
+ libnut.moveMouse(Math.round(x), Math.round(y));
626
+ }
627
+ const scrollType = param?.scrollType;
628
+ const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
629
+ if (edgeSpec) {
630
+ if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
631
+ if (this.useAppleScript) {
632
+ sendKeyViaAppleScript(edgeSpec.key);
633
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
634
+ return;
635
+ }
636
+ const [dx, dy] = edgeSpec.libnut;
637
+ for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
638
+ libnut.scrollMouse(dx, dy);
639
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
640
+ }
641
+ return;
642
+ }
643
+ if ('singleAction' === scrollType || !scrollType) {
644
+ const distance = param?.distance || 500;
645
+ const direction = param?.direction || 'down';
646
+ const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
647
+ if (isKnownDirection) {
648
+ const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
649
+ if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
650
+ }
651
+ if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
652
+ const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
653
+ const key = 'up' === direction ? 'pageup' : 'pagedown';
654
+ for(let i = 0; i < pages; i++){
655
+ sendKeyViaAppleScript(key);
656
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
657
+ }
658
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
659
+ return;
660
+ }
661
+ const ticks = Math.ceil(distance / 100);
662
+ const directionMap = {
663
+ up: [
664
+ 0,
665
+ ticks
666
+ ],
667
+ down: [
668
+ 0,
669
+ -ticks
670
+ ],
671
+ left: [
672
+ -ticks,
673
+ 0
674
+ ],
675
+ right: [
676
+ ticks,
677
+ 0
678
+ ]
679
+ };
680
+ const [dx, dy] = directionMap[direction] || [
681
+ 0,
682
+ -ticks
683
+ ];
684
+ libnut.scrollMouse(dx, dy);
685
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
686
+ return;
687
+ }
688
+ throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
689
+ }
581
690
  actionSpace() {
582
691
  const defaultActions = [
583
- (0, device_namespaceObject.defineActionTap)(async (param)=>{
584
- external_node_assert_default()(libnut, 'libnut not initialized');
585
- const element = param.locate;
586
- external_node_assert_default()(element, 'Element not found, cannot tap');
587
- const [x, y] = element.center;
588
- const targetX = Math.round(x);
589
- const targetY = Math.round(y);
590
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
591
- libnut.mouseToggle('down', 'left');
592
- await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
593
- libnut.mouseToggle('up', 'left');
594
- }),
595
- (0, device_namespaceObject.defineActionDoubleClick)(async (param)=>{
596
- external_node_assert_default()(libnut, 'libnut not initialized');
597
- const element = param.locate;
598
- external_node_assert_default()(element, 'Element not found, cannot double click');
599
- const [x, y] = element.center;
600
- libnut.moveMouse(Math.round(x), Math.round(y));
601
- libnut.mouseClick('left', true);
602
- }),
603
- (0, device_namespaceObject.defineActionRightClick)(async (param)=>{
604
- external_node_assert_default()(libnut, 'libnut not initialized');
605
- const element = param.locate;
606
- external_node_assert_default()(element, 'Element not found, cannot right click');
607
- const [x, y] = element.center;
608
- libnut.moveMouse(Math.round(x), Math.round(y));
609
- libnut.mouseClick('right');
610
- }),
611
- (0, device_namespaceObject.defineAction)({
612
- name: 'MouseMove',
613
- description: 'Move the mouse to the element',
614
- interfaceAlias: 'aiHover',
615
- paramSchema: device_namespaceObject.actionHoverParamSchema,
616
- sample: {
617
- locate: {
618
- prompt: 'the navigation menu item "Products"'
619
- }
620
- },
621
- call: async (param)=>{
622
- external_node_assert_default()(libnut, 'libnut not initialized');
623
- const element = param.locate;
624
- external_node_assert_default()(element, 'Element not found, cannot move mouse');
625
- const [x, y] = element.center;
626
- const targetX = Math.round(x);
627
- const targetY = Math.round(y);
628
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
629
- await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
630
- }
631
- }),
632
- (0, device_namespaceObject.defineAction)({
633
- name: 'Input',
634
- description: 'Input text into the input field',
635
- interfaceAlias: 'aiInput',
636
- paramSchema: computerInputParamSchema,
637
- sample: {
638
- value: 'test@example.com',
639
- locate: {
640
- prompt: 'the email input field'
641
- }
642
- },
643
- call: async (param)=>{
644
- external_node_assert_default()(libnut, 'libnut not initialized');
645
- const element = param.locate;
646
- if (element) {
647
- const [x, y] = element.center;
648
- libnut.moveMouse(Math.round(x), Math.round(y));
649
- libnut.mouseClick('left');
650
- await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
651
- if ('append' !== param.mode) {
652
- if (this.useAppleScript) {
653
- sendKeyViaAppleScript('a', [
654
- 'command'
655
- ]);
656
- await (0, utils_namespaceObject.sleep)(50);
657
- sendKeyViaAppleScript('backspace', []);
658
- } else {
659
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
660
- libnut.keyTap('a', [
661
- modifier
662
- ]);
663
- await (0, utils_namespaceObject.sleep)(50);
664
- libnut.keyTap('backspace');
665
- }
666
- await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
667
- }
668
- }
669
- if ('clear' === param.mode) return;
670
- if (!param.value) return;
671
- await this.smartTypeString(param.value);
672
- }
673
- }),
674
- (0, device_namespaceObject.defineActionScroll)(async (param)=>{
675
- external_node_assert_default()(libnut, 'libnut not initialized');
676
- if (param.locate) {
677
- const element = param.locate;
678
- const [x, y] = element.center;
679
- libnut.moveMouse(Math.round(x), Math.round(y));
680
- }
681
- const scrollType = param?.scrollType;
682
- const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
683
- if (edgeSpec) {
684
- if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
685
- if (this.useAppleScript) {
686
- sendKeyViaAppleScript(edgeSpec.key);
687
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
688
- return;
689
- }
690
- const [dx, dy] = edgeSpec.libnut;
691
- for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
692
- libnut.scrollMouse(dx, dy);
693
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
694
- }
695
- return;
696
- }
697
- if ('singleAction' === scrollType || !scrollType) {
698
- const distance = param?.distance || 500;
699
- const direction = param?.direction || 'down';
700
- const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
701
- if (isKnownDirection) {
702
- const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
703
- if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
704
- }
705
- if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
706
- const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
707
- const key = 'up' === direction ? 'pageup' : 'pagedown';
708
- for(let i = 0; i < pages; i++){
709
- sendKeyViaAppleScript(key);
710
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
711
- }
712
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
713
- return;
714
- }
715
- const ticks = Math.ceil(distance / 100);
716
- const directionMap = {
717
- up: [
718
- 0,
719
- ticks
720
- ],
721
- down: [
722
- 0,
723
- -ticks
724
- ],
725
- left: [
726
- -ticks,
727
- 0
728
- ],
729
- right: [
730
- ticks,
731
- 0
732
- ]
733
- };
734
- const [dx, dy] = directionMap[direction] || [
735
- 0,
736
- -ticks
737
- ];
738
- libnut.scrollMouse(dx, dy);
739
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
740
- return;
741
- }
742
- throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
743
- }),
744
- (0, device_namespaceObject.defineActionKeyboardPress)(async (param)=>{
745
- external_node_assert_default()(libnut, 'libnut not initialized');
746
- if (param.locate) {
747
- const [x, y] = param.locate.center;
748
- libnut.moveMouse(Math.round(x), Math.round(y));
749
- libnut.mouseClick('left');
750
- await (0, utils_namespaceObject.sleep)(50);
751
- }
752
- const keys = param.keyName.split('+');
753
- const modifiers = keys.slice(0, -1).map(normalizeKeyName);
754
- const key = normalizePrimaryKey(keys[keys.length - 1]);
755
- debugDevice('KeyboardPress', {
756
- original: param.keyName,
757
- key,
758
- modifiers,
759
- driver: this.useAppleScript ? "applescript" : 'libnut'
760
- });
761
- if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
762
- else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
763
- else libnut.keyTap(key);
764
- }),
765
- (0, device_namespaceObject.defineActionDragAndDrop)(async (param)=>{
766
- external_node_assert_default()(libnut, 'libnut not initialized');
767
- const from = param.from;
768
- const to = param.to;
769
- external_node_assert_default()(from, 'missing "from" param for drag and drop');
770
- external_node_assert_default()(to, 'missing "to" param for drag and drop');
771
- const [fromX, fromY] = from.center;
772
- const [toX, toY] = to.center;
773
- libnut.moveMouse(Math.round(fromX), Math.round(fromY));
774
- libnut.mouseToggle('down', 'left');
775
- await (0, utils_namespaceObject.sleep)(100);
776
- libnut.moveMouse(Math.round(toX), Math.round(toY));
777
- await (0, utils_namespaceObject.sleep)(100);
778
- libnut.mouseToggle('up', 'left');
779
- }),
780
- (0, device_namespaceObject.defineActionClearInput)(async (param)=>{
781
- external_node_assert_default()(libnut, 'libnut not initialized');
782
- const element = param.locate;
783
- external_node_assert_default()(element, 'Element not found, cannot clear input');
784
- const [x, y] = element.center;
785
- libnut.moveMouse(Math.round(x), Math.round(y));
786
- libnut.mouseClick('left');
787
- await (0, utils_namespaceObject.sleep)(100);
788
- if (this.useAppleScript) {
789
- sendKeyViaAppleScript('a', [
790
- 'command'
791
- ]);
792
- await (0, utils_namespaceObject.sleep)(50);
793
- sendKeyViaAppleScript('backspace', []);
794
- } else {
795
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
796
- libnut.keyTap('a', [
797
- modifier
798
- ]);
799
- libnut.keyTap('backspace');
800
- }
801
- await (0, utils_namespaceObject.sleep)(50);
802
- })
692
+ ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives)
803
693
  ];
804
694
  const platformActions = Object.values(createPlatformActions());
805
695
  const customActions = this.options?.customActions || [];
@@ -837,6 +727,88 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
837
727
  _define_property(this, "xvfbCleanup", void 0);
838
728
  _define_property(this, "useAppleScript", void 0);
839
729
  _define_property(this, "uri", void 0);
730
+ _define_property(this, "inputPrimitives", {
731
+ pointer: {
732
+ tap: async ({ x, y })=>{
733
+ external_node_assert_default()(libnut, 'libnut not initialized');
734
+ const targetX = Math.round(x);
735
+ const targetY = Math.round(y);
736
+ await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
737
+ libnut.mouseToggle('down', 'left');
738
+ await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
739
+ libnut.mouseToggle('up', 'left');
740
+ },
741
+ doubleClick: async ({ x, y })=>{
742
+ external_node_assert_default()(libnut, 'libnut not initialized');
743
+ libnut.moveMouse(Math.round(x), Math.round(y));
744
+ libnut.mouseClick('left', true);
745
+ },
746
+ rightClick: async ({ x, y })=>{
747
+ external_node_assert_default()(libnut, 'libnut not initialized');
748
+ libnut.moveMouse(Math.round(x), Math.round(y));
749
+ libnut.mouseClick('right');
750
+ },
751
+ hover: async ({ x, y })=>{
752
+ external_node_assert_default()(libnut, 'libnut not initialized');
753
+ await smoothMoveMouse(Math.round(x), Math.round(y), SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
754
+ await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
755
+ },
756
+ dragAndDrop: async (from, to)=>{
757
+ external_node_assert_default()(libnut, 'libnut not initialized');
758
+ libnut.moveMouse(Math.round(from.x), Math.round(from.y));
759
+ libnut.mouseToggle('down', 'left');
760
+ await (0, utils_namespaceObject.sleep)(100);
761
+ libnut.moveMouse(Math.round(to.x), Math.round(to.y));
762
+ await (0, utils_namespaceObject.sleep)(100);
763
+ libnut.mouseToggle('up', 'left');
764
+ }
765
+ },
766
+ keyboard: {
767
+ typeText: async (value, opts)=>{
768
+ external_node_assert_default()(libnut, 'libnut not initialized');
769
+ const element = opts?.target;
770
+ if (element) {
771
+ const [x, y] = element.center;
772
+ libnut.moveMouse(Math.round(x), Math.round(y));
773
+ libnut.mouseClick('left');
774
+ await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
775
+ if (opts?.replace !== false) {
776
+ await this.selectAllAndDelete();
777
+ await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
778
+ }
779
+ }
780
+ await this.smartTypeString(value);
781
+ },
782
+ keyboardPress: async (keyName, opts)=>{
783
+ external_node_assert_default()(libnut, 'libnut not initialized');
784
+ const target = opts?.target;
785
+ if (target) {
786
+ const [x, y] = target.center;
787
+ libnut.moveMouse(Math.round(x), Math.round(y));
788
+ libnut.mouseClick('left');
789
+ await (0, utils_namespaceObject.sleep)(50);
790
+ }
791
+ await this.pressKeyboardShortcut(keyName);
792
+ },
793
+ clearInput: async (target)=>{
794
+ external_node_assert_default()(libnut, 'libnut not initialized');
795
+ if (target) {
796
+ const element = target;
797
+ const [x, y] = element.center;
798
+ libnut.moveMouse(Math.round(x), Math.round(y));
799
+ libnut.mouseClick('left');
800
+ await (0, utils_namespaceObject.sleep)(100);
801
+ }
802
+ await this.selectAllAndDelete();
803
+ await (0, utils_namespaceObject.sleep)(50);
804
+ }
805
+ },
806
+ scroll: {
807
+ scroll: async (param)=>{
808
+ await this.performScroll(param);
809
+ }
810
+ }
811
+ });
840
812
  this.options = options;
841
813
  this.displayId = options?.displayId;
842
814
  this.useAppleScript = 'darwin' === process.platform && options?.keyboardDriver !== 'libnut';
@@ -1213,132 +1185,7 @@ class RDPDevice {
1213
1185
  }
1214
1186
  actionSpace() {
1215
1187
  const defaultActions = [
1216
- (0, device_namespaceObject.defineActionTap)(async ({ locate })=>{
1217
- const element = this.requireLocate(locate, 'tap');
1218
- await this.moveToElement(element, {
1219
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1220
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1221
- });
1222
- await this.backend.mouseButton('left', 'down');
1223
- await (0, utils_namespaceObject.sleep)(device_CLICK_HOLD_DURATION);
1224
- await this.backend.mouseButton('left', 'up');
1225
- }),
1226
- (0, device_namespaceObject.defineActionDoubleClick)(async ({ locate })=>{
1227
- const element = this.requireLocate(locate, 'double click');
1228
- await this.moveToElement(element, {
1229
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1230
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1231
- });
1232
- await this.backend.mouseButton('left', 'doubleClick');
1233
- }),
1234
- (0, device_namespaceObject.defineActionRightClick)(async ({ locate })=>{
1235
- const element = this.requireLocate(locate, 'right click');
1236
- await this.moveToElement(element, {
1237
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1238
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1239
- });
1240
- await this.backend.mouseButton('right', 'click');
1241
- }),
1242
- (0, device_namespaceObject.defineActionHover)(async ({ locate })=>{
1243
- const element = this.requireLocate(locate, 'hover');
1244
- await this.moveToElement(element, {
1245
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1246
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1247
- settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1248
- });
1249
- }),
1250
- (0, device_namespaceObject.defineActionInput)(async (param)=>{
1251
- this.assertConnected();
1252
- if (param.locate) {
1253
- await this.moveToElement(param.locate, {
1254
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1255
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1256
- });
1257
- await this.backend.mouseButton('left', 'click');
1258
- await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1259
- }
1260
- if ('typeOnly' !== param.mode) {
1261
- await this.clearInput();
1262
- await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1263
- }
1264
- if ('clear' === param.mode) return;
1265
- if (param.value) await this.backend.typeText(param.value);
1266
- }),
1267
- (0, device_namespaceObject.defineActionClearInput)(async ({ locate })=>{
1268
- this.assertConnected();
1269
- if (locate) {
1270
- await this.moveToElement(locate, {
1271
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1272
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1273
- });
1274
- await this.backend.mouseButton('left', 'click');
1275
- await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1276
- }
1277
- await this.clearInput();
1278
- await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1279
- }),
1280
- (0, device_namespaceObject.defineActionKeyboardPress)(async ({ locate, keyName })=>{
1281
- this.assertConnected();
1282
- if (locate) {
1283
- await this.moveToElement(locate, {
1284
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1285
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1286
- });
1287
- await this.backend.mouseButton('left', 'click');
1288
- }
1289
- await this.backend.keyPress(keyName);
1290
- }),
1291
- (0, device_namespaceObject.defineActionScroll)(async (param)=>{
1292
- this.assertConnected();
1293
- const target = param.locate;
1294
- if (target) await this.moveToElement(target, {
1295
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1296
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1297
- });
1298
- if (param.scrollType && 'singleAction' !== param.scrollType) {
1299
- const direction = this.edgeScrollDirection(param.scrollType);
1300
- for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1301
- await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1302
- return;
1303
- }
1304
- await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1305
- await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1306
- }),
1307
- (0, device_namespaceObject.defineActionDragAndDrop)(async ({ from, to })=>{
1308
- this.assertConnected();
1309
- const source = this.requireLocate(from, 'drag source');
1310
- const target = this.requireLocate(to, 'drag target');
1311
- await this.moveToElement(source, {
1312
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1313
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1314
- });
1315
- await this.backend.mouseButton('left', 'down');
1316
- await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1317
- await this.moveToElement(target, {
1318
- steps: SMOOTH_MOVE_STEPS_DRAG,
1319
- stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1320
- });
1321
- await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1322
- await this.backend.mouseButton('left', 'up');
1323
- }),
1324
- (0, device_namespaceObject.defineAction)({
1325
- name: 'MiddleClick',
1326
- description: 'Middle click the element',
1327
- sample: {
1328
- locate: {
1329
- prompt: 'the browser tab close target'
1330
- }
1331
- },
1332
- paramSchema: device_namespaceObject.actionTapParamSchema,
1333
- call: async ({ locate })=>{
1334
- const element = this.requireLocate(locate, 'middle click');
1335
- await this.moveToElement(element, {
1336
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1337
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1338
- });
1339
- await this.backend.mouseButton('middle', 'click');
1340
- }
1341
- }),
1188
+ ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives),
1342
1189
  (0, device_namespaceObject.defineAction)({
1343
1190
  name: 'ListDisplays',
1344
1191
  description: 'List all available displays/monitors',
@@ -1367,10 +1214,6 @@ class RDPDevice {
1367
1214
  throwIfDestroyed() {
1368
1215
  if (this.destroyed) throw new Error('RDPDevice has been destroyed');
1369
1216
  }
1370
- requireLocate(locate, actionName) {
1371
- if (!locate) throw new Error(`Missing target element for ${actionName}`);
1372
- return locate;
1373
- }
1374
1217
  async moveToElement(element, options) {
1375
1218
  this.assertConnected();
1376
1219
  const targetX = Math.round(element.center[0]);
@@ -1436,6 +1279,113 @@ class RDPDevice {
1436
1279
  device_define_property(this, "destroyed", false);
1437
1280
  device_define_property(this, "cursorPosition", void 0);
1438
1281
  device_define_property(this, "uri", void 0);
1282
+ device_define_property(this, "inputPrimitives", {
1283
+ pointer: {
1284
+ tap: async ({ x, y })=>{
1285
+ await this.movePointer(Math.round(x), Math.round(y), {
1286
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1287
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1288
+ });
1289
+ await this.backend.mouseButton('left', 'down');
1290
+ await (0, utils_namespaceObject.sleep)(device_CLICK_HOLD_DURATION);
1291
+ await this.backend.mouseButton('left', 'up');
1292
+ },
1293
+ doubleClick: async ({ x, y })=>{
1294
+ await this.movePointer(Math.round(x), Math.round(y), {
1295
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1296
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1297
+ });
1298
+ await this.backend.mouseButton('left', 'doubleClick');
1299
+ },
1300
+ rightClick: async ({ x, y })=>{
1301
+ await this.movePointer(Math.round(x), Math.round(y), {
1302
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1303
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1304
+ });
1305
+ await this.backend.mouseButton('right', 'click');
1306
+ },
1307
+ hover: async ({ x, y })=>{
1308
+ await this.movePointer(Math.round(x), Math.round(y), {
1309
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1310
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1311
+ settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1312
+ });
1313
+ },
1314
+ dragAndDrop: async (from, to)=>{
1315
+ await this.movePointer(Math.round(from.x), Math.round(from.y), {
1316
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1317
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1318
+ });
1319
+ await this.backend.mouseButton('left', 'down');
1320
+ await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1321
+ await this.movePointer(Math.round(to.x), Math.round(to.y), {
1322
+ steps: SMOOTH_MOVE_STEPS_DRAG,
1323
+ stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1324
+ });
1325
+ await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
1326
+ await this.backend.mouseButton('left', 'up');
1327
+ }
1328
+ },
1329
+ keyboard: {
1330
+ typeText: async (value, opts)=>{
1331
+ this.assertConnected();
1332
+ const target = opts?.target;
1333
+ if (target) {
1334
+ await this.inputPrimitives.pointer.tap({
1335
+ x: target.center[0],
1336
+ y: target.center[1]
1337
+ });
1338
+ await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1339
+ }
1340
+ if (opts?.replace !== false) {
1341
+ await this.clearInput();
1342
+ await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1343
+ }
1344
+ if (opts?.focusOnly || !value) return;
1345
+ await this.backend.typeText(value);
1346
+ },
1347
+ clearInput: async (target)=>{
1348
+ this.assertConnected();
1349
+ const element = target;
1350
+ if (element) {
1351
+ await this.inputPrimitives.pointer.tap({
1352
+ x: element.center[0],
1353
+ y: element.center[1]
1354
+ });
1355
+ await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
1356
+ }
1357
+ await this.clearInput();
1358
+ await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
1359
+ },
1360
+ keyboardPress: async (keyName, opts)=>{
1361
+ this.assertConnected();
1362
+ const target = opts?.target;
1363
+ if (target) await this.inputPrimitives.pointer.tap({
1364
+ x: target.center[0],
1365
+ y: target.center[1]
1366
+ });
1367
+ await this.backend.keyPress(keyName);
1368
+ }
1369
+ },
1370
+ scroll: {
1371
+ scroll: async (param)=>{
1372
+ this.assertConnected();
1373
+ const target = param.locate;
1374
+ if (target) await this.moveToElement(target, {
1375
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1376
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1377
+ });
1378
+ if (param.scrollType && 'singleAction' !== param.scrollType) {
1379
+ const direction = this.edgeScrollDirection(param.scrollType);
1380
+ for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1381
+ await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1382
+ return;
1383
+ }
1384
+ await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1385
+ await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
1386
+ }
1387
+ }
1388
+ });
1439
1389
  this.options = {
1440
1390
  port: 3389,
1441
1391
  securityProtocol: 'auto',
@@ -1483,6 +1433,7 @@ async function agentForRDPComputer(opts) {
1483
1433
  await device.connect();
1484
1434
  return new ComputerAgent(device, opts);
1485
1435
  }
1436
+ const core_namespaceObject = require("@midscene/core");
1486
1437
  const base_tools_namespaceObject = require("@midscene/shared/mcp/base-tools");
1487
1438
  function mcp_tools_define_property(obj, key, value) {
1488
1439
  if (key in obj) Object.defineProperty(obj, key, {
@@ -1668,7 +1619,7 @@ class ComputerMCPServer extends mcp_namespaceObject.BaseMCPServer {
1668
1619
  constructor(toolsManager){
1669
1620
  super({
1670
1621
  name: '@midscene/computer-mcp',
1671
- version: "1.8.1-beta-20260513084557.0",
1622
+ version: "1.8.1",
1672
1623
  description: 'Control the computer desktop using natural language commands'
1673
1624
  }, toolsManager);
1674
1625
  }