@midscene/computer 1.8.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,7 +49,6 @@ const external_node_fs_namespaceObject = require("node:fs");
49
49
  const external_node_module_namespaceObject = require("node:module");
50
50
  const external_node_path_namespaceObject = require("node:path");
51
51
  const external_node_url_namespaceObject = require("node:url");
52
- const core_namespaceObject = require("@midscene/core");
53
52
  const device_namespaceObject = require("@midscene/core/device");
54
53
  const utils_namespaceObject = require("@midscene/core/utils");
55
54
  const img_namespaceObject = require("@midscene/shared/img");
@@ -133,15 +132,6 @@ function _define_property(obj, key, value) {
133
132
  else obj[key] = value;
134
133
  return obj;
135
134
  }
136
- const computerInputParamSchema = core_namespaceObject.z.object({
137
- value: core_namespaceObject.z.string().describe('The text to input'),
138
- mode: core_namespaceObject.z["enum"]([
139
- 'replace',
140
- 'clear',
141
- 'append'
142
- ]).default('replace').optional().describe('Input mode: replace, clear, or append'),
143
- locate: (0, core_namespaceObject.getMidsceneLocationSchema)().describe('The input field to be filled').optional()
144
- });
145
135
  const SMOOTH_MOVE_STEPS_TAP = 8;
146
136
  const SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
147
137
  const SMOOTH_MOVE_DELAY_TAP = 8;
@@ -452,7 +442,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
452
442
  }
453
443
  async healthCheck() {
454
444
  console.log('[HealthCheck] Starting health check...');
455
- console.log("[HealthCheck] @midscene/computer v1.8.0");
445
+ console.log("[HealthCheck] @midscene/computer v1.8.1");
456
446
  console.log('[HealthCheck] Taking screenshot...');
457
447
  const screenshotTimeout = 15000;
458
448
  let timeoutId;
@@ -518,21 +508,38 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
518
508
  debugDevice('Taking screenshot', {
519
509
  displayId: this.displayId
520
510
  });
521
- try {
522
- const options = {
523
- format: 'png'
524
- };
525
- if (void 0 !== this.displayId) if ('darwin' === process.platform) {
526
- const screenIndex = Number(this.displayId);
527
- if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
528
- } else options.screen = this.displayId;
529
- debugDevice('Screenshot options', options);
511
+ const options = {
512
+ format: 'png'
513
+ };
514
+ if (void 0 !== this.displayId) if ('darwin' === process.platform) {
515
+ const screenIndex = Number(this.displayId);
516
+ if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
517
+ } else options.screen = this.displayId;
518
+ debugDevice('Screenshot options', options);
519
+ const MAX_ATTEMPTS = 3;
520
+ const RETRY_DELAY_MS = 300;
521
+ let lastRawMessage = '';
522
+ for(let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)try {
530
523
  const buffer = await external_screenshot_desktop_default()(options);
524
+ if (attempt > 1) debugDevice(`Screenshot succeeded on attempt ${attempt}`);
531
525
  return (0, img_namespaceObject.createImgBase64ByFormat)('png', buffer.toString('base64'));
532
526
  } catch (error) {
533
- debugDevice(`Screenshot failed: ${error}`);
534
- throw new Error(`Failed to take screenshot: ${error}`);
527
+ lastRawMessage = error instanceof Error ? error.message : String(error);
528
+ const isMacTransient = 'darwin' === process.platform && /could not create image from display/i.test(lastRawMessage);
529
+ const willRetry = isMacTransient && attempt < MAX_ATTEMPTS;
530
+ debugDevice(`Screenshot attempt ${attempt} failed: ${lastRawMessage}${willRetry ? ' — retrying' : ''}`);
531
+ if (!willRetry) break;
532
+ await (0, utils_namespaceObject.sleep)(RETRY_DELAY_MS);
535
533
  }
534
+ if ('darwin' === process.platform && /could not create image from display/i.test(lastRawMessage)) throw new Error(`Failed to take screenshot on macOS: the host process is missing Screen Recording permission, or the target display is locked/sleeping.
535
+
536
+ Please follow these steps:
537
+ 1. Open System Settings > Privacy & Security > Screen Recording
538
+ 2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)
539
+ 3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.
540
+
541
+ Original error: ${lastRawMessage}`);
542
+ throw new Error(`Failed to take screenshot: ${lastRawMessage}`);
536
543
  }
537
544
  async size() {
538
545
  external_node_assert_default()(libnut, 'libnut not initialized');
@@ -578,228 +585,111 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
578
585
  external_node_assert_default()(libnut, 'libnut not initialized');
579
586
  await this.typeViaClipboard(text);
580
587
  }
588
+ async selectAllAndDelete() {
589
+ external_node_assert_default()(libnut, 'libnut not initialized');
590
+ if (this.useAppleScript) {
591
+ sendKeyViaAppleScript('a', [
592
+ 'command'
593
+ ]);
594
+ await (0, utils_namespaceObject.sleep)(50);
595
+ sendKeyViaAppleScript('backspace', []);
596
+ return;
597
+ }
598
+ const modifier = 'darwin' === process.platform ? 'command' : 'control';
599
+ libnut.keyTap('a', [
600
+ modifier
601
+ ]);
602
+ await (0, utils_namespaceObject.sleep)(50);
603
+ libnut.keyTap('backspace');
604
+ }
605
+ async pressKeyboardShortcut(keyName) {
606
+ external_node_assert_default()(libnut, 'libnut not initialized');
607
+ const keys = keyName.split('+');
608
+ const modifiers = keys.slice(0, -1).map(normalizeKeyName);
609
+ const key = normalizePrimaryKey(keys[keys.length - 1]);
610
+ debugDevice('KeyboardPress', {
611
+ original: keyName,
612
+ key,
613
+ modifiers,
614
+ driver: this.useAppleScript ? "applescript" : 'libnut'
615
+ });
616
+ if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
617
+ else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
618
+ else libnut.keyTap(key);
619
+ }
620
+ async performScroll(param) {
621
+ external_node_assert_default()(libnut, 'libnut not initialized');
622
+ if (param.locate) {
623
+ const element = param.locate;
624
+ const [x, y] = element.center;
625
+ libnut.moveMouse(Math.round(x), Math.round(y));
626
+ }
627
+ const scrollType = param?.scrollType;
628
+ const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
629
+ if (edgeSpec) {
630
+ if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
631
+ if (this.useAppleScript) {
632
+ sendKeyViaAppleScript(edgeSpec.key);
633
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
634
+ return;
635
+ }
636
+ const [dx, dy] = edgeSpec.libnut;
637
+ for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
638
+ libnut.scrollMouse(dx, dy);
639
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
640
+ }
641
+ return;
642
+ }
643
+ if ('singleAction' === scrollType || !scrollType) {
644
+ const distance = param?.distance || 500;
645
+ const direction = param?.direction || 'down';
646
+ const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
647
+ if (isKnownDirection) {
648
+ const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
649
+ if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
650
+ }
651
+ if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
652
+ const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
653
+ const key = 'up' === direction ? 'pageup' : 'pagedown';
654
+ for(let i = 0; i < pages; i++){
655
+ sendKeyViaAppleScript(key);
656
+ await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
657
+ }
658
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
659
+ return;
660
+ }
661
+ const ticks = Math.ceil(distance / 100);
662
+ const directionMap = {
663
+ up: [
664
+ 0,
665
+ ticks
666
+ ],
667
+ down: [
668
+ 0,
669
+ -ticks
670
+ ],
671
+ left: [
672
+ -ticks,
673
+ 0
674
+ ],
675
+ right: [
676
+ ticks,
677
+ 0
678
+ ]
679
+ };
680
+ const [dx, dy] = directionMap[direction] || [
681
+ 0,
682
+ -ticks
683
+ ];
684
+ libnut.scrollMouse(dx, dy);
685
+ await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
686
+ return;
687
+ }
688
+ throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
689
+ }
581
690
  actionSpace() {
582
691
  const defaultActions = [
583
- (0, device_namespaceObject.defineActionTap)(async (param)=>{
584
- external_node_assert_default()(libnut, 'libnut not initialized');
585
- const element = param.locate;
586
- external_node_assert_default()(element, 'Element not found, cannot tap');
587
- const [x, y] = element.center;
588
- const targetX = Math.round(x);
589
- const targetY = Math.round(y);
590
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
591
- libnut.mouseToggle('down', 'left');
592
- await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
593
- libnut.mouseToggle('up', 'left');
594
- }),
595
- (0, device_namespaceObject.defineActionDoubleClick)(async (param)=>{
596
- external_node_assert_default()(libnut, 'libnut not initialized');
597
- const element = param.locate;
598
- external_node_assert_default()(element, 'Element not found, cannot double click');
599
- const [x, y] = element.center;
600
- libnut.moveMouse(Math.round(x), Math.round(y));
601
- libnut.mouseClick('left', true);
602
- }),
603
- (0, device_namespaceObject.defineActionRightClick)(async (param)=>{
604
- external_node_assert_default()(libnut, 'libnut not initialized');
605
- const element = param.locate;
606
- external_node_assert_default()(element, 'Element not found, cannot right click');
607
- const [x, y] = element.center;
608
- libnut.moveMouse(Math.round(x), Math.round(y));
609
- libnut.mouseClick('right');
610
- }),
611
- (0, device_namespaceObject.defineAction)({
612
- name: 'MouseMove',
613
- description: 'Move the mouse to the element',
614
- interfaceAlias: 'aiHover',
615
- paramSchema: device_namespaceObject.actionHoverParamSchema,
616
- sample: {
617
- locate: {
618
- prompt: 'the navigation menu item "Products"'
619
- }
620
- },
621
- call: async (param)=>{
622
- external_node_assert_default()(libnut, 'libnut not initialized');
623
- const element = param.locate;
624
- external_node_assert_default()(element, 'Element not found, cannot move mouse');
625
- const [x, y] = element.center;
626
- const targetX = Math.round(x);
627
- const targetY = Math.round(y);
628
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
629
- await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
630
- }
631
- }),
632
- (0, device_namespaceObject.defineAction)({
633
- name: 'Input',
634
- description: 'Input text into the input field',
635
- interfaceAlias: 'aiInput',
636
- paramSchema: computerInputParamSchema,
637
- sample: {
638
- value: 'test@example.com',
639
- locate: {
640
- prompt: 'the email input field'
641
- }
642
- },
643
- call: async (param)=>{
644
- external_node_assert_default()(libnut, 'libnut not initialized');
645
- const element = param.locate;
646
- if (element) {
647
- const [x, y] = element.center;
648
- libnut.moveMouse(Math.round(x), Math.round(y));
649
- libnut.mouseClick('left');
650
- await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
651
- if ('append' !== param.mode) {
652
- if (this.useAppleScript) {
653
- sendKeyViaAppleScript('a', [
654
- 'command'
655
- ]);
656
- await (0, utils_namespaceObject.sleep)(50);
657
- sendKeyViaAppleScript('backspace', []);
658
- } else {
659
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
660
- libnut.keyTap('a', [
661
- modifier
662
- ]);
663
- await (0, utils_namespaceObject.sleep)(50);
664
- libnut.keyTap('backspace');
665
- }
666
- await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
667
- }
668
- }
669
- if ('clear' === param.mode) return;
670
- if (!param.value) return;
671
- await this.smartTypeString(param.value);
672
- }
673
- }),
674
- (0, device_namespaceObject.defineActionScroll)(async (param)=>{
675
- external_node_assert_default()(libnut, 'libnut not initialized');
676
- if (param.locate) {
677
- const element = param.locate;
678
- const [x, y] = element.center;
679
- libnut.moveMouse(Math.round(x), Math.round(y));
680
- }
681
- const scrollType = param?.scrollType;
682
- const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
683
- if (edgeSpec) {
684
- if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
685
- if (this.useAppleScript) {
686
- sendKeyViaAppleScript(edgeSpec.key);
687
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
688
- return;
689
- }
690
- const [dx, dy] = edgeSpec.libnut;
691
- for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
692
- libnut.scrollMouse(dx, dy);
693
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
694
- }
695
- return;
696
- }
697
- if ('singleAction' === scrollType || !scrollType) {
698
- const distance = param?.distance || 500;
699
- const direction = param?.direction || 'down';
700
- const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
701
- if (isKnownDirection) {
702
- const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
703
- if (runPhasedScroll(direction, distance, steps)) return void await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
704
- }
705
- if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
706
- const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
707
- const key = 'up' === direction ? 'pageup' : 'pagedown';
708
- for(let i = 0; i < pages; i++){
709
- sendKeyViaAppleScript(key);
710
- await (0, utils_namespaceObject.sleep)(SCROLL_STEP_DELAY);
711
- }
712
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
713
- return;
714
- }
715
- const ticks = Math.ceil(distance / 100);
716
- const directionMap = {
717
- up: [
718
- 0,
719
- ticks
720
- ],
721
- down: [
722
- 0,
723
- -ticks
724
- ],
725
- left: [
726
- -ticks,
727
- 0
728
- ],
729
- right: [
730
- ticks,
731
- 0
732
- ]
733
- };
734
- const [dx, dy] = directionMap[direction] || [
735
- 0,
736
- -ticks
737
- ];
738
- libnut.scrollMouse(dx, dy);
739
- await (0, utils_namespaceObject.sleep)(SCROLL_COMPLETE_DELAY);
740
- return;
741
- }
742
- throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
743
- }),
744
- (0, device_namespaceObject.defineActionKeyboardPress)(async (param)=>{
745
- external_node_assert_default()(libnut, 'libnut not initialized');
746
- if (param.locate) {
747
- const [x, y] = param.locate.center;
748
- libnut.moveMouse(Math.round(x), Math.round(y));
749
- libnut.mouseClick('left');
750
- await (0, utils_namespaceObject.sleep)(50);
751
- }
752
- const keys = param.keyName.split('+');
753
- const modifiers = keys.slice(0, -1).map(normalizeKeyName);
754
- const key = normalizePrimaryKey(keys[keys.length - 1]);
755
- debugDevice('KeyboardPress', {
756
- original: param.keyName,
757
- key,
758
- modifiers,
759
- driver: this.useAppleScript ? "applescript" : 'libnut'
760
- });
761
- if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
762
- else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
763
- else libnut.keyTap(key);
764
- }),
765
- (0, device_namespaceObject.defineActionDragAndDrop)(async (param)=>{
766
- external_node_assert_default()(libnut, 'libnut not initialized');
767
- const from = param.from;
768
- const to = param.to;
769
- external_node_assert_default()(from, 'missing "from" param for drag and drop');
770
- external_node_assert_default()(to, 'missing "to" param for drag and drop');
771
- const [fromX, fromY] = from.center;
772
- const [toX, toY] = to.center;
773
- libnut.moveMouse(Math.round(fromX), Math.round(fromY));
774
- libnut.mouseToggle('down', 'left');
775
- await (0, utils_namespaceObject.sleep)(100);
776
- libnut.moveMouse(Math.round(toX), Math.round(toY));
777
- await (0, utils_namespaceObject.sleep)(100);
778
- libnut.mouseToggle('up', 'left');
779
- }),
780
- (0, device_namespaceObject.defineActionClearInput)(async (param)=>{
781
- external_node_assert_default()(libnut, 'libnut not initialized');
782
- const element = param.locate;
783
- external_node_assert_default()(element, 'Element not found, cannot clear input');
784
- const [x, y] = element.center;
785
- libnut.moveMouse(Math.round(x), Math.round(y));
786
- libnut.mouseClick('left');
787
- await (0, utils_namespaceObject.sleep)(100);
788
- if (this.useAppleScript) {
789
- sendKeyViaAppleScript('a', [
790
- 'command'
791
- ]);
792
- await (0, utils_namespaceObject.sleep)(50);
793
- sendKeyViaAppleScript('backspace', []);
794
- } else {
795
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
796
- libnut.keyTap('a', [
797
- modifier
798
- ]);
799
- libnut.keyTap('backspace');
800
- }
801
- await (0, utils_namespaceObject.sleep)(50);
802
- })
692
+ ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives)
803
693
  ];
804
694
  const platformActions = Object.values(createPlatformActions());
805
695
  const customActions = this.options?.customActions || [];
@@ -837,6 +727,88 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
837
727
  _define_property(this, "xvfbCleanup", void 0);
838
728
  _define_property(this, "useAppleScript", void 0);
839
729
  _define_property(this, "uri", void 0);
730
+ _define_property(this, "inputPrimitives", {
731
+ pointer: {
732
+ tap: async ({ x, y })=>{
733
+ external_node_assert_default()(libnut, 'libnut not initialized');
734
+ const targetX = Math.round(x);
735
+ const targetY = Math.round(y);
736
+ await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
737
+ libnut.mouseToggle('down', 'left');
738
+ await (0, utils_namespaceObject.sleep)(CLICK_HOLD_DURATION);
739
+ libnut.mouseToggle('up', 'left');
740
+ },
741
+ doubleClick: async ({ x, y })=>{
742
+ external_node_assert_default()(libnut, 'libnut not initialized');
743
+ libnut.moveMouse(Math.round(x), Math.round(y));
744
+ libnut.mouseClick('left', true);
745
+ },
746
+ rightClick: async ({ x, y })=>{
747
+ external_node_assert_default()(libnut, 'libnut not initialized');
748
+ libnut.moveMouse(Math.round(x), Math.round(y));
749
+ libnut.mouseClick('right');
750
+ },
751
+ hover: async ({ x, y })=>{
752
+ external_node_assert_default()(libnut, 'libnut not initialized');
753
+ await smoothMoveMouse(Math.round(x), Math.round(y), SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
754
+ await (0, utils_namespaceObject.sleep)(MOUSE_MOVE_EFFECT_WAIT);
755
+ },
756
+ dragAndDrop: async (from, to)=>{
757
+ external_node_assert_default()(libnut, 'libnut not initialized');
758
+ libnut.moveMouse(Math.round(from.x), Math.round(from.y));
759
+ libnut.mouseToggle('down', 'left');
760
+ await (0, utils_namespaceObject.sleep)(100);
761
+ libnut.moveMouse(Math.round(to.x), Math.round(to.y));
762
+ await (0, utils_namespaceObject.sleep)(100);
763
+ libnut.mouseToggle('up', 'left');
764
+ }
765
+ },
766
+ keyboard: {
767
+ typeText: async (value, opts)=>{
768
+ external_node_assert_default()(libnut, 'libnut not initialized');
769
+ const element = opts?.target;
770
+ if (element) {
771
+ const [x, y] = element.center;
772
+ libnut.moveMouse(Math.round(x), Math.round(y));
773
+ libnut.mouseClick('left');
774
+ await (0, utils_namespaceObject.sleep)(INPUT_FOCUS_DELAY);
775
+ if (opts?.replace !== false) {
776
+ await this.selectAllAndDelete();
777
+ await (0, utils_namespaceObject.sleep)(INPUT_CLEAR_DELAY);
778
+ }
779
+ }
780
+ await this.smartTypeString(value);
781
+ },
782
+ keyboardPress: async (keyName, opts)=>{
783
+ external_node_assert_default()(libnut, 'libnut not initialized');
784
+ const target = opts?.target;
785
+ if (target) {
786
+ const [x, y] = target.center;
787
+ libnut.moveMouse(Math.round(x), Math.round(y));
788
+ libnut.mouseClick('left');
789
+ await (0, utils_namespaceObject.sleep)(50);
790
+ }
791
+ await this.pressKeyboardShortcut(keyName);
792
+ },
793
+ clearInput: async (target)=>{
794
+ external_node_assert_default()(libnut, 'libnut not initialized');
795
+ if (target) {
796
+ const element = target;
797
+ const [x, y] = element.center;
798
+ libnut.moveMouse(Math.round(x), Math.round(y));
799
+ libnut.mouseClick('left');
800
+ await (0, utils_namespaceObject.sleep)(100);
801
+ }
802
+ await this.selectAllAndDelete();
803
+ await (0, utils_namespaceObject.sleep)(50);
804
+ }
805
+ },
806
+ scroll: {
807
+ scroll: async (param)=>{
808
+ await this.performScroll(param);
809
+ }
810
+ }
811
+ });
840
812
  this.options = options;
841
813
  this.displayId = options?.displayId;
842
814
  this.useAppleScript = 'darwin' === process.platform && options?.keyboardDriver !== 'libnut';
@@ -851,10 +823,578 @@ function createPlatformActions() {
851
823
  })
852
824
  };
853
825
  }
854
- require("node:events");
855
- require("node:readline");
856
- (0, logger_namespaceObject.getDebug)('rdp:backend');
857
- (0, logger_namespaceObject.getDebug)('rdp:device');
826
+ const external_node_events_namespaceObject = require("node:events");
827
+ const external_node_readline_namespaceObject = require("node:readline");
828
+ const platformBinaryMap = {
829
+ darwin: {
830
+ directory: 'darwin',
831
+ fileName: 'rdp-helper'
832
+ },
833
+ linux: {
834
+ directory: 'linux',
835
+ fileName: 'rdp-helper'
836
+ },
837
+ win32: {
838
+ directory: 'win32',
839
+ fileName: 'rdp-helper.exe'
840
+ }
841
+ };
842
+ function getPlatformBinary(platform) {
843
+ if (platform in platformBinaryMap) return platformBinaryMap[platform];
844
+ }
845
+ function currentDirname() {
846
+ return __dirname;
847
+ }
848
+ function getRdpHelperBinaryPath() {
849
+ const platformBinary = getPlatformBinary(process.platform);
850
+ if (!platformBinary) throw new Error(`@midscene/computer RDP helper does not support platform ${process.platform}`);
851
+ const hereDir = currentDirname();
852
+ const candidateRoots = [
853
+ (0, external_node_path_namespaceObject.resolve)(hereDir, '../..'),
854
+ (0, external_node_path_namespaceObject.resolve)(hereDir, '../../..')
855
+ ];
856
+ for (const root of candidateRoots){
857
+ const binaryPath = (0, external_node_path_namespaceObject.resolve)(root, 'bin', platformBinary.directory, platformBinary.fileName);
858
+ if ((0, external_node_fs_namespaceObject.existsSync)(binaryPath)) return binaryPath;
859
+ }
860
+ throw new Error(`RDP helper binary not found for ${process.platform}. Run \`pnpm --filter @midscene/computer run build:native\` first.`);
861
+ }
862
+ function backend_client_define_property(obj, key, value) {
863
+ if (key in obj) Object.defineProperty(obj, key, {
864
+ value: value,
865
+ enumerable: true,
866
+ configurable: true,
867
+ writable: true
868
+ });
869
+ else obj[key] = value;
870
+ return obj;
871
+ }
872
+ const debug = (0, logger_namespaceObject.getDebug)('rdp:backend');
873
+ const HELPER_SHUTDOWN_TIMEOUT_MS = 3000;
874
+ const MAX_STDERR_LINES = 40;
875
+ class HelperProcessRDPBackendClient {
876
+ async connect(config) {
877
+ this.fatalHelperError = void 0;
878
+ await this.ensureHelperStarted();
879
+ const response = await this.send({
880
+ type: 'connect',
881
+ config
882
+ });
883
+ if ('connected' !== response.type) throw new Error(`Expected connected response, got ${response.type}`);
884
+ this.connected = true;
885
+ this.fatalHelperError = void 0;
886
+ return response.info;
887
+ }
888
+ async disconnect() {
889
+ const child = this.child;
890
+ if (!child) return;
891
+ let disconnectError;
892
+ if (this.connected && null === child.exitCode) try {
893
+ const response = await this.send({
894
+ type: 'disconnect'
895
+ });
896
+ this.expectOk(response, 'disconnect');
897
+ } catch (error) {
898
+ disconnectError = error instanceof Error ? error : new Error(String(error));
899
+ }
900
+ this.connected = false;
901
+ this.fatalHelperError = void 0;
902
+ await this.shutdownHelper();
903
+ if (disconnectError && !/RDP helper exited unexpectedly|RDP helper is not running|RDP helper shut down/u.test(disconnectError.message)) throw disconnectError;
904
+ }
905
+ async screenshotBase64() {
906
+ const response = await this.send({
907
+ type: 'screenshot'
908
+ });
909
+ if ('screenshot' !== response.type) throw new Error(`Expected screenshot response, got ${response.type}`);
910
+ return response.base64;
911
+ }
912
+ async size() {
913
+ const response = await this.send({
914
+ type: 'size'
915
+ });
916
+ if ('size' !== response.type) throw new Error(`Expected size response, got ${response.type}`);
917
+ return response.size;
918
+ }
919
+ async mouseMove(x, y) {
920
+ const response = await this.send({
921
+ type: 'mouseMove',
922
+ x,
923
+ y
924
+ });
925
+ this.expectOk(response, 'mouseMove');
926
+ }
927
+ async mouseButton(button, action) {
928
+ const response = await this.send({
929
+ type: 'mouseButton',
930
+ button,
931
+ action
932
+ });
933
+ this.expectOk(response, 'mouseButton');
934
+ }
935
+ async wheel(direction, amount, x, y) {
936
+ const response = await this.send({
937
+ type: 'wheel',
938
+ direction,
939
+ amount,
940
+ x,
941
+ y
942
+ });
943
+ this.expectOk(response, 'wheel');
944
+ }
945
+ async keyPress(keyName) {
946
+ const response = await this.send({
947
+ type: 'keyPress',
948
+ keyName
949
+ });
950
+ this.expectOk(response, 'keyPress');
951
+ }
952
+ async typeText(text) {
953
+ const response = await this.send({
954
+ type: 'typeText',
955
+ text
956
+ });
957
+ this.expectOk(response, 'typeText');
958
+ }
959
+ async clearInput() {
960
+ const response = await this.send({
961
+ type: 'clearInput'
962
+ });
963
+ this.expectOk(response, 'clearInput');
964
+ }
965
+ async ensureHelperStarted() {
966
+ if (this.child && null === this.child.exitCode) return;
967
+ const helperPath = this.resolveHelperPath();
968
+ debug('starting rdp helper', {
969
+ helperPath
970
+ });
971
+ const child = this.spawnFn(helperPath, [], {
972
+ stdio: [
973
+ 'pipe',
974
+ 'pipe',
975
+ 'pipe'
976
+ ]
977
+ });
978
+ child.stdout.setEncoding('utf8');
979
+ child.stderr.setEncoding('utf8');
980
+ this.child = child;
981
+ this.stderrLines.length = 0;
982
+ this.stdoutReader = (0, external_node_readline_namespaceObject.createInterface)({
983
+ input: child.stdout,
984
+ crlfDelay: 1 / 0
985
+ });
986
+ this.stderrReader = (0, external_node_readline_namespaceObject.createInterface)({
987
+ input: child.stderr,
988
+ crlfDelay: 1 / 0
989
+ });
990
+ this.stdoutReader.on('line', (line)=>{
991
+ this.handleStdoutLine(line);
992
+ });
993
+ this.stderrReader.on('line', (line)=>{
994
+ this.captureStderrLine(line);
995
+ });
996
+ child.on('exit', (code, signal)=>{
997
+ this.connected = false;
998
+ const error = this.createHelperError(`RDP helper exited unexpectedly (code=${code}, signal=${signal})`);
999
+ this.fatalHelperError = error;
1000
+ this.rejectPending(error);
1001
+ this.disposeReaders();
1002
+ this.child = void 0;
1003
+ });
1004
+ child.on('error', (error)=>{
1005
+ this.connected = false;
1006
+ const helperError = this.createHelperError(`Failed to start RDP helper: ${error.message}`);
1007
+ this.fatalHelperError = helperError;
1008
+ this.rejectPending(helperError);
1009
+ this.disposeReaders();
1010
+ this.child = void 0;
1011
+ });
1012
+ }
1013
+ handleStdoutLine(line) {
1014
+ if (!line.trim()) return;
1015
+ let parsed;
1016
+ try {
1017
+ parsed = JSON.parse(line);
1018
+ } catch (error) {
1019
+ const protocolError = this.createHelperError(`RDP helper emitted malformed JSON: ${line}`);
1020
+ this.rejectPending(protocolError);
1021
+ this.shutdownHelper(protocolError);
1022
+ return;
1023
+ }
1024
+ const pending = this.pending.get(parsed.id);
1025
+ if (!pending) return void debug('dropping response for unknown request id', parsed);
1026
+ this.pending.delete(parsed.id);
1027
+ if (parsed.ok) return void pending.resolve(parsed.payload);
1028
+ pending.reject(this.createHelperError(parsed.error.message, parsed.error.code));
1029
+ }
1030
+ captureStderrLine(line) {
1031
+ if (!line.trim()) return;
1032
+ this.stderrLines.push(line);
1033
+ if (this.stderrLines.length > MAX_STDERR_LINES) this.stderrLines.shift();
1034
+ }
1035
+ async send(payload) {
1036
+ if ('connect' !== payload.type && this.fatalHelperError && (!this.child || null !== this.child.exitCode)) throw this.fatalHelperError;
1037
+ await this.ensureHelperStarted();
1038
+ const child = this.child;
1039
+ if (!child || null !== child.exitCode) throw this.createHelperError('RDP helper is not running');
1040
+ const id = `req-${++this.nextRequestId}`;
1041
+ const request = {
1042
+ id,
1043
+ payload
1044
+ };
1045
+ return new Promise((resolve, reject)=>{
1046
+ this.pending.set(id, {
1047
+ resolve,
1048
+ reject
1049
+ });
1050
+ child.stdin.write(`${JSON.stringify(request)}\n`, (error)=>{
1051
+ if (!error) return;
1052
+ this.pending.delete(id);
1053
+ reject(this.createHelperError(`Failed to send ${payload.type} request to RDP helper: ${error.message}`));
1054
+ });
1055
+ });
1056
+ }
1057
+ expectOk(response, actionName) {
1058
+ if ('ok' !== response.type) throw new Error(`Expected ok response for ${actionName}, got ${response.type}`);
1059
+ }
1060
+ rejectPending(error) {
1061
+ for (const { reject } of this.pending.values())reject(error);
1062
+ this.pending.clear();
1063
+ }
1064
+ createHelperError(message, code) {
1065
+ const stderrSummary = this.stderrLines.join('\n').trim();
1066
+ const suffix = stderrSummary ? `\nHelper stderr:\n${stderrSummary}` : '';
1067
+ const error = new Error(`${message}${suffix}`);
1068
+ if (code) error.name = code;
1069
+ return error;
1070
+ }
1071
+ disposeReaders() {
1072
+ this.stdoutReader?.close();
1073
+ this.stderrReader?.close();
1074
+ this.stdoutReader = void 0;
1075
+ this.stderrReader = void 0;
1076
+ }
1077
+ async shutdownHelper(rootError) {
1078
+ const child = this.child;
1079
+ this.child = void 0;
1080
+ this.disposeReaders();
1081
+ if (!child) return;
1082
+ this.rejectPending(rootError || this.createHelperError('RDP helper shut down'));
1083
+ if (null !== child.exitCode) return;
1084
+ child.stdin.end();
1085
+ const exited = Promise.race([
1086
+ (0, external_node_events_namespaceObject.once)(child, 'exit'),
1087
+ new Promise((resolve)=>{
1088
+ setTimeout(()=>resolve('timeout'), HELPER_SHUTDOWN_TIMEOUT_MS);
1089
+ })
1090
+ ]);
1091
+ const result = await exited;
1092
+ if ('timeout' !== result) return;
1093
+ child.kill('SIGTERM');
1094
+ const terminated = Promise.race([
1095
+ (0, external_node_events_namespaceObject.once)(child, 'exit'),
1096
+ new Promise((resolve)=>{
1097
+ setTimeout(()=>resolve('timeout'), HELPER_SHUTDOWN_TIMEOUT_MS);
1098
+ })
1099
+ ]);
1100
+ const terminateResult = await terminated;
1101
+ if ('timeout' !== terminateResult) return;
1102
+ child.kill('SIGKILL');
1103
+ await (0, external_node_events_namespaceObject.once)(child, 'exit');
1104
+ }
1105
+ constructor(options){
1106
+ backend_client_define_property(this, "spawnFn", void 0);
1107
+ backend_client_define_property(this, "resolveHelperPath", void 0);
1108
+ backend_client_define_property(this, "child", void 0);
1109
+ backend_client_define_property(this, "stdoutReader", void 0);
1110
+ backend_client_define_property(this, "stderrReader", void 0);
1111
+ backend_client_define_property(this, "pending", new Map());
1112
+ backend_client_define_property(this, "stderrLines", []);
1113
+ backend_client_define_property(this, "nextRequestId", 0);
1114
+ backend_client_define_property(this, "connected", false);
1115
+ backend_client_define_property(this, "fatalHelperError", void 0);
1116
+ this.spawnFn = options?.spawnFn || external_node_child_process_namespaceObject.spawn;
1117
+ const overridePath = options?.helperPath;
1118
+ this.resolveHelperPath = overridePath ? ()=>overridePath : getRdpHelperBinaryPath;
1119
+ }
1120
+ }
1121
/**
 * Creates the backend client used when the caller does not supply one.
 *
 * @returns {HelperProcessRDPBackendClient} A client constructed with no options.
 */
function createDefaultRDPBackendClient() {
    const defaultClient = new HelperProcessRDPBackendClient();
    return defaultClient;
}
1124
/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already present on the object (own or inherited) the property
 * is redefined as an enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @param {object} obj - Target object.
 * @param {string|symbol} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function device_define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    Object.defineProperty(obj, key, {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    });
    return obj;
}
1134
// Debug logger for the RDP device, created under the 'rdp:device' namespace.
const device_debug = (0, logger_namespaceObject.getDebug)('rdp:device');

// Pointer movement: number of interpolation steps and the per-step delay
// (passed as `stepDelayMs`) for each gesture.
const device_SMOOTH_MOVE_STEPS_TAP = 8;
const device_SMOOTH_MOVE_DELAY_TAP = 8;
const device_SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
const device_SMOOTH_MOVE_DELAY_MOUSE_MOVE = 10;
const SMOOTH_MOVE_STEPS_DRAG = 12;
const SMOOTH_MOVE_DELAY_DRAG = 10;

// Pause after a hover move (passed as `settleDelayMs`).
const device_MOUSE_MOVE_EFFECT_WAIT = 300;

// Durations handed to sleep() while the left button is held — presumably
// milliseconds, like the other delays; confirm against utils sleep().
const device_CLICK_HOLD_DURATION = 50;
const DRAG_HOLD_DURATION = 100;

// Pauses after focusing an input by tapping it and after clearing it.
const device_INPUT_FOCUS_DELAY = 300;
const device_INPUT_CLEAR_DELAY = 150;

// Scrolling: default total distance, the largest amount sent in one wheel
// call, the pause between wheel calls, the pause after a scroll completes,
// and how many full-distance scrolls an edge scroll performs.
const DEFAULT_SCROLL_DISTANCE = 480;
const DEFAULT_SCROLL_STEP_AMOUNT = 120;
const device_SCROLL_STEP_DELAY = 100;
const device_SCROLL_COMPLETE_DELAY = 500;
const device_EDGE_SCROLL_STEPS = 10;
1151
/**
 * Device that drives a remote desktop through an RDP backend client.
 *
 * The backend (either `options.backend` or the default helper-process client)
 * performs the actual connection, screenshots and input injection; this class
 * adds connection-state checks, smooth pointer movement, chunked wheel
 * scrolling and the `inputPrimitives` used to build the action space.
 */
class RDPDevice {
    /**
     * @returns {string} Label of the form `RDP Device host:port[ as user][ [session id]]`.
     */
    describe() {
        const port = this.options.port || 3389;
        const username = this.options.username ? ` as ${this.options.username}` : '';
        const session = this.connectionInfo?.sessionId ? ` [session ${this.connectionInfo.sessionId}]` : '';
        return `RDP Device ${this.options.host}:${port}${username}${session}`;
    }
    /**
     * Connects the backend using the stored options and places the tracked
     * cursor position at the centre of the reported desktop size.
     *
     * @throws {Error} If the device has been destroyed.
     */
    async connect() {
        this.throwIfDestroyed();
        device_debug('connecting to rdp backend', {
            host: this.options.host,
            port: this.options.port,
            username: this.options.username
        });
        this.connectionInfo = await this.backend.connect(this.options);
        this.cursorPosition = [
            Math.round(this.connectionInfo.size.width / 2),
            Math.round(this.connectionInfo.size.height / 2)
        ];
    }
    /** Returns the backend's screenshot; throws if not connected. */
    async screenshotBase64() {
        this.assertConnected();
        return this.backend.screenshotBase64();
    }
    /** Returns the backend's reported size; throws if not connected. */
    async size() {
        this.assertConnected();
        return this.backend.size();
    }
    /**
     * Marks the device destroyed, clears connection state and disconnects the
     * backend. Calling it again after the first call is a no-op.
     */
    async destroy() {
        if (this.destroyed) return;
        this.destroyed = true;
        this.connectionInfo = void 0;
        this.cursorPosition = void 0;
        await this.backend.disconnect();
    }
    /**
     * @returns {Array} Actions derived from `inputPrimitives`, a `ListDisplays`
     *   action reporting the single RDP display, then any `options.customActions`.
     */
    actionSpace() {
        const defaultActions = [
            ...(0, device_namespaceObject.defineActionsFromInputPrimitives)(this.inputPrimitives),
            (0, device_namespaceObject.defineAction)({
                name: 'ListDisplays',
                description: 'List all available displays/monitors',
                call: async ()=>{
                    this.assertConnected();
                    const size = await this.size();
                    return [
                        {
                            id: this.connectionInfo?.sessionId || this.options.host,
                            name: `RDP ${this.connectionInfo?.server || this.options.host} (${size.width}x${size.height})`,
                            primary: true
                        }
                    ];
                }
            })
        ];
        return [
            ...defaultActions,
            ...this.options.customActions || []
        ];
    }
    /** @throws {Error} If the device is destroyed or has no connection info. */
    assertConnected() {
        this.throwIfDestroyed();
        if (!this.connectionInfo) throw new Error('RDPDevice is not connected');
    }
    /** @throws {Error} If `destroy()` has been called. */
    throwIfDestroyed() {
        if (this.destroyed) throw new Error('RDPDevice has been destroyed');
    }
    /** Moves the pointer to the rounded centre of `element`. */
    async moveToElement(element, options) {
        this.assertConnected();
        const targetX = Math.round(element.center[0]);
        const targetY = Math.round(element.center[1]);
        await this.movePointer(targetX, targetY, options);
    }
    /**
     * Clears the focused input: uses the backend's `clearInput` when it has
     * one, otherwise presses Control+A followed by Backspace.
     */
    async clearInput() {
        if (this.backend.clearInput) return void await this.backend.clearInput();
        await this.backend.keyPress('Control+A');
        await this.backend.keyPress('Backspace');
    }
    /**
     * Maps a scroll type to a wheel direction.
     *
     * @throws {Error} For an unrecognised scroll type.
     */
    edgeScrollDirection(scrollType) {
        switch(scrollType){
            case 'scrollToTop':
                return 'up';
            case 'scrollToBottom':
                return 'down';
            case 'scrollToLeft':
                return 'left';
            case 'scrollToRight':
                return 'right';
            case 'singleAction':
                return 'down';
            default:
                throw new Error(`Unsupported scroll type: ${scrollType}`);
        }
    }
    /**
     * Moves the pointer from the tracked cursor position to the target in
     * `options.steps` linearly interpolated moves (at least one), updating the
     * tracked position after each move.
     *
     * @param {number} targetX
     * @param {number} targetY
     * @param {object} [options]
     * @param {number} [options.steps] - Number of intermediate moves.
     * @param {number} [options.stepDelayMs] - Sleep between moves.
     * @param {number} [options.settleDelayMs] - Sleep after the final move.
     */
    async movePointer(targetX, targetY, options) {
        this.assertConnected();
        // With no tracked position the move starts (and ends) at the target.
        const start = this.cursorPosition || [
            targetX,
            targetY
        ];
        const steps = Math.max(1, options?.steps || 1);
        const stepDelayMs = options?.stepDelayMs || 0;
        for(let step = 1; step <= steps; step++){
            const x = Math.round(start[0] + (targetX - start[0]) * step / steps);
            const y = Math.round(start[1] + (targetY - start[1]) * step / steps);
            await this.backend.mouseMove(x, y);
            this.cursorPosition = [
                x,
                y
            ];
            if (stepDelayMs > 0 && step < steps) await (0, utils_namespaceObject.sleep)(stepDelayMs);
        }
        if (options?.settleDelayMs) await (0, utils_namespaceObject.sleep)(options.settleDelayMs);
    }
    /**
     * Sends `|amount|` of wheel movement in chunks of at most
     * DEFAULT_SCROLL_STEP_AMOUNT, sleeping between chunks. An amount of 0 is
     * treated as one default chunk.
     */
    async performWheel(direction, amount, x, y) {
        let remaining = Math.abs(amount);
        if (0 === remaining) remaining = DEFAULT_SCROLL_STEP_AMOUNT;
        while(remaining > 0){
            const chunk = Math.min(remaining, DEFAULT_SCROLL_STEP_AMOUNT);
            await this.backend.wheel(direction, chunk, x, y);
            remaining -= chunk;
            if (remaining > 0) await (0, utils_namespaceObject.sleep)(device_SCROLL_STEP_DELAY);
        }
    }
    /**
     * Presses the left button, runs `whileHeld`, then releases the button.
     *
     * If `whileHeld` fails the button is still released (best effort) so the
     * remote session is not left with a stuck button, and the original error
     * is rethrown.
     *
     * @param {() => Promise<void>} whileHeld - Work to perform while the button is down.
     */
    async withLeftButtonHeld(whileHeld) {
        await this.backend.mouseButton('left', 'down');
        try {
            await whileHeld();
        } catch (error) {
            try {
                await this.backend.mouseButton('left', 'up');
            } catch (releaseError) {
                // Keep the original failure as the reported error.
                device_debug('failed to release left button after error', releaseError);
            }
            throw error;
        }
        await this.backend.mouseButton('left', 'up');
    }
    /**
     * @param {object} options - Connection options (host, port, credentials, ...),
     *   plus optional `backend` and `customActions`.
     */
    constructor(options){
        device_define_property(this, "interfaceType", 'rdp');
        device_define_property(this, "options", void 0);
        device_define_property(this, "backend", void 0);
        device_define_property(this, "connectionInfo", void 0);
        device_define_property(this, "destroyed", false);
        device_define_property(this, "cursorPosition", void 0);
        device_define_property(this, "uri", void 0);
        device_define_property(this, "inputPrimitives", {
            pointer: {
                tap: async ({ x, y })=>{
                    await this.movePointer(Math.round(x), Math.round(y), {
                        steps: device_SMOOTH_MOVE_STEPS_TAP,
                        stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
                    });
                    await this.withLeftButtonHeld(()=>(0, utils_namespaceObject.sleep)(device_CLICK_HOLD_DURATION));
                },
                doubleClick: async ({ x, y })=>{
                    await this.movePointer(Math.round(x), Math.round(y), {
                        steps: device_SMOOTH_MOVE_STEPS_TAP,
                        stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
                    });
                    await this.backend.mouseButton('left', 'doubleClick');
                },
                rightClick: async ({ x, y })=>{
                    await this.movePointer(Math.round(x), Math.round(y), {
                        steps: device_SMOOTH_MOVE_STEPS_TAP,
                        stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
                    });
                    await this.backend.mouseButton('right', 'click');
                },
                hover: async ({ x, y })=>{
                    await this.movePointer(Math.round(x), Math.round(y), {
                        steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
                        stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
                        settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
                    });
                },
                dragAndDrop: async (from, to)=>{
                    await this.movePointer(Math.round(from.x), Math.round(from.y), {
                        steps: device_SMOOTH_MOVE_STEPS_TAP,
                        stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
                    });
                    // The whole drag runs with the button held; a failure
                    // mid-drag still releases it.
                    await this.withLeftButtonHeld(async ()=>{
                        await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
                        await this.movePointer(Math.round(to.x), Math.round(to.y), {
                            steps: SMOOTH_MOVE_STEPS_DRAG,
                            stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
                        });
                        await (0, utils_namespaceObject.sleep)(DRAG_HOLD_DURATION);
                    });
                }
            },
            keyboard: {
                typeText: async (value, opts)=>{
                    this.assertConnected();
                    const target = opts?.target;
                    if (target) {
                        // Tap the target to focus it before typing.
                        await this.inputPrimitives.pointer.tap({
                            x: target.center[0],
                            y: target.center[1]
                        });
                        await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
                    }
                    // Existing content is cleared unless replace is explicitly false.
                    if (opts?.replace !== false) {
                        await this.clearInput();
                        await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
                    }
                    if (opts?.focusOnly || !value) return;
                    await this.backend.typeText(value);
                },
                clearInput: async (target)=>{
                    this.assertConnected();
                    const element = target;
                    if (element) {
                        await this.inputPrimitives.pointer.tap({
                            x: element.center[0],
                            y: element.center[1]
                        });
                        await (0, utils_namespaceObject.sleep)(device_INPUT_FOCUS_DELAY);
                    }
                    await this.clearInput();
                    await (0, utils_namespaceObject.sleep)(device_INPUT_CLEAR_DELAY);
                },
                keyboardPress: async (keyName, opts)=>{
                    this.assertConnected();
                    const target = opts?.target;
                    if (target) await this.inputPrimitives.pointer.tap({
                        x: target.center[0],
                        y: target.center[1]
                    });
                    await this.backend.keyPress(keyName);
                }
            },
            scroll: {
                scroll: async (param)=>{
                    this.assertConnected();
                    const target = param.locate;
                    if (target) await this.moveToElement(target, {
                        steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
                        stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
                    });
                    // Edge scrolls repeat a full-distance scroll a fixed number of times.
                    if (param.scrollType && 'singleAction' !== param.scrollType) {
                        const direction = this.edgeScrollDirection(param.scrollType);
                        for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
                        await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
                        return;
                    }
                    await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
                    await (0, utils_namespaceObject.sleep)(device_SCROLL_COMPLETE_DELAY);
                }
            }
        });
        // Defaults are applied per field with `??` so that a key passed
        // explicitly as undefined does not overwrite the default.
        this.options = {
            ...options,
            port: options.port ?? 3389,
            securityProtocol: options.securityProtocol ?? 'auto',
            ignoreCertificate: options.ignoreCertificate ?? false
        };
        this.backend = options.backend || createDefaultRDPBackendClient();
    }
}
858
1398
  class ComputerAgent extends agent_namespaceObject.Agent {
859
1399
  }
860
1400
  function createLocalComputerDevice(opts) {
@@ -866,12 +1406,34 @@ function createLocalComputerDevice(opts) {
866
1406
  xvfbResolution: opts?.xvfbResolution
867
1407
  });
868
1408
  }
1409
/**
 * Builds an RDPDevice from the RDP-related fields of `opts`.
 *
 * Only the fields listed below are forwarded, and a field whose value is
 * `undefined` is left out entirely. Forwarding `port: undefined` (and the
 * like) would otherwise overwrite the defaults the RDPDevice constructor
 * applies by spreading the options over them.
 *
 * @param {object} opts - Options carrying host, port, username, password,
 *   domain, adminSession, ignoreCertificate, securityProtocol, desktopWidth,
 *   desktopHeight, backend and customActions.
 * @returns {RDPDevice} A new, not yet connected device.
 */
function createRDPComputerDevice(opts) {
    const forwardedKeys = [
        'host',
        'port',
        'username',
        'password',
        'domain',
        'adminSession',
        'ignoreCertificate',
        'securityProtocol',
        'desktopWidth',
        'desktopHeight',
        'backend',
        'customActions'
    ];
    const deviceOptions = {};
    for (const key of forwardedKeys){
        const value = opts[key];
        if (void 0 !== value) deviceOptions[key] = value;
    }
    return new RDPDevice(deviceOptions);
}
869
1425
  async function agentForComputer(opts) {
870
1426
  const device = createLocalComputerDevice(opts);
871
1427
  await device.connect();
872
1428
  return new ComputerAgent(device, opts);
873
1429
  }
874
1430
  const agentFromComputer = agentForComputer;
1431
/**
 * Creates an RDP device from `opts`, connects it and wraps it in a
 * ComputerAgent constructed with the same `opts`.
 *
 * @param {object} opts - Passed to both the RDP device factory and the agent.
 * @returns {Promise<ComputerAgent>} Agent bound to the connected RDP device.
 */
async function agentForRDPComputer(opts) {
    const remoteDevice = createRDPComputerDevice(opts);
    await remoteDevice.connect();
    const agent = new ComputerAgent(remoteDevice, opts);
    return agent;
}
1436
+ const core_namespaceObject = require("@midscene/core");
875
1437
  const base_tools_namespaceObject = require("@midscene/shared/mcp/base-tools");
876
1438
  function mcp_tools_define_property(obj, key, value) {
877
1439
  if (key in obj) Object.defineProperty(obj, key, {
@@ -884,10 +1446,61 @@ function mcp_tools_define_property(obj, key, value) {
884
1446
  return obj;
885
1447
  }
886
1448
  const mcp_tools_debug = (0, logger_namespaceObject.getDebug)('mcp:computer-tools');
1449
// Values accepted for the `securityProtocol` init argument.
const RDP_SECURITY_PROTOCOLS = ['auto', 'tls', 'nla', 'rdp'];
887
1455
  const computerInitArgShape = {
888
- displayId: core_namespaceObject.z.string().optional().describe('Display ID (from computer_list_displays)'),
889
- headless: core_namespaceObject.z.boolean().optional().describe('Start virtual display via Xvfb (Linux only)')
1456
+ displayId: core_namespaceObject.z.string().optional().describe('Display ID for local mode (from computer_list_displays). Ignored when host is set.'),
1457
+ headless: core_namespaceObject.z.boolean().optional().describe('Start virtual display via Xvfb (Linux local mode only). Ignored when host is set.'),
1458
+ host: core_namespaceObject.z.string().optional().describe('RDP host (FQDN or IP). Set this to switch into RDP mode.'),
1459
+ port: core_namespaceObject.z.number().optional().describe('RDP port (default 3389). Requires host.'),
1460
+ username: core_namespaceObject.z.string().optional().describe('RDP username. Requires host.'),
1461
+ password: core_namespaceObject.z.string().optional().describe('RDP password. Requires host. Prefer setting via environment or a secrets manager.'),
1462
+ domain: core_namespaceObject.z.string().optional().describe('RDP domain. Requires host.'),
1463
+ adminSession: core_namespaceObject.z.boolean().optional().describe('Attach to the RDP admin/console session. Requires host.'),
1464
+ ignoreCertificate: core_namespaceObject.z.boolean().optional().describe('Skip TLS certificate validation. Requires host.'),
1465
+ securityProtocol: core_namespaceObject.z["enum"](RDP_SECURITY_PROTOCOLS).optional().describe('RDP security protocol negotiation (default auto). Requires host.'),
1466
+ desktopWidth: core_namespaceObject.z.number().optional().describe('Remote desktop width in pixels. Requires host.'),
1467
+ desktopHeight: core_namespaceObject.z.number().optional().describe('Remote desktop height in pixels. Requires host.')
890
1468
  };
1469
/**
 * Converts the flat init arguments into a mode-tagged options object.
 *
 * - No arguments (missing or an object without keys): returns undefined.
 * - Truthy `host`: returns `{ mode: 'rdp', ... }` with every field except
 *   `displayId` and `headless`.
 * - Otherwise: returns `{ mode: 'local', displayId, headless }`.
 *
 * @param {object} [extracted] - Flat init arguments.
 * @returns {object|undefined} Mode-tagged options, or undefined when empty.
 */
function adaptComputerInitArgs(extracted) {
    const hasArgs = Boolean(extracted) && Object.keys(extracted).length > 0;
    if (!hasArgs) return;
    const { displayId, headless, ...remoteFields } = extracted;
    if (!extracted.host) {
        return {
            mode: 'local',
            displayId,
            headless
        };
    }
    return {
        mode: 'rdp',
        ...remoteFields,
        host: extracted.host
    };
}
1485
/**
 * Decides whether an existing agent must be replaced for the given options.
 *
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {boolean} False when `opts` is missing; true for RDP mode; otherwise
 *   true only when `displayId` or `headless` is defined.
 */
function shouldRetargetAgent(opts) {
    if (!opts) return false;
    if (opts.mode === 'rdp') return true;
    const localSelectors = [opts.displayId, opts.headless];
    return localSelectors.some((selector)=>selector !== undefined);
}
1490
/**
 * Builds the suffix appended to the "Connected to computer" message.
 *
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {string} ` via RDP (host[:port][ as user])` for RDP mode,
 *   ` (Display: id)` for local mode with a display id, otherwise
 *   ` (Primary display)`.
 */
function describeConnectTarget(opts) {
    switch(opts?.mode){
        case 'rdp':
            {
                const endpoint = opts.port ? `${opts.host}:${opts.port}` : `${opts.host}`;
                const asUser = opts.username ? ` as ${opts.username}` : '';
                return ` via RDP (${endpoint}${asUser})`;
            }
        case 'local':
            if (opts.displayId) return ` (Display: ${opts.displayId})`;
            break;
        default:
            break;
    }
    return ' (Primary display)';
}
1499
/**
 * Picks the target label passed to `createNewCliReportSession`.
 *
 * @param {object} [opts] - Mode-tagged init options.
 * @returns {string} `rdp:<host>` for RDP mode, the display id for local mode
 *   with a display id, otherwise `'primary'`.
 */
function getCliReportSessionTarget(opts) {
    const mode = opts?.mode;
    if (mode === 'rdp') {
        return `rdp:${opts.host}`;
    }
    const hasLocalDisplay = mode === 'local' && opts.displayId;
    return hasLocalDisplay ? opts.displayId : 'primary';
}
891
1504
  class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools {
892
1505
  getCliReportSessionName() {
893
1506
  return 'midscene-computer';
@@ -896,9 +1509,7 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
896
1509
  return new ComputerDevice({});
897
1510
  }
898
1511
  async ensureAgent(opts) {
899
- const displayId = opts?.displayId;
900
- const headless = opts?.headless;
901
- if (this.agent && (void 0 !== displayId || void 0 !== headless)) {
1512
+ if (this.agent && shouldRetargetAgent(opts)) {
902
1513
  try {
903
1514
  await this.agent.destroy?.();
904
1515
  } catch (error) {
@@ -907,8 +1518,20 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
907
1518
  this.agent = void 0;
908
1519
  }
909
1520
  if (this.agent) return this.agent;
910
- mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
911
1521
  const reportOptions = this.readCliReportAgentOptions();
1522
+ if (opts?.mode === 'rdp') {
1523
+ mcp_tools_debug('Creating RDP Computer agent for host:', opts.host);
1524
+ const { mode: _mode, ...rdpFields } = opts;
1525
+ const agent = await agentForRDPComputer({
1526
+ ...rdpFields,
1527
+ ...reportOptions ?? {}
1528
+ });
1529
+ this.agent = agent;
1530
+ return agent;
1531
+ }
1532
+ const displayId = opts?.mode === 'local' ? opts.displayId : void 0;
1533
+ const headless = opts?.mode === 'local' ? opts.headless : void 0;
1534
+ mcp_tools_debug('Creating Computer agent with displayId:', displayId || 'primary');
912
1535
  const agentOpts = {
913
1536
  ...displayId ? {
914
1537
  displayId
@@ -926,12 +1549,12 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
926
1549
  return [
927
1550
  {
928
1551
  name: 'computer_connect',
929
- description: 'Connect to computer desktop. Provide displayId to connect to a specific display (use computer_list_displays to get available IDs). If not provided, uses the primary display.',
1552
+ description: "Connect to a computer desktop. Default (local) mode controls the local machine; pass displayId to target a specific local display (see computer_list_displays). Pass host to switch to RDP mode and connect to a remote Windows desktop via the RDP helper binary. RDP-related options (port/username/password/domain/securityProtocol/ignoreCertificate/adminSession/desktopWidth/desktopHeight) only take effect when host is set.",
930
1553
  schema: this.getAgentInitArgSchema(),
931
1554
  cli: this.getAgentInitArgCliMetadata(),
932
1555
  handler: async (args)=>{
933
1556
  const initArgs = this.extractAgentInitParam(args);
934
- const reportSession = this.createNewCliReportSession(initArgs?.displayId ?? 'primary');
1557
+ const reportSession = this.createNewCliReportSession(getCliReportSessionTarget(initArgs));
935
1558
  this.commitCliReportSession(reportSession);
936
1559
  if (this.agent) {
937
1560
  try {
@@ -947,7 +1570,7 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
947
1570
  content: [
948
1571
  {
949
1572
  type: 'text',
950
- text: `Connected to computer${initArgs?.displayId ? ` (Display: ${initArgs.displayId})` : ' (Primary display)'}`
1573
+ text: `Connected to computer${describeConnectTarget(initArgs)}`
951
1574
  },
952
1575
  ...this.buildScreenshotContent(screenshot)
953
1576
  ]
@@ -985,7 +1608,7 @@ class ComputerMidsceneTools extends base_tools_namespaceObject.BaseMidsceneTools
985
1608
  cli: {
986
1609
  preferBareKeys: true
987
1610
  },
988
- adapt: (extracted)=>extracted
1611
+ adapt: (extracted)=>adaptComputerInitArgs(extracted)
989
1612
  });
990
1613
  }
991
1614
  }
@@ -996,7 +1619,7 @@ class ComputerMCPServer extends mcp_namespaceObject.BaseMCPServer {
996
1619
  constructor(toolsManager){
997
1620
  super({
998
1621
  name: '@midscene/computer-mcp',
999
- version: "1.8.0",
1622
+ version: "1.8.1",
1000
1623
  description: 'Control the computer desktop using natural language commands'
1001
1624
  }, toolsManager);
1002
1625
  }