@midscene/computer 1.8.1-beta-20260513084557.0 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/es/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { createReportCliCommands, getMidsceneLocationSchema, z } from "@midscene/core";
1
+ import { createReportCliCommands, z } from "@midscene/core";
2
2
  import { reportCLIError, runToolsCLI } from "@midscene/shared/cli";
3
3
  import { getDebug } from "@midscene/shared/logger";
4
4
  import { BaseMidsceneTools } from "@midscene/shared/mcp/base-tools";
@@ -9,7 +9,7 @@ import { existsSync } from "node:fs";
9
9
  import { createRequire } from "node:module";
10
10
  import { dirname, resolve as external_node_path_resolve } from "node:path";
11
11
  import { fileURLToPath } from "node:url";
12
- import { actionHoverParamSchema, actionTapParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
12
+ import { defineAction, defineActionsFromInputPrimitives } from "@midscene/core/device";
13
13
  import { sleep } from "@midscene/core/utils";
14
14
  import { createImgBase64ByFormat } from "@midscene/shared/img";
15
15
  import screenshot_desktop from "screenshot-desktop";
@@ -92,15 +92,6 @@ function _define_property(obj, key, value) {
92
92
  else obj[key] = value;
93
93
  return obj;
94
94
  }
95
- const computerInputParamSchema = z.object({
96
- value: z.string().describe('The text to input'),
97
- mode: z["enum"]([
98
- 'replace',
99
- 'clear',
100
- 'append'
101
- ]).default('replace').optional().describe('Input mode: replace, clear, or append'),
102
- locate: getMidsceneLocationSchema().describe('The input field to be filled').optional()
103
- });
104
95
  const SMOOTH_MOVE_STEPS_TAP = 8;
105
96
  const SMOOTH_MOVE_STEPS_MOUSE_MOVE = 10;
106
97
  const SMOOTH_MOVE_DELAY_TAP = 8;
@@ -411,7 +402,7 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
411
402
  }
412
403
  async healthCheck() {
413
404
  console.log('[HealthCheck] Starting health check...');
414
- console.log("[HealthCheck] @midscene/computer v1.8.1-beta-20260513084557.0");
405
+ console.log("[HealthCheck] @midscene/computer v1.8.1");
415
406
  console.log('[HealthCheck] Taking screenshot...');
416
407
  const screenshotTimeout = 15000;
417
408
  let timeoutId;
@@ -477,21 +468,38 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
477
468
  debugDevice('Taking screenshot', {
478
469
  displayId: this.displayId
479
470
  });
480
- try {
481
- const options = {
482
- format: 'png'
483
- };
484
- if (void 0 !== this.displayId) if ('darwin' === process.platform) {
485
- const screenIndex = Number(this.displayId);
486
- if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
487
- } else options.screen = this.displayId;
488
- debugDevice('Screenshot options', options);
471
+ const options = {
472
+ format: 'png'
473
+ };
474
+ if (void 0 !== this.displayId) if ('darwin' === process.platform) {
475
+ const screenIndex = Number(this.displayId);
476
+ if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
477
+ } else options.screen = this.displayId;
478
+ debugDevice('Screenshot options', options);
479
+ const MAX_ATTEMPTS = 3;
480
+ const RETRY_DELAY_MS = 300;
481
+ let lastRawMessage = '';
482
+ for(let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)try {
489
483
  const buffer = await screenshot_desktop(options);
484
+ if (attempt > 1) debugDevice(`Screenshot succeeded on attempt ${attempt}`);
490
485
  return createImgBase64ByFormat('png', buffer.toString('base64'));
491
486
  } catch (error) {
492
- debugDevice(`Screenshot failed: ${error}`);
493
- throw new Error(`Failed to take screenshot: ${error}`);
487
+ lastRawMessage = error instanceof Error ? error.message : String(error);
488
+ const isMacTransient = 'darwin' === process.platform && /could not create image from display/i.test(lastRawMessage);
489
+ const willRetry = isMacTransient && attempt < MAX_ATTEMPTS;
490
+ debugDevice(`Screenshot attempt ${attempt} failed: ${lastRawMessage}${willRetry ? ' — retrying' : ''}`);
491
+ if (!willRetry) break;
492
+ await sleep(RETRY_DELAY_MS);
494
493
  }
494
+ if ('darwin' === process.platform && /could not create image from display/i.test(lastRawMessage)) throw new Error(`Failed to take screenshot on macOS: the host process is missing Screen Recording permission, or the target display is locked/sleeping.
495
+
496
+ Please follow these steps:
497
+ 1. Open System Settings > Privacy & Security > Screen Recording
498
+ 2. Enable the application running this script (e.g., Terminal, iTerm2, VS Code, WebStorm, or Midscene Studio)
499
+ 3. Fully quit and relaunch that application after granting permission — macOS only re-reads this permission on process launch.
500
+
501
+ Original error: ${lastRawMessage}`);
502
+ throw new Error(`Failed to take screenshot: ${lastRawMessage}`);
495
503
  }
496
504
  async size() {
497
505
  node_assert(libnut, 'libnut not initialized');
@@ -537,228 +545,111 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
537
545
  node_assert(libnut, 'libnut not initialized');
538
546
  await this.typeViaClipboard(text);
539
547
  }
548
+ async selectAllAndDelete() {
549
+ node_assert(libnut, 'libnut not initialized');
550
+ if (this.useAppleScript) {
551
+ sendKeyViaAppleScript('a', [
552
+ 'command'
553
+ ]);
554
+ await sleep(50);
555
+ sendKeyViaAppleScript('backspace', []);
556
+ return;
557
+ }
558
+ const modifier = 'darwin' === process.platform ? 'command' : 'control';
559
+ libnut.keyTap('a', [
560
+ modifier
561
+ ]);
562
+ await sleep(50);
563
+ libnut.keyTap('backspace');
564
+ }
565
+ async pressKeyboardShortcut(keyName) {
566
+ node_assert(libnut, 'libnut not initialized');
567
+ const keys = keyName.split('+');
568
+ const modifiers = keys.slice(0, -1).map(normalizeKeyName);
569
+ const key = normalizePrimaryKey(keys[keys.length - 1]);
570
+ debugDevice('KeyboardPress', {
571
+ original: keyName,
572
+ key,
573
+ modifiers,
574
+ driver: this.useAppleScript ? "applescript" : 'libnut'
575
+ });
576
+ if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
577
+ else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
578
+ else libnut.keyTap(key);
579
+ }
580
+ async performScroll(param) {
581
+ node_assert(libnut, 'libnut not initialized');
582
+ if (param.locate) {
583
+ const element = param.locate;
584
+ const [x, y] = element.center;
585
+ libnut.moveMouse(Math.round(x), Math.round(y));
586
+ }
587
+ const scrollType = param?.scrollType;
588
+ const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
589
+ if (edgeSpec) {
590
+ if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await sleep(SCROLL_COMPLETE_DELAY);
591
+ if (this.useAppleScript) {
592
+ sendKeyViaAppleScript(edgeSpec.key);
593
+ await sleep(SCROLL_COMPLETE_DELAY);
594
+ return;
595
+ }
596
+ const [dx, dy] = edgeSpec.libnut;
597
+ for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
598
+ libnut.scrollMouse(dx, dy);
599
+ await sleep(SCROLL_STEP_DELAY);
600
+ }
601
+ return;
602
+ }
603
+ if ('singleAction' === scrollType || !scrollType) {
604
+ const distance = param?.distance || 500;
605
+ const direction = param?.direction || 'down';
606
+ const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
607
+ if (isKnownDirection) {
608
+ const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
609
+ if (runPhasedScroll(direction, distance, steps)) return void await sleep(SCROLL_COMPLETE_DELAY);
610
+ }
611
+ if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
612
+ const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
613
+ const key = 'up' === direction ? 'pageup' : 'pagedown';
614
+ for(let i = 0; i < pages; i++){
615
+ sendKeyViaAppleScript(key);
616
+ await sleep(SCROLL_STEP_DELAY);
617
+ }
618
+ await sleep(SCROLL_COMPLETE_DELAY);
619
+ return;
620
+ }
621
+ const ticks = Math.ceil(distance / 100);
622
+ const directionMap = {
623
+ up: [
624
+ 0,
625
+ ticks
626
+ ],
627
+ down: [
628
+ 0,
629
+ -ticks
630
+ ],
631
+ left: [
632
+ -ticks,
633
+ 0
634
+ ],
635
+ right: [
636
+ ticks,
637
+ 0
638
+ ]
639
+ };
640
+ const [dx, dy] = directionMap[direction] || [
641
+ 0,
642
+ -ticks
643
+ ];
644
+ libnut.scrollMouse(dx, dy);
645
+ await sleep(SCROLL_COMPLETE_DELAY);
646
+ return;
647
+ }
648
+ throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
649
+ }
540
650
  actionSpace() {
541
651
  const defaultActions = [
542
- defineActionTap(async (param)=>{
543
- node_assert(libnut, 'libnut not initialized');
544
- const element = param.locate;
545
- node_assert(element, 'Element not found, cannot tap');
546
- const [x, y] = element.center;
547
- const targetX = Math.round(x);
548
- const targetY = Math.round(y);
549
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
550
- libnut.mouseToggle('down', 'left');
551
- await sleep(CLICK_HOLD_DURATION);
552
- libnut.mouseToggle('up', 'left');
553
- }),
554
- defineActionDoubleClick(async (param)=>{
555
- node_assert(libnut, 'libnut not initialized');
556
- const element = param.locate;
557
- node_assert(element, 'Element not found, cannot double click');
558
- const [x, y] = element.center;
559
- libnut.moveMouse(Math.round(x), Math.round(y));
560
- libnut.mouseClick('left', true);
561
- }),
562
- defineActionRightClick(async (param)=>{
563
- node_assert(libnut, 'libnut not initialized');
564
- const element = param.locate;
565
- node_assert(element, 'Element not found, cannot right click');
566
- const [x, y] = element.center;
567
- libnut.moveMouse(Math.round(x), Math.round(y));
568
- libnut.mouseClick('right');
569
- }),
570
- defineAction({
571
- name: 'MouseMove',
572
- description: 'Move the mouse to the element',
573
- interfaceAlias: 'aiHover',
574
- paramSchema: actionHoverParamSchema,
575
- sample: {
576
- locate: {
577
- prompt: 'the navigation menu item "Products"'
578
- }
579
- },
580
- call: async (param)=>{
581
- node_assert(libnut, 'libnut not initialized');
582
- const element = param.locate;
583
- node_assert(element, 'Element not found, cannot move mouse');
584
- const [x, y] = element.center;
585
- const targetX = Math.round(x);
586
- const targetY = Math.round(y);
587
- await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
588
- await sleep(MOUSE_MOVE_EFFECT_WAIT);
589
- }
590
- }),
591
- defineAction({
592
- name: 'Input',
593
- description: 'Input text into the input field',
594
- interfaceAlias: 'aiInput',
595
- paramSchema: computerInputParamSchema,
596
- sample: {
597
- value: 'test@example.com',
598
- locate: {
599
- prompt: 'the email input field'
600
- }
601
- },
602
- call: async (param)=>{
603
- node_assert(libnut, 'libnut not initialized');
604
- const element = param.locate;
605
- if (element) {
606
- const [x, y] = element.center;
607
- libnut.moveMouse(Math.round(x), Math.round(y));
608
- libnut.mouseClick('left');
609
- await sleep(INPUT_FOCUS_DELAY);
610
- if ('append' !== param.mode) {
611
- if (this.useAppleScript) {
612
- sendKeyViaAppleScript('a', [
613
- 'command'
614
- ]);
615
- await sleep(50);
616
- sendKeyViaAppleScript('backspace', []);
617
- } else {
618
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
619
- libnut.keyTap('a', [
620
- modifier
621
- ]);
622
- await sleep(50);
623
- libnut.keyTap('backspace');
624
- }
625
- await sleep(INPUT_CLEAR_DELAY);
626
- }
627
- }
628
- if ('clear' === param.mode) return;
629
- if (!param.value) return;
630
- await this.smartTypeString(param.value);
631
- }
632
- }),
633
- defineActionScroll(async (param)=>{
634
- node_assert(libnut, 'libnut not initialized');
635
- if (param.locate) {
636
- const element = param.locate;
637
- const [x, y] = element.center;
638
- libnut.moveMouse(Math.round(x), Math.round(y));
639
- }
640
- const scrollType = param?.scrollType;
641
- const edgeSpec = scrollType && scrollType in EDGE_SCROLL_SPEC ? EDGE_SCROLL_SPEC[scrollType] : null;
642
- if (edgeSpec) {
643
- if (runPhasedScroll(edgeSpec.direction, EDGE_SCROLL_TOTAL_PX, EDGE_SCROLL_STEPS)) return void await sleep(SCROLL_COMPLETE_DELAY);
644
- if (this.useAppleScript) {
645
- sendKeyViaAppleScript(edgeSpec.key);
646
- await sleep(SCROLL_COMPLETE_DELAY);
647
- return;
648
- }
649
- const [dx, dy] = edgeSpec.libnut;
650
- for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
651
- libnut.scrollMouse(dx, dy);
652
- await sleep(SCROLL_STEP_DELAY);
653
- }
654
- return;
655
- }
656
- if ('singleAction' === scrollType || !scrollType) {
657
- const distance = param?.distance || 500;
658
- const direction = param?.direction || 'down';
659
- const isKnownDirection = 'up' === direction || 'down' === direction || 'left' === direction || 'right' === direction;
660
- if (isKnownDirection) {
661
- const steps = Math.max(PHASED_MIN_STEPS, Math.round(distance / PHASED_PIXELS_PER_STEP));
662
- if (runPhasedScroll(direction, distance, steps)) return void await sleep(SCROLL_COMPLETE_DELAY);
663
- }
664
- if (this.useAppleScript && ('up' === direction || 'down' === direction)) {
665
- const pages = Math.max(1, Math.round(distance / APPROX_VIEWPORT_HEIGHT_PX));
666
- const key = 'up' === direction ? 'pageup' : 'pagedown';
667
- for(let i = 0; i < pages; i++){
668
- sendKeyViaAppleScript(key);
669
- await sleep(SCROLL_STEP_DELAY);
670
- }
671
- await sleep(SCROLL_COMPLETE_DELAY);
672
- return;
673
- }
674
- const ticks = Math.ceil(distance / 100);
675
- const directionMap = {
676
- up: [
677
- 0,
678
- ticks
679
- ],
680
- down: [
681
- 0,
682
- -ticks
683
- ],
684
- left: [
685
- -ticks,
686
- 0
687
- ],
688
- right: [
689
- ticks,
690
- 0
691
- ]
692
- };
693
- const [dx, dy] = directionMap[direction] || [
694
- 0,
695
- -ticks
696
- ];
697
- libnut.scrollMouse(dx, dy);
698
- await sleep(SCROLL_COMPLETE_DELAY);
699
- return;
700
- }
701
- throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
702
- }),
703
- defineActionKeyboardPress(async (param)=>{
704
- node_assert(libnut, 'libnut not initialized');
705
- if (param.locate) {
706
- const [x, y] = param.locate.center;
707
- libnut.moveMouse(Math.round(x), Math.round(y));
708
- libnut.mouseClick('left');
709
- await sleep(50);
710
- }
711
- const keys = param.keyName.split('+');
712
- const modifiers = keys.slice(0, -1).map(normalizeKeyName);
713
- const key = normalizePrimaryKey(keys[keys.length - 1]);
714
- debugDevice('KeyboardPress', {
715
- original: param.keyName,
716
- key,
717
- modifiers,
718
- driver: this.useAppleScript ? "applescript" : 'libnut'
719
- });
720
- if (this.useAppleScript) sendKeyViaAppleScript(key, modifiers);
721
- else if (modifiers.length > 0) libnut.keyTap(key, modifiers);
722
- else libnut.keyTap(key);
723
- }),
724
- defineActionDragAndDrop(async (param)=>{
725
- node_assert(libnut, 'libnut not initialized');
726
- const from = param.from;
727
- const to = param.to;
728
- node_assert(from, 'missing "from" param for drag and drop');
729
- node_assert(to, 'missing "to" param for drag and drop');
730
- const [fromX, fromY] = from.center;
731
- const [toX, toY] = to.center;
732
- libnut.moveMouse(Math.round(fromX), Math.round(fromY));
733
- libnut.mouseToggle('down', 'left');
734
- await sleep(100);
735
- libnut.moveMouse(Math.round(toX), Math.round(toY));
736
- await sleep(100);
737
- libnut.mouseToggle('up', 'left');
738
- }),
739
- defineActionClearInput(async (param)=>{
740
- node_assert(libnut, 'libnut not initialized');
741
- const element = param.locate;
742
- node_assert(element, 'Element not found, cannot clear input');
743
- const [x, y] = element.center;
744
- libnut.moveMouse(Math.round(x), Math.round(y));
745
- libnut.mouseClick('left');
746
- await sleep(100);
747
- if (this.useAppleScript) {
748
- sendKeyViaAppleScript('a', [
749
- 'command'
750
- ]);
751
- await sleep(50);
752
- sendKeyViaAppleScript('backspace', []);
753
- } else {
754
- const modifier = 'darwin' === process.platform ? 'command' : 'control';
755
- libnut.keyTap('a', [
756
- modifier
757
- ]);
758
- libnut.keyTap('backspace');
759
- }
760
- await sleep(50);
761
- })
652
+ ...defineActionsFromInputPrimitives(this.inputPrimitives)
762
653
  ];
763
654
  const platformActions = Object.values(createPlatformActions());
764
655
  const customActions = this.options?.customActions || [];
@@ -796,6 +687,88 @@ Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ')
796
687
  _define_property(this, "xvfbCleanup", void 0);
797
688
  _define_property(this, "useAppleScript", void 0);
798
689
  _define_property(this, "uri", void 0);
690
+ _define_property(this, "inputPrimitives", {
691
+ pointer: {
692
+ tap: async ({ x, y })=>{
693
+ node_assert(libnut, 'libnut not initialized');
694
+ const targetX = Math.round(x);
695
+ const targetY = Math.round(y);
696
+ await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
697
+ libnut.mouseToggle('down', 'left');
698
+ await sleep(CLICK_HOLD_DURATION);
699
+ libnut.mouseToggle('up', 'left');
700
+ },
701
+ doubleClick: async ({ x, y })=>{
702
+ node_assert(libnut, 'libnut not initialized');
703
+ libnut.moveMouse(Math.round(x), Math.round(y));
704
+ libnut.mouseClick('left', true);
705
+ },
706
+ rightClick: async ({ x, y })=>{
707
+ node_assert(libnut, 'libnut not initialized');
708
+ libnut.moveMouse(Math.round(x), Math.round(y));
709
+ libnut.mouseClick('right');
710
+ },
711
+ hover: async ({ x, y })=>{
712
+ node_assert(libnut, 'libnut not initialized');
713
+ await smoothMoveMouse(Math.round(x), Math.round(y), SMOOTH_MOVE_STEPS_MOUSE_MOVE, SMOOTH_MOVE_DELAY_MOUSE_MOVE);
714
+ await sleep(MOUSE_MOVE_EFFECT_WAIT);
715
+ },
716
+ dragAndDrop: async (from, to)=>{
717
+ node_assert(libnut, 'libnut not initialized');
718
+ libnut.moveMouse(Math.round(from.x), Math.round(from.y));
719
+ libnut.mouseToggle('down', 'left');
720
+ await sleep(100);
721
+ libnut.moveMouse(Math.round(to.x), Math.round(to.y));
722
+ await sleep(100);
723
+ libnut.mouseToggle('up', 'left');
724
+ }
725
+ },
726
+ keyboard: {
727
+ typeText: async (value, opts)=>{
728
+ node_assert(libnut, 'libnut not initialized');
729
+ const element = opts?.target;
730
+ if (element) {
731
+ const [x, y] = element.center;
732
+ libnut.moveMouse(Math.round(x), Math.round(y));
733
+ libnut.mouseClick('left');
734
+ await sleep(INPUT_FOCUS_DELAY);
735
+ if (opts?.replace !== false) {
736
+ await this.selectAllAndDelete();
737
+ await sleep(INPUT_CLEAR_DELAY);
738
+ }
739
+ }
740
+ await this.smartTypeString(value);
741
+ },
742
+ keyboardPress: async (keyName, opts)=>{
743
+ node_assert(libnut, 'libnut not initialized');
744
+ const target = opts?.target;
745
+ if (target) {
746
+ const [x, y] = target.center;
747
+ libnut.moveMouse(Math.round(x), Math.round(y));
748
+ libnut.mouseClick('left');
749
+ await sleep(50);
750
+ }
751
+ await this.pressKeyboardShortcut(keyName);
752
+ },
753
+ clearInput: async (target)=>{
754
+ node_assert(libnut, 'libnut not initialized');
755
+ if (target) {
756
+ const element = target;
757
+ const [x, y] = element.center;
758
+ libnut.moveMouse(Math.round(x), Math.round(y));
759
+ libnut.mouseClick('left');
760
+ await sleep(100);
761
+ }
762
+ await this.selectAllAndDelete();
763
+ await sleep(50);
764
+ }
765
+ },
766
+ scroll: {
767
+ scroll: async (param)=>{
768
+ await this.performScroll(param);
769
+ }
770
+ }
771
+ });
799
772
  this.options = options;
800
773
  this.displayId = options?.displayId;
801
774
  this.useAppleScript = 'darwin' === process.platform && options?.keyboardDriver !== 'libnut';
@@ -1171,132 +1144,7 @@ class RDPDevice {
1171
1144
  }
1172
1145
  actionSpace() {
1173
1146
  const defaultActions = [
1174
- defineActionTap(async ({ locate })=>{
1175
- const element = this.requireLocate(locate, 'tap');
1176
- await this.moveToElement(element, {
1177
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1178
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1179
- });
1180
- await this.backend.mouseButton('left', 'down');
1181
- await sleep(device_CLICK_HOLD_DURATION);
1182
- await this.backend.mouseButton('left', 'up');
1183
- }),
1184
- defineActionDoubleClick(async ({ locate })=>{
1185
- const element = this.requireLocate(locate, 'double click');
1186
- await this.moveToElement(element, {
1187
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1188
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1189
- });
1190
- await this.backend.mouseButton('left', 'doubleClick');
1191
- }),
1192
- defineActionRightClick(async ({ locate })=>{
1193
- const element = this.requireLocate(locate, 'right click');
1194
- await this.moveToElement(element, {
1195
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1196
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1197
- });
1198
- await this.backend.mouseButton('right', 'click');
1199
- }),
1200
- defineActionHover(async ({ locate })=>{
1201
- const element = this.requireLocate(locate, 'hover');
1202
- await this.moveToElement(element, {
1203
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1204
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1205
- settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1206
- });
1207
- }),
1208
- defineActionInput(async (param)=>{
1209
- this.assertConnected();
1210
- if (param.locate) {
1211
- await this.moveToElement(param.locate, {
1212
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1213
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1214
- });
1215
- await this.backend.mouseButton('left', 'click');
1216
- await sleep(device_INPUT_FOCUS_DELAY);
1217
- }
1218
- if ('typeOnly' !== param.mode) {
1219
- await this.clearInput();
1220
- await sleep(device_INPUT_CLEAR_DELAY);
1221
- }
1222
- if ('clear' === param.mode) return;
1223
- if (param.value) await this.backend.typeText(param.value);
1224
- }),
1225
- defineActionClearInput(async ({ locate })=>{
1226
- this.assertConnected();
1227
- if (locate) {
1228
- await this.moveToElement(locate, {
1229
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1230
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1231
- });
1232
- await this.backend.mouseButton('left', 'click');
1233
- await sleep(device_INPUT_FOCUS_DELAY);
1234
- }
1235
- await this.clearInput();
1236
- await sleep(device_INPUT_CLEAR_DELAY);
1237
- }),
1238
- defineActionKeyboardPress(async ({ locate, keyName })=>{
1239
- this.assertConnected();
1240
- if (locate) {
1241
- await this.moveToElement(locate, {
1242
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1243
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1244
- });
1245
- await this.backend.mouseButton('left', 'click');
1246
- }
1247
- await this.backend.keyPress(keyName);
1248
- }),
1249
- defineActionScroll(async (param)=>{
1250
- this.assertConnected();
1251
- const target = param.locate;
1252
- if (target) await this.moveToElement(target, {
1253
- steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1254
- stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1255
- });
1256
- if (param.scrollType && 'singleAction' !== param.scrollType) {
1257
- const direction = this.edgeScrollDirection(param.scrollType);
1258
- for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1259
- await sleep(device_SCROLL_COMPLETE_DELAY);
1260
- return;
1261
- }
1262
- await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1263
- await sleep(device_SCROLL_COMPLETE_DELAY);
1264
- }),
1265
- defineActionDragAndDrop(async ({ from, to })=>{
1266
- this.assertConnected();
1267
- const source = this.requireLocate(from, 'drag source');
1268
- const target = this.requireLocate(to, 'drag target');
1269
- await this.moveToElement(source, {
1270
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1271
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1272
- });
1273
- await this.backend.mouseButton('left', 'down');
1274
- await sleep(DRAG_HOLD_DURATION);
1275
- await this.moveToElement(target, {
1276
- steps: SMOOTH_MOVE_STEPS_DRAG,
1277
- stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1278
- });
1279
- await sleep(DRAG_HOLD_DURATION);
1280
- await this.backend.mouseButton('left', 'up');
1281
- }),
1282
- defineAction({
1283
- name: 'MiddleClick',
1284
- description: 'Middle click the element',
1285
- sample: {
1286
- locate: {
1287
- prompt: 'the browser tab close target'
1288
- }
1289
- },
1290
- paramSchema: actionTapParamSchema,
1291
- call: async ({ locate })=>{
1292
- const element = this.requireLocate(locate, 'middle click');
1293
- await this.moveToElement(element, {
1294
- steps: device_SMOOTH_MOVE_STEPS_TAP,
1295
- stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1296
- });
1297
- await this.backend.mouseButton('middle', 'click');
1298
- }
1299
- }),
1147
+ ...defineActionsFromInputPrimitives(this.inputPrimitives),
1300
1148
  defineAction({
1301
1149
  name: 'ListDisplays',
1302
1150
  description: 'List all available displays/monitors',
@@ -1325,10 +1173,6 @@ class RDPDevice {
1325
1173
  throwIfDestroyed() {
1326
1174
  if (this.destroyed) throw new Error('RDPDevice has been destroyed');
1327
1175
  }
1328
- requireLocate(locate, actionName) {
1329
- if (!locate) throw new Error(`Missing target element for ${actionName}`);
1330
- return locate;
1331
- }
1332
1176
  async moveToElement(element, options) {
1333
1177
  this.assertConnected();
1334
1178
  const targetX = Math.round(element.center[0]);
@@ -1394,6 +1238,113 @@ class RDPDevice {
1394
1238
  device_define_property(this, "destroyed", false);
1395
1239
  device_define_property(this, "cursorPosition", void 0);
1396
1240
  device_define_property(this, "uri", void 0);
1241
+ device_define_property(this, "inputPrimitives", {
1242
+ pointer: {
1243
+ tap: async ({ x, y })=>{
1244
+ await this.movePointer(Math.round(x), Math.round(y), {
1245
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1246
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1247
+ });
1248
+ await this.backend.mouseButton('left', 'down');
1249
+ await sleep(device_CLICK_HOLD_DURATION);
1250
+ await this.backend.mouseButton('left', 'up');
1251
+ },
1252
+ doubleClick: async ({ x, y })=>{
1253
+ await this.movePointer(Math.round(x), Math.round(y), {
1254
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1255
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1256
+ });
1257
+ await this.backend.mouseButton('left', 'doubleClick');
1258
+ },
1259
+ rightClick: async ({ x, y })=>{
1260
+ await this.movePointer(Math.round(x), Math.round(y), {
1261
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1262
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1263
+ });
1264
+ await this.backend.mouseButton('right', 'click');
1265
+ },
1266
+ hover: async ({ x, y })=>{
1267
+ await this.movePointer(Math.round(x), Math.round(y), {
1268
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1269
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE,
1270
+ settleDelayMs: device_MOUSE_MOVE_EFFECT_WAIT
1271
+ });
1272
+ },
1273
+ dragAndDrop: async (from, to)=>{
1274
+ await this.movePointer(Math.round(from.x), Math.round(from.y), {
1275
+ steps: device_SMOOTH_MOVE_STEPS_TAP,
1276
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_TAP
1277
+ });
1278
+ await this.backend.mouseButton('left', 'down');
1279
+ await sleep(DRAG_HOLD_DURATION);
1280
+ await this.movePointer(Math.round(to.x), Math.round(to.y), {
1281
+ steps: SMOOTH_MOVE_STEPS_DRAG,
1282
+ stepDelayMs: SMOOTH_MOVE_DELAY_DRAG
1283
+ });
1284
+ await sleep(DRAG_HOLD_DURATION);
1285
+ await this.backend.mouseButton('left', 'up');
1286
+ }
1287
+ },
1288
+ keyboard: {
1289
+ typeText: async (value, opts)=>{
1290
+ this.assertConnected();
1291
+ const target = opts?.target;
1292
+ if (target) {
1293
+ await this.inputPrimitives.pointer.tap({
1294
+ x: target.center[0],
1295
+ y: target.center[1]
1296
+ });
1297
+ await sleep(device_INPUT_FOCUS_DELAY);
1298
+ }
1299
+ if (opts?.replace !== false) {
1300
+ await this.clearInput();
1301
+ await sleep(device_INPUT_CLEAR_DELAY);
1302
+ }
1303
+ if (opts?.focusOnly || !value) return;
1304
+ await this.backend.typeText(value);
1305
+ },
1306
+ clearInput: async (target)=>{
1307
+ this.assertConnected();
1308
+ const element = target;
1309
+ if (element) {
1310
+ await this.inputPrimitives.pointer.tap({
1311
+ x: element.center[0],
1312
+ y: element.center[1]
1313
+ });
1314
+ await sleep(device_INPUT_FOCUS_DELAY);
1315
+ }
1316
+ await this.clearInput();
1317
+ await sleep(device_INPUT_CLEAR_DELAY);
1318
+ },
1319
+ keyboardPress: async (keyName, opts)=>{
1320
+ this.assertConnected();
1321
+ const target = opts?.target;
1322
+ if (target) await this.inputPrimitives.pointer.tap({
1323
+ x: target.center[0],
1324
+ y: target.center[1]
1325
+ });
1326
+ await this.backend.keyPress(keyName);
1327
+ }
1328
+ },
1329
+ scroll: {
1330
+ scroll: async (param)=>{
1331
+ this.assertConnected();
1332
+ const target = param.locate;
1333
+ if (target) await this.moveToElement(target, {
1334
+ steps: device_SMOOTH_MOVE_STEPS_MOUSE_MOVE,
1335
+ stepDelayMs: device_SMOOTH_MOVE_DELAY_MOUSE_MOVE
1336
+ });
1337
+ if (param.scrollType && 'singleAction' !== param.scrollType) {
1338
+ const direction = this.edgeScrollDirection(param.scrollType);
1339
+ for(let i = 0; i < device_EDGE_SCROLL_STEPS; i++)await this.performWheel(direction, DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1340
+ await sleep(device_SCROLL_COMPLETE_DELAY);
1341
+ return;
1342
+ }
1343
+ await this.performWheel(param.direction || 'down', param.distance || DEFAULT_SCROLL_DISTANCE, target?.center[0], target?.center[1]);
1344
+ await sleep(device_SCROLL_COMPLETE_DELAY);
1345
+ }
1346
+ }
1347
+ });
1397
1348
  this.options = {
1398
1349
  port: 3389,
1399
1350
  securityProtocol: 'auto',
@@ -1621,7 +1572,7 @@ class ComputerMidsceneTools extends BaseMidsceneTools {
1621
1572
  const tools = new ComputerMidsceneTools();
1622
1573
  runToolsCLI(tools, 'midscene-computer', {
1623
1574
  stripPrefix: 'computer_',
1624
- version: "1.8.1-beta-20260513084557.0",
1575
+ version: "1.8.1",
1625
1576
  extraCommands: createReportCliCommands()
1626
1577
  }).catch((e)=>{
1627
1578
  process.exit(reportCLIError(e));